xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 3bbf0e9209c918da710d8f50ca5c48af17a42e60)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc-private/kernels/blockinvert.h>
9 #include <petsc-private/kernels/blockmatmult.h>
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) {
23     if (values) *values = a->idiag;
24     PetscFunctionReturn(0);
25   }
26   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
27   diag_offset = a->diag;
28   if (!a->idiag) {
29     ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
30     ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
31   }
32   diag  = a->idiag;
33   mdiag = a->idiag+bs2*mbs;
34   if (values) *values = a->idiag;
35   /* factor and invert each block */
36   switch (bs) {
37   case 1:
38     for (i=0; i<mbs; i++) {
39       odiag    = v + 1*diag_offset[i];
40       diag[0]  = odiag[0];
41       mdiag[0] = odiag[0];
42       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
43       diag    += 1;
44       mdiag   += 1;
45     }
46     break;
47   case 2:
48     for (i=0; i<mbs; i++) {
49       odiag    = v + 4*diag_offset[i];
50       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
53       diag    += 4;
54       mdiag   += 4;
55     }
56     break;
57   case 3:
58     for (i=0; i<mbs; i++) {
59       odiag    = v + 9*diag_offset[i];
60       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62       diag[8]  = odiag[8];
63       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65       mdiag[8] = odiag[8];
66       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
67       diag    += 9;
68       mdiag   += 9;
69     }
70     break;
71   case 4:
72     for (i=0; i<mbs; i++) {
73       odiag  = v + 16*diag_offset[i];
74       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
75       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
76       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
77       diag  += 16;
78       mdiag += 16;
79     }
80     break;
81   case 5:
82     for (i=0; i<mbs; i++) {
83       odiag  = v + 25*diag_offset[i];
84       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
85       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
86       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
87       diag  += 25;
88       mdiag += 25;
89     }
90     break;
91   case 6:
92     for (i=0; i<mbs; i++) {
93       odiag  = v + 36*diag_offset[i];
94       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
95       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
96       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
97       diag  += 36;
98       mdiag += 36;
99     }
100     break;
101   case 7:
102     for (i=0; i<mbs; i++) {
103       odiag  = v + 49*diag_offset[i];
104       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
105       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
106       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
107       diag  += 49;
108       mdiag += 49;
109     }
110     break;
111   default:
112     ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr);
113     for (i=0; i<mbs; i++) {
114       odiag  = v + bs2*diag_offset[i];
115       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
116       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
117       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
118       diag  += bs2;
119       mdiag += bs2;
120     }
121     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
122   }
123   a->idiagvalid = PETSC_TRUE;
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "MatSOR_SeqBAIJ"
129 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130 {
131   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
132   PetscScalar       *x,*work,*w,*workt,*t;
133   const MatScalar   *v,*aa = a->a, *idiag;
134   const PetscScalar *b,*xb;
135   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
136   PetscErrorCode    ierr;
137   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
138   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
139 
140   PetscFunctionBegin;
141   its = its*lits;
142   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
143   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
144   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
145   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
146   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
147 
148   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
149 
150   if (!m) PetscFunctionReturn(0);
151   diag  = a->diag;
152   idiag = a->idiag;
153   k    = PetscMax(A->rmap->n,A->cmap->n);
154   if (!a->mult_work) {
155     ierr = PetscMalloc((2*k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
156   }
157   work = a->mult_work;
158   t = work + k+1;
159   if (!a->sor_work) {
160     ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr);
161   }
162   w = a->sor_work;
163 
164   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
165   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
166 
167   if (flag & SOR_ZERO_INITIAL_GUESS) {
168     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
169       switch (bs) {
170       case 1:
171         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
172         t[0] = b[0];
173         i2     = 1;
174         idiag += 1;
175         for (i=1; i<m; i++) {
176           v  = aa + ai[i];
177           vi = aj + ai[i];
178           nz = diag[i] - ai[i];
179           s[0] = b[i2];
180           for (j=0; j<nz; j++) {
181             xw[0] = x[vi[j]];
182             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
183           }
184           t[i2] = s[0];
185           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
186           x[i2]  = xw[0];
187           idiag += 1;
188           i2    += 1;
189         }
190         break;
191       case 2:
192         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
193         t[0] = b[0]; t[1] = b[1];
194         i2     = 2;
195         idiag += 4;
196         for (i=1; i<m; i++) {
197           v  = aa + 4*ai[i];
198           vi = aj + ai[i];
199           nz = diag[i] - ai[i];
200           s[0] = b[i2]; s[1] = b[i2+1];
201           for (j=0; j<nz; j++) {
202             idx = 2*vi[j];
203             it  = 4*j;
204             xw[0] = x[idx]; xw[1] = x[1+idx];
205             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
206           }
207           t[i2] = s[0]; t[i2+1] = s[1];
208           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
209           x[i2]   = xw[0]; x[i2+1] = xw[1];
210           idiag  += 4;
211           i2     += 2;
212         }
213         break;
214       case 3:
215         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
216         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
217         i2     = 3;
218         idiag += 9;
219         for (i=1; i<m; i++) {
220           v  = aa + 9*ai[i];
221           vi = aj + ai[i];
222           nz = diag[i] - ai[i];
223           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
224           while (nz--) {
225             idx = 3*(*vi++);
226             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
227             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
228             v  += 9;
229           }
230           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
231           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
232           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
233           idiag  += 9;
234           i2     += 3;
235         }
236         break;
237       case 4:
238         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
239         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
240         i2     = 4;
241         idiag += 16;
242         for (i=1; i<m; i++) {
243           v  = aa + 16*ai[i];
244           vi = aj + ai[i];
245           nz = diag[i] - ai[i];
246           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
247           while (nz--) {
248             idx = 4*(*vi++);
249             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
250             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
251             v  += 16;
252           }
253           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
254           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
255           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
256           idiag  += 16;
257           i2     += 4;
258         }
259         break;
260       case 5:
261         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
262         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
263         i2     = 5;
264         idiag += 25;
265         for (i=1; i<m; i++) {
266           v  = aa + 25*ai[i];
267           vi = aj + ai[i];
268           nz = diag[i] - ai[i];
269           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
270           while (nz--) {
271             idx = 5*(*vi++);
272             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
273             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
274             v  += 25;
275           }
276           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
277           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
278           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
279           idiag  += 25;
280           i2     += 5;
281         }
282         break;
283       case 6:
284         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
285         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
286         i2     = 6;
287         idiag += 36;
288         for (i=1; i<m; i++) {
289           v  = aa + 36*ai[i];
290           vi = aj + ai[i];
291           nz = diag[i] - ai[i];
292           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
293           while (nz--) {
294             idx = 6*(*vi++);
295             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
296             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
297             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
298             v  += 36;
299           }
300           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
301           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
302           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
303           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
304           idiag  += 36;
305           i2     += 6;
306         }
307         break;
308       case 7:
309         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
310         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
311         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
312         i2     = 7;
313         idiag += 49;
314         for (i=1; i<m; i++) {
315           v  = aa + 49*ai[i];
316           vi = aj + ai[i];
317           nz = diag[i] - ai[i];
318           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
319           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
320           while (nz--) {
321             idx = 7*(*vi++);
322             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
323             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
324             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
325             v  += 49;
326           }
327           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
328           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
329           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
330           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
331           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
332           idiag  += 49;
333           i2     += 7;
334         }
335         break;
336       default:
337         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
338         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
339         i2     = bs;
340         idiag += bs2;
341         for (i=1; i<m; i++) {
342           v  = aa + bs2*ai[i];
343           vi = aj + ai[i];
344           nz = diag[i] - ai[i];
345 
346           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
347           /* copy all rows of x that are needed into contiguous space */
348           workt = work;
349           for (j=0; j<nz; j++) {
350             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
351             workt += bs;
352           }
353           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
354           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
355           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
356 
357           idiag += bs2;
358           i2    += bs;
359         }
360         break;
361       }
362       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
363       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
364       xb = t;
365     }
366     else xb = b;
367     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
368       idiag = a->idiag+bs2*(a->mbs-1);
369       i2 = bs * (m-1);
370       switch (bs) {
371       case 1:
372         s[0]  = xb[i2];
373         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
374         x[i2] = xw[0];
375         i2   -= 1;
376         for (i=m-2; i>=0; i--) {
377           v  = aa + (diag[i]+1);
378           vi = aj + diag[i] + 1;
379           nz = ai[i+1] - diag[i] - 1;
380           s[0] = xb[i2];
381           for (j=0; j<nz; j++) {
382             xw[0] = x[vi[j]];
383             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
384           }
385           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
386           x[i2]  = xw[0];
387           idiag -= 1;
388           i2    -= 1;
389         }
390         break;
391       case 2:
392         s[0]  = xb[i2]; s[1] = xb[i2+1];
393         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
394         x[i2] = xw[0]; x[i2+1] = xw[1];
395         i2    -= 2;
396         idiag -= 4;
397         for (i=m-2; i>=0; i--) {
398           v  = aa + 4*(diag[i] + 1);
399           vi = aj + diag[i] + 1;
400           nz = ai[i+1] - diag[i] - 1;
401           s[0] = xb[i2]; s[1] = xb[i2+1];
402           for (j=0; j<nz; j++) {
403             idx = 2*vi[j];
404             it  = 4*j;
405             xw[0] = x[idx]; xw[1] = x[1+idx];
406             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
407           }
408           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
409           x[i2]   = xw[0]; x[i2+1] = xw[1];
410           idiag  -= 4;
411           i2     -= 2;
412         }
413         break;
414       case 3:
415         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
416         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
417         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
418         i2    -= 3;
419         idiag -= 9;
420         for (i=m-2; i>=0; i--) {
421           v  = aa + 9*(diag[i]+1);
422           vi = aj + diag[i] + 1;
423           nz = ai[i+1] - diag[i] - 1;
424           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
425           while (nz--) {
426             idx = 3*(*vi++);
427             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
428             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
429             v  += 9;
430           }
431           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
432           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
433           idiag  -= 9;
434           i2     -= 3;
435         }
436         break;
437       case 4:
438         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
439         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
440         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
441         i2    -= 4;
442         idiag -= 16;
443         for (i=m-2; i>=0; i--) {
444           v  = aa + 16*(diag[i]+1);
445           vi = aj + diag[i] + 1;
446           nz = ai[i+1] - diag[i] - 1;
447           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
448           while (nz--) {
449             idx = 4*(*vi++);
450             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
451             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
452             v  += 16;
453           }
454           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
455           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
456           idiag  -= 16;
457           i2     -= 4;
458         }
459         break;
460       case 5:
461         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
462         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
463         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
464         i2    -= 5;
465         idiag -= 25;
466         for (i=m-2; i>=0; i--) {
467           v  = aa + 25*(diag[i]+1);
468           vi = aj + diag[i] + 1;
469           nz = ai[i+1] - diag[i] - 1;
470           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
471           while (nz--) {
472             idx = 5*(*vi++);
473             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
474             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
475             v  += 25;
476           }
477           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
478           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
479           idiag  -= 25;
480           i2     -= 5;
481         }
482         break;
483       case 6:
484         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
485         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
486         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
487         i2    -= 6;
488         idiag -= 36;
489         for (i=m-2; i>=0; i--) {
490           v  = aa + 36*(diag[i]+1);
491           vi = aj + diag[i] + 1;
492           nz = ai[i+1] - diag[i] - 1;
493           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
494           while (nz--) {
495             idx = 6*(*vi++);
496             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
497             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
498             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
499             v  += 36;
500           }
501           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
502           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
503           idiag  -= 36;
504           i2     -= 6;
505         }
506         break;
507       case 7:
508         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
509         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
510         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
511         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
512         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
513         i2    -= 7;
514         idiag -= 49;
515         for (i=m-2; i>=0; i--) {
516           v  = aa + 49*(diag[i]+1);
517           vi = aj + diag[i] + 1;
518           nz = ai[i+1] - diag[i] - 1;
519           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
520           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
521           while (nz--) {
522             idx = 7*(*vi++);
523             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
524             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
525             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
526             v  += 49;
527           }
528           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
529           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
530           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
531           idiag  -= 49;
532           i2     -= 7;
533         }
534         break;
535       default:
536         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
537         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
538         i2    -= bs;
539         idiag -= bs2;
540         for (i=m-2; i>=0; i--) {
541           v  = aa + bs2*(diag[i]+1);
542           vi = aj + diag[i] + 1;
543           nz = ai[i+1] - diag[i] - 1;
544 
545           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
546           /* copy all rows of x that are needed into contiguous space */
547           workt = work;
548           for (j=0; j<nz; j++) {
549             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
550             workt += bs;
551           }
552           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
553           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
554 
555           idiag -= bs2;
556           i2    -= bs;
557         }
558         break;
559       }
560       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
561     }
562     its--;
563   }
564   while (its--) {
565     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
566       idiag = a->idiag;
567       i2 = 0;
568       switch (bs) {
569       case 1:
570         for (i=0; i<m; i++) {
571           v  = aa + ai[i];
572           vi = aj + ai[i];
573           nz = ai[i+1] - ai[i];
574           s[0] = b[i2];
575           for (j=0; j<nz; j++) {
576             xw[0] = x[vi[j]];
577             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
578           }
579           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
580           x[i2] += xw[0];
581           idiag += 1;
582           i2    += 1;
583         }
584         break;
585       case 2:
586         for (i=0; i<m; i++) {
587           v  = aa + 4*ai[i];
588           vi = aj + ai[i];
589           nz = ai[i+1] - ai[i];
590           s[0] = b[i2]; s[1] = b[i2+1];
591           for (j=0; j<nz; j++) {
592             idx = 2*vi[j];
593             it  = 4*j;
594             xw[0] = x[idx]; xw[1] = x[1+idx];
595             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
596           }
597           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
598           x[i2]  += xw[0]; x[i2+1] += xw[1];
599           idiag  += 4;
600           i2     += 2;
601         }
602         break;
603       case 3:
604         for (i=0; i<m; i++) {
605           v  = aa + 9*ai[i];
606           vi = aj + ai[i];
607           nz = ai[i+1] - ai[i];
608           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
609           while (nz--) {
610             idx = 3*(*vi++);
611             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
612             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
613             v  += 9;
614           }
615           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
616           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
617           idiag  += 9;
618           i2     += 3;
619         }
620         break;
621       case 4:
622         for (i=0; i<m; i++) {
623           v  = aa + 16*ai[i];
624           vi = aj + ai[i];
625           nz = ai[i+1] - ai[i];
626           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
627           while (nz--) {
628             idx = 4*(*vi++);
629             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
630             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
631             v  += 16;
632           }
633           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
634           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
635           idiag  += 16;
636           i2     += 4;
637         }
638         break;
639       case 5:
640         for (i=0; i<m; i++) {
641           v  = aa + 25*ai[i];
642           vi = aj + ai[i];
643           nz = ai[i+1] - ai[i];
644           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
645           while (nz--) {
646             idx = 5*(*vi++);
647             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
648             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
649             v  += 25;
650           }
651           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
652           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
653           idiag  += 25;
654           i2     += 5;
655         }
656         break;
657       case 6:
658         for (i=0; i<m; i++) {
659           v  = aa + 36*ai[i];
660           vi = aj + ai[i];
661           nz = ai[i+1] - ai[i];
662           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
663           while (nz--) {
664             idx = 6*(*vi++);
665             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
666             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
667             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
668             v  += 36;
669           }
670           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
671           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
672           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
673           idiag  += 36;
674           i2     += 6;
675         }
676         break;
677       case 7:
678         for (i=0; i<m; i++) {
679           v  = aa + 49*ai[i];
680           vi = aj + ai[i];
681           nz = ai[i+1] - ai[i];
682           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
683           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
684           while (nz--) {
685             idx = 7*(*vi++);
686             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
687             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
688             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
689             v  += 49;
690           }
691           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
692           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
693           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
694           idiag  += 49;
695           i2     += 7;
696         }
697         break;
698       default:
699         for (i=0; i<m; i++) {
700           v  = aa + bs2*ai[i];
701           vi = aj + ai[i];
702           nz = ai[i+1] - ai[i];
703 
704           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
705           /* copy all rows of x that are needed into contiguous space */
706           workt = work;
707           for (j=0; j<nz; j++) {
708             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
709             workt += bs;
710           }
711           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
712           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
713 
714           idiag += bs2;
715           i2    += bs;
716         }
717         break;
718       }
719       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
720     }
721     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
722       idiag = a->idiag+bs2*(a->mbs-1);
723       i2 = bs * (m-1);
724       switch (bs) {
725       case 1:
726         for (i=m-1; i>=0; i--) {
727           v  = aa + ai[i];
728           vi = aj + ai[i];
729           nz = ai[i+1] - ai[i];
730           s[0] = b[i2];
731           for (j=0; j<nz; j++) {
732             xw[0] = x[vi[j]];
733             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
734           }
735           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
736           x[i2] += xw[0];
737           idiag -= 1;
738           i2    -= 1;
739         }
740         break;
741       case 2:
742         for (i=m-1; i>=0; i--) {
743           v  = aa + 4*ai[i];
744           vi = aj + ai[i];
745           nz = ai[i+1] - ai[i];
746           s[0] = b[i2]; s[1] = b[i2+1];
747           for (j=0; j<nz; j++) {
748             idx = 2*vi[j];
749             it  = 4*j;
750             xw[0] = x[idx]; xw[1] = x[1+idx];
751             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
752           }
753           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
754           x[i2]  += xw[0]; x[i2+1] += xw[1];
755           idiag  -= 4;
756           i2     -= 2;
757         }
758         break;
759       case 3:
760         for (i=m-1; i>=0; i--) {
761           v  = aa + 9*ai[i];
762           vi = aj + ai[i];
763           nz = ai[i+1] - ai[i];
764           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
765           while (nz--) {
766             idx = 3*(*vi++);
767             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
768             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
769             v  += 9;
770           }
771           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
772           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
773           idiag  -= 9;
774           i2     -= 3;
775         }
776         break;
777       case 4:
778         for (i=m-1; i>=0; i--) {
779           v  = aa + 16*ai[i];
780           vi = aj + ai[i];
781           nz = ai[i+1] - ai[i];
782           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
783           while (nz--) {
784             idx = 4*(*vi++);
785             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
786             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
787             v  += 16;
788           }
789           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
790           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
791           idiag  -= 16;
792           i2     -= 4;
793         }
794         break;
795       case 5:
796         for (i=m-1; i>=0; i--) {
797           v  = aa + 25*ai[i];
798           vi = aj + ai[i];
799           nz = ai[i+1] - ai[i];
800           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
801           while (nz--) {
802             idx = 5*(*vi++);
803             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
804             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
805             v  += 25;
806           }
807           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
808           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
809           idiag  -= 25;
810           i2     -= 5;
811         }
812         break;
813       case 6:
814         for (i=m-1; i>=0; i--) {
815           v  = aa + 36*ai[i];
816           vi = aj + ai[i];
817           nz = ai[i+1] - ai[i];
818           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
819           while (nz--) {
820             idx = 6*(*vi++);
821             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
822             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
823             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
824             v  += 36;
825           }
826           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
827           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
828           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
829           idiag  -= 36;
830           i2     -= 6;
831         }
832         break;
833       case 7:
834         for (i=m-1; i>=0; i--) {
835           v  = aa + 49*ai[i];
836           vi = aj + ai[i];
837           nz = ai[i+1] - ai[i];
838           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
839           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
840           while (nz--) {
841             idx = 7*(*vi++);
842             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
843             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
844             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
845             v  += 49;
846           }
847           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
848           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
849           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
850           idiag  -= 49;
851           i2     -= 7;
852         }
853         break;
854       default:
855         for (i=m-1; i>=0; i--) {
856           v  = aa + bs2*ai[i];
857           vi = aj + ai[i];
858           nz = ai[i+1] - ai[i];
859 
860           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
861           /* copy all rows of x that are needed into contiguous space */
862           workt = work;
863           for (j=0; j<nz; j++) {
864             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
865             workt += bs;
866           }
867           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
868           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
869 
870           idiag -= bs2;
871           i2    -= bs;
872         }
873         break;
874       }
875       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
876     }
877   }
878   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
879   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
880   PetscFunctionReturn(0);
881 }
882 
883 
884 /*
885     Special version for direct calls from Fortran (Used in PETSc-fun3d)
886 */
887 #if defined(PETSC_HAVE_FORTRAN_CAPS)
888 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
889 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
890 #define matsetvaluesblocked4_ matsetvaluesblocked4
891 #endif
892 
893 #undef __FUNCT__
894 #define __FUNCT__ "matsetvaluesblocked4_"
895 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
896 {
897   Mat               A  = *AA;
898   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
899   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
900   PetscInt          *ai    =a->i,*ailen=a->ilen;
901   PetscInt          *aj    =a->j,stepval,lastcol = -1;
902   const PetscScalar *value = v;
903   MatScalar         *ap,*aa = a->a,*bap;
904 
905   PetscFunctionBegin;
906   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
907   stepval = (n-1)*4;
908   for (k=0; k<m; k++) { /* loop over added rows */
909     row  = im[k];
910     rp   = aj + ai[row];
911     ap   = aa + 16*ai[row];
912     nrow = ailen[row];
913     low  = 0;
914     high = nrow;
915     for (l=0; l<n; l++) { /* loop over added columns */
916       col = in[l];
917       if (col <= lastcol)  low = 0;
918       else                high = nrow;
919       lastcol = col;
920       value   = v + k*(stepval+4 + l)*4;
921       while (high-low > 7) {
922         t = (low+high)/2;
923         if (rp[t] > col) high = t;
924         else             low  = t;
925       }
926       for (i=low; i<high; i++) {
927         if (rp[i] > col) break;
928         if (rp[i] == col) {
929           bap = ap +  16*i;
930           for (ii=0; ii<4; ii++,value+=stepval) {
931             for (jj=ii; jj<16; jj+=4) {
932               bap[jj] += *value++;
933             }
934           }
935           goto noinsert2;
936         }
937       }
938       N = nrow++ - 1;
939       high++; /* added new column index thus must search to one higher than before */
940       /* shift up all the later entries in this row */
941       for (ii=N; ii>=i; ii--) {
942         rp[ii+1] = rp[ii];
943         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
944       }
945       if (N >= i) {
946         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
947       }
948       rp[i] = col;
949       bap   = ap +  16*i;
950       for (ii=0; ii<4; ii++,value+=stepval) {
951         for (jj=ii; jj<16; jj+=4) {
952           bap[jj] = *value++;
953         }
954       }
955       noinsert2:;
956       low = i;
957     }
958     ailen[row] = nrow;
959   }
960   PetscFunctionReturnVoid();
961 }
962 
963 #if defined(PETSC_HAVE_FORTRAN_CAPS)
964 #define matsetvalues4_ MATSETVALUES4
965 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
966 #define matsetvalues4_ matsetvalues4
967 #endif
968 
969 #undef __FUNCT__
970 #define __FUNCT__ "MatSetValues4_"
971 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
972 {
973   Mat         A  = *AA;
974   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
975   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
976   PetscInt    *ai=a->i,*ailen=a->ilen;
977   PetscInt    *aj=a->j,brow,bcol;
978   PetscInt    ridx,cidx,lastcol = -1;
979   MatScalar   *ap,value,*aa=a->a,*bap;
980 
981   PetscFunctionBegin;
982   for (k=0; k<m; k++) { /* loop over added rows */
983     row  = im[k]; brow = row/4;
984     rp   = aj + ai[brow];
985     ap   = aa + 16*ai[brow];
986     nrow = ailen[brow];
987     low  = 0;
988     high = nrow;
989     for (l=0; l<n; l++) { /* loop over added columns */
990       col   = in[l]; bcol = col/4;
991       ridx  = row % 4; cidx = col % 4;
992       value = v[l + k*n];
993       if (col <= lastcol)  low = 0;
994       else                high = nrow;
995       lastcol = col;
996       while (high-low > 7) {
997         t = (low+high)/2;
998         if (rp[t] > bcol) high = t;
999         else              low  = t;
1000       }
1001       for (i=low; i<high; i++) {
1002         if (rp[i] > bcol) break;
1003         if (rp[i] == bcol) {
1004           bap   = ap +  16*i + 4*cidx + ridx;
1005           *bap += value;
1006           goto noinsert1;
1007         }
1008       }
1009       N = nrow++ - 1;
1010       high++; /* added new column thus must search to one higher than before */
1011       /* shift up all the later entries in this row */
1012       for (ii=N; ii>=i; ii--) {
1013         rp[ii+1] = rp[ii];
1014         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1015       }
1016       if (N>=i) {
1017         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1018       }
1019       rp[i]                    = bcol;
1020       ap[16*i + 4*cidx + ridx] = value;
1021 noinsert1:;
1022       low = i;
1023     }
1024     ailen[brow] = nrow;
1025   }
1026   PetscFunctionReturnVoid();
1027 }
1028 
1029 /*
1030      Checks for missing diagonals
1031 */
1032 #undef __FUNCT__
1033 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1034 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1035 {
1036   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1037   PetscErrorCode ierr;
1038   PetscInt       *diag,*jj = a->j,i;
1039 
1040   PetscFunctionBegin;
1041   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1042   *missing = PETSC_FALSE;
1043   if (A->rmap->n > 0 && !jj) {
1044     *missing = PETSC_TRUE;
1045     if (d) *d = 0;
1046     PetscInfo(A,"Matrix has no entries therefore is missing diagonal");
1047   } else {
1048     diag = a->diag;
1049     for (i=0; i<a->mbs; i++) {
1050       if (jj[diag[i]] != i) {
1051         *missing = PETSC_TRUE;
1052         if (d) *d = i;
1053         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1054         break;
1055       }
1056     }
1057   }
1058   PetscFunctionReturn(0);
1059 }
1060 
1061 #undef __FUNCT__
1062 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1063 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1064 {
1065   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1066   PetscErrorCode ierr;
1067   PetscInt       i,j,m = a->mbs;
1068 
1069   PetscFunctionBegin;
1070   if (!a->diag) {
1071     ierr         = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1072     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1073     a->free_diag = PETSC_TRUE;
1074   }
1075   for (i=0; i<m; i++) {
1076     a->diag[i] = a->i[i+1];
1077     for (j=a->i[i]; j<a->i[i+1]; j++) {
1078       if (a->j[j] == i) {
1079         a->diag[i] = j;
1080         break;
1081       }
1082     }
1083   }
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 
1088 #undef __FUNCT__
1089 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1090 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1091 {
1092   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1093   PetscErrorCode ierr;
1094   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1095   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1096 
1097   PetscFunctionBegin;
1098   *nn = n;
1099   if (!ia) PetscFunctionReturn(0);
1100   if (symmetric) {
1101     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1102     nz   = tia[n];
1103   } else {
1104     tia = a->i; tja = a->j;
1105   }
1106 
1107   if (!blockcompressed && bs > 1) {
1108     (*nn) *= bs;
1109     /* malloc & create the natural set of indices */
1110     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1111     if (n) {
1112       (*ia)[0] = 0;
1113       for (j=1; j<bs; j++) {
1114         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1115       }
1116     }
1117 
1118     for (i=1; i<n; i++) {
1119       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1120       for (j=1; j<bs; j++) {
1121         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1122       }
1123     }
1124     if (n) {
1125       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1126     }
1127 
1128     if (inja) {
1129       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1130       cnt = 0;
1131       for (i=0; i<n; i++) {
1132         for (j=0; j<bs; j++) {
1133           for (k=tia[i]; k<tia[i+1]; k++) {
1134             for (l=0; l<bs; l++) {
1135               (*ja)[cnt++] = bs*tja[k] + l;
1136             }
1137           }
1138         }
1139       }
1140     }
1141 
1142     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1143       ierr = PetscFree(tia);CHKERRQ(ierr);
1144       ierr = PetscFree(tja);CHKERRQ(ierr);
1145     }
1146   } else if (oshift == 1) {
1147     if (symmetric) {
1148       nz = tia[A->rmap->n/bs];
1149       /*  add 1 to i and j indices */
1150       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1151       *ia = tia;
1152       if (ja) {
1153         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1154         *ja = tja;
1155       }
1156     } else {
1157       nz = a->i[A->rmap->n/bs];
1158       /* malloc space and  add 1 to i and j indices */
1159       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1160       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1161       if (ja) {
1162         ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1163         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1164       }
1165     }
1166   } else {
1167     *ia = tia;
1168     if (ja) *ja = tja;
1169   }
1170   PetscFunctionReturn(0);
1171 }
1172 
1173 #undef __FUNCT__
1174 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1175 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1176 {
1177   PetscErrorCode ierr;
1178 
1179   PetscFunctionBegin;
1180   if (!ia) PetscFunctionReturn(0);
1181   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1182     ierr = PetscFree(*ia);CHKERRQ(ierr);
1183     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1184   }
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 #undef __FUNCT__
1189 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1190 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1191 {
1192   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1193   PetscErrorCode ierr;
1194 
1195   PetscFunctionBegin;
1196 #if defined(PETSC_USE_LOG)
1197   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1198 #endif
1199   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1200   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1201   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1202   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1203   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1204   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1205   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1206   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1207   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1208   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1209   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1210   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1211   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1212 
1213   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1214   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1215   ierr = PetscFree(A->data);CHKERRQ(ierr);
1216 
1217   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1220   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1222   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1223   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1224   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1225   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1226   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1227   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 #undef __FUNCT__
1232 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1233 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1234 {
1235   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1236   PetscErrorCode ierr;
1237 
1238   PetscFunctionBegin;
1239   switch (op) {
1240   case MAT_ROW_ORIENTED:
1241     a->roworiented = flg;
1242     break;
1243   case MAT_KEEP_NONZERO_PATTERN:
1244     a->keepnonzeropattern = flg;
1245     break;
1246   case MAT_NEW_NONZERO_LOCATIONS:
1247     a->nonew = (flg ? 0 : 1);
1248     break;
1249   case MAT_NEW_NONZERO_LOCATION_ERR:
1250     a->nonew = (flg ? -1 : 0);
1251     break;
1252   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1253     a->nonew = (flg ? -2 : 0);
1254     break;
1255   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1256     a->nounused = (flg ? -1 : 0);
1257     break;
1258   case MAT_NEW_DIAGONALS:
1259   case MAT_IGNORE_OFF_PROC_ENTRIES:
1260   case MAT_USE_HASH_TABLE:
1261     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1262     break;
1263   case MAT_SPD:
1264   case MAT_SYMMETRIC:
1265   case MAT_STRUCTURALLY_SYMMETRIC:
1266   case MAT_HERMITIAN:
1267   case MAT_SYMMETRY_ETERNAL:
1268     /* These options are handled directly by MatSetOption() */
1269     break;
1270   default:
1271     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1272   }
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 #undef __FUNCT__
1277 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1278 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1279 {
1280   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1281   PetscErrorCode ierr;
1282   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1283   MatScalar      *aa,*aa_i;
1284   PetscScalar    *v_i;
1285 
1286   PetscFunctionBegin;
1287   bs  = A->rmap->bs;
1288   ai  = a->i;
1289   aj  = a->j;
1290   aa  = a->a;
1291   bs2 = a->bs2;
1292 
1293   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1294 
1295   bn  = row/bs;   /* Block number */
1296   bp  = row % bs; /* Block Position */
1297   M   = ai[bn+1] - ai[bn];
1298   *nz = bs*M;
1299 
1300   if (v) {
1301     *v = 0;
1302     if (*nz) {
1303       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1304       for (i=0; i<M; i++) { /* for each block in the block row */
1305         v_i  = *v + i*bs;
1306         aa_i = aa + bs2*(ai[bn] + i);
1307         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1308       }
1309     }
1310   }
1311 
1312   if (idx) {
1313     *idx = 0;
1314     if (*nz) {
1315       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1316       for (i=0; i<M; i++) { /* for each block in the block row */
1317         idx_i = *idx + i*bs;
1318         itmp  = bs*aj[ai[bn] + i];
1319         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1320       }
1321     }
1322   }
1323   PetscFunctionReturn(0);
1324 }
1325 
1326 #undef __FUNCT__
1327 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1328 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1329 {
1330   PetscErrorCode ierr;
1331 
1332   PetscFunctionBegin;
1333   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1334   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1335   PetscFunctionReturn(0);
1336 }
1337 
1338 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1339 
1340 #undef __FUNCT__
1341 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1342 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1343 {
1344   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1345   Mat            C;
1346   PetscErrorCode ierr;
1347   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1348   PetscInt       *rows,*cols,bs2=a->bs2;
1349   MatScalar      *array;
1350 
1351   PetscFunctionBegin;
1352   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1353   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1354     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1355     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1356 
1357     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1358     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1359     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1360     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1361     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1362     ierr = PetscFree(col);CHKERRQ(ierr);
1363   } else {
1364     C = *B;
1365   }
1366 
1367   array = a->a;
1368   ierr  = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1369   for (i=0; i<mbs; i++) {
1370     cols[0] = i*bs;
1371     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1372     len = ai[i+1] - ai[i];
1373     for (j=0; j<len; j++) {
1374       rows[0] = (*aj++)*bs;
1375       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1376       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1377       array += bs2;
1378     }
1379   }
1380   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1381 
1382   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1383   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1384 
1385   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1386     *B = C;
1387   } else {
1388     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1389   }
1390   PetscFunctionReturn(0);
1391 }
1392 
1393 #undef __FUNCT__
1394 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1395 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1396 {
1397   PetscErrorCode ierr;
1398   Mat            Btrans;
1399 
1400   PetscFunctionBegin;
1401   *f   = PETSC_FALSE;
1402   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1403   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1404   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1405   PetscFunctionReturn(0);
1406 }
1407 
1408 #undef __FUNCT__
1409 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1410 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1411 {
1412   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1413   PetscErrorCode ierr;
1414   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1415   int            fd;
1416   PetscScalar    *aa;
1417   FILE           *file;
1418 
1419   PetscFunctionBegin;
1420   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1421   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1422   col_lens[0] = MAT_FILE_CLASSID;
1423 
1424   col_lens[1] = A->rmap->N;
1425   col_lens[2] = A->cmap->n;
1426   col_lens[3] = a->nz*bs2;
1427 
1428   /* store lengths of each row and write (including header) to file */
1429   count = 0;
1430   for (i=0; i<a->mbs; i++) {
1431     for (j=0; j<bs; j++) {
1432       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1433     }
1434   }
1435   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1436   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1437 
1438   /* store column indices (zero start index) */
1439   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1440   count = 0;
1441   for (i=0; i<a->mbs; i++) {
1442     for (j=0; j<bs; j++) {
1443       for (k=a->i[i]; k<a->i[i+1]; k++) {
1444         for (l=0; l<bs; l++) {
1445           jj[count++] = bs*a->j[k] + l;
1446         }
1447       }
1448     }
1449   }
1450   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1451   ierr = PetscFree(jj);CHKERRQ(ierr);
1452 
1453   /* store nonzero values */
1454   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1455   count = 0;
1456   for (i=0; i<a->mbs; i++) {
1457     for (j=0; j<bs; j++) {
1458       for (k=a->i[i]; k<a->i[i+1]; k++) {
1459         for (l=0; l<bs; l++) {
1460           aa[count++] = a->a[bs2*k + l*bs + j];
1461         }
1462       }
1463     }
1464   }
1465   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1466   ierr = PetscFree(aa);CHKERRQ(ierr);
1467 
1468   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1469   if (file) {
1470     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1471   }
1472   PetscFunctionReturn(0);
1473 }
1474 
1475 #undef __FUNCT__
1476 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1477 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1478 {
1479   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1480   PetscErrorCode    ierr;
1481   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1482   PetscViewerFormat format;
1483 
1484   PetscFunctionBegin;
1485   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1486   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1487     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1488   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1489     Mat aij;
1490     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1491     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1492     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1493   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1494       PetscFunctionReturn(0);
1495   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1496     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1497     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer);CHKERRQ(ierr);
1498     for (i=0; i<a->mbs; i++) {
1499       for (j=0; j<bs; j++) {
1500         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1501         for (k=a->i[i]; k<a->i[i+1]; k++) {
1502           for (l=0; l<bs; l++) {
1503 #if defined(PETSC_USE_COMPLEX)
1504             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1505               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1506                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1507             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1508               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1509                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1510             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1511               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1512             }
1513 #else
1514             if (a->a[bs2*k + l*bs + j] != 0.0) {
1515               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1516             }
1517 #endif
1518           }
1519         }
1520         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1521       }
1522     }
1523     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1524   } else {
1525     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1526     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer);CHKERRQ(ierr);
1527     for (i=0; i<a->mbs; i++) {
1528       for (j=0; j<bs; j++) {
1529         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1530         for (k=a->i[i]; k<a->i[i+1]; k++) {
1531           for (l=0; l<bs; l++) {
1532 #if defined(PETSC_USE_COMPLEX)
1533             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1534               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1535                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1536             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1537               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1538                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1539             } else {
1540               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1541             }
1542 #else
1543             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1544 #endif
1545           }
1546         }
1547         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1548       }
1549     }
1550     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1551   }
1552   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1553   PetscFunctionReturn(0);
1554 }
1555 
1556 #include <petscdraw.h>
1557 #undef __FUNCT__
1558 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1559 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1560 {
1561   Mat               A = (Mat) Aa;
1562   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1563   PetscErrorCode    ierr;
1564   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1565   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1566   MatScalar         *aa;
1567   PetscViewer       viewer;
1568   PetscViewerFormat format;
1569 
1570   PetscFunctionBegin;
1571   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1572   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1573 
1574   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1575 
1576   /* loop over matrix elements drawing boxes */
1577 
1578   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1579     color = PETSC_DRAW_BLUE;
1580     for (i=0,row=0; i<mbs; i++,row+=bs) {
1581       for (j=a->i[i]; j<a->i[i+1]; j++) {
1582         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1583         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1584         aa  = a->a + j*bs2;
1585         for (k=0; k<bs; k++) {
1586           for (l=0; l<bs; l++) {
1587             if (PetscRealPart(*aa++) >=  0.) continue;
1588             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1589           }
1590         }
1591       }
1592     }
1593     color = PETSC_DRAW_CYAN;
1594     for (i=0,row=0; i<mbs; i++,row+=bs) {
1595       for (j=a->i[i]; j<a->i[i+1]; j++) {
1596         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1597         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1598         aa  = a->a + j*bs2;
1599         for (k=0; k<bs; k++) {
1600           for (l=0; l<bs; l++) {
1601             if (PetscRealPart(*aa++) != 0.) continue;
1602             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1603           }
1604         }
1605       }
1606     }
1607     color = PETSC_DRAW_RED;
1608     for (i=0,row=0; i<mbs; i++,row+=bs) {
1609       for (j=a->i[i]; j<a->i[i+1]; j++) {
1610         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1611         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1612         aa  = a->a + j*bs2;
1613         for (k=0; k<bs; k++) {
1614           for (l=0; l<bs; l++) {
1615             if (PetscRealPart(*aa++) <= 0.) continue;
1616             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1617           }
1618         }
1619       }
1620     }
1621   } else {
1622     /* use contour shading to indicate magnitude of values */
1623     /* first determine max of all nonzero values */
1624     PetscDraw popup;
1625     PetscReal scale,maxv = 0.0;
1626 
1627     for (i=0; i<a->nz*a->bs2; i++) {
1628       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1629     }
1630     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1631     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1632     if (popup) {
1633       ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1634     }
1635     for (i=0,row=0; i<mbs; i++,row+=bs) {
1636       for (j=a->i[i]; j<a->i[i+1]; j++) {
1637         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1638         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1639         aa  = a->a + j*bs2;
1640         for (k=0; k<bs; k++) {
1641           for (l=0; l<bs; l++) {
1642             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1643             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1644           }
1645         }
1646       }
1647     }
1648   }
1649   PetscFunctionReturn(0);
1650 }
1651 
1652 #undef __FUNCT__
1653 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1654 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1655 {
1656   PetscErrorCode ierr;
1657   PetscReal      xl,yl,xr,yr,w,h;
1658   PetscDraw      draw;
1659   PetscBool      isnull;
1660 
1661   PetscFunctionBegin;
1662   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1663   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1664 
1665   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1666   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1667   xr  += w;    yr += h;  xl = -w;     yl = -h;
1668   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1669   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1670   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1671   PetscFunctionReturn(0);
1672 }
1673 
1674 #undef __FUNCT__
1675 #define __FUNCT__ "MatView_SeqBAIJ"
1676 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1677 {
1678   PetscErrorCode ierr;
1679   PetscBool      iascii,isbinary,isdraw;
1680 
1681   PetscFunctionBegin;
1682   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1683   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1684   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1685   if (iascii) {
1686     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1687   } else if (isbinary) {
1688     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1689   } else if (isdraw) {
1690     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1691   } else {
1692     Mat B;
1693     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1694     ierr = MatView(B,viewer);CHKERRQ(ierr);
1695     ierr = MatDestroy(&B);CHKERRQ(ierr);
1696   }
1697   PetscFunctionReturn(0);
1698 }
1699 
1700 
1701 #undef __FUNCT__
1702 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1703 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1704 {
1705   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1706   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1707   PetscInt    *ai = a->i,*ailen = a->ilen;
1708   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1709   MatScalar   *ap,*aa = a->a;
1710 
1711   PetscFunctionBegin;
1712   for (k=0; k<m; k++) { /* loop over rows */
1713     row = im[k]; brow = row/bs;
1714     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1715     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1716     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1717     nrow = ailen[brow];
1718     for (l=0; l<n; l++) { /* loop over columns */
1719       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1720       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1721       col  = in[l];
1722       bcol = col/bs;
1723       cidx = col%bs;
1724       ridx = row%bs;
1725       high = nrow;
1726       low  = 0; /* assume unsorted */
1727       while (high-low > 5) {
1728         t = (low+high)/2;
1729         if (rp[t] > bcol) high = t;
1730         else             low  = t;
1731       }
1732       for (i=low; i<high; i++) {
1733         if (rp[i] > bcol) break;
1734         if (rp[i] == bcol) {
1735           *v++ = ap[bs2*i+bs*cidx+ridx];
1736           goto finished;
1737         }
1738       }
1739       *v++ = 0.0;
1740 finished:;
1741     }
1742   }
1743   PetscFunctionReturn(0);
1744 }
1745 
1746 #undef __FUNCT__
1747 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1748 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1749 {
1750   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1751   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1752   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1753   PetscErrorCode    ierr;
1754   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1755   PetscBool         roworiented=a->roworiented;
1756   const PetscScalar *value     = v;
1757   MatScalar         *ap,*aa = a->a,*bap;
1758 
1759   PetscFunctionBegin;
1760   if (roworiented) {
1761     stepval = (n-1)*bs;
1762   } else {
1763     stepval = (m-1)*bs;
1764   }
1765   for (k=0; k<m; k++) { /* loop over added rows */
1766     row = im[k];
1767     if (row < 0) continue;
1768 #if defined(PETSC_USE_DEBUG)
1769     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1770 #endif
1771     rp   = aj + ai[row];
1772     ap   = aa + bs2*ai[row];
1773     rmax = imax[row];
1774     nrow = ailen[row];
1775     low  = 0;
1776     high = nrow;
1777     for (l=0; l<n; l++) { /* loop over added columns */
1778       if (in[l] < 0) continue;
1779 #if defined(PETSC_USE_DEBUG)
1780       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1781 #endif
1782       col = in[l];
1783       if (roworiented) {
1784         value = v + (k*(stepval+bs) + l)*bs;
1785       } else {
1786         value = v + (l*(stepval+bs) + k)*bs;
1787       }
1788       if (col <= lastcol) low = 0;
1789       else high = nrow;
1790       lastcol = col;
1791       while (high-low > 7) {
1792         t = (low+high)/2;
1793         if (rp[t] > col) high = t;
1794         else             low  = t;
1795       }
1796       for (i=low; i<high; i++) {
1797         if (rp[i] > col) break;
1798         if (rp[i] == col) {
1799           bap = ap +  bs2*i;
1800           if (roworiented) {
1801             if (is == ADD_VALUES) {
1802               for (ii=0; ii<bs; ii++,value+=stepval) {
1803                 for (jj=ii; jj<bs2; jj+=bs) {
1804                   bap[jj] += *value++;
1805                 }
1806               }
1807             } else {
1808               for (ii=0; ii<bs; ii++,value+=stepval) {
1809                 for (jj=ii; jj<bs2; jj+=bs) {
1810                   bap[jj] = *value++;
1811                 }
1812               }
1813             }
1814           } else {
1815             if (is == ADD_VALUES) {
1816               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1817                 for (jj=0; jj<bs; jj++) {
1818                   bap[jj] += value[jj];
1819                 }
1820                 bap += bs;
1821               }
1822             } else {
1823               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1824                 for (jj=0; jj<bs; jj++) {
1825                   bap[jj]  = value[jj];
1826                 }
1827                 bap += bs;
1828               }
1829             }
1830           }
1831           goto noinsert2;
1832         }
1833       }
1834       if (nonew == 1) goto noinsert2;
1835       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1836       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1837       N = nrow++ - 1; high++;
1838       /* shift up all the later entries in this row */
1839       for (ii=N; ii>=i; ii--) {
1840         rp[ii+1] = rp[ii];
1841         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1842       }
1843       if (N >= i) {
1844         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1845       }
1846       rp[i] = col;
1847       bap   = ap +  bs2*i;
1848       if (roworiented) {
1849         for (ii=0; ii<bs; ii++,value+=stepval) {
1850           for (jj=ii; jj<bs2; jj+=bs) {
1851             bap[jj] = *value++;
1852           }
1853         }
1854       } else {
1855         for (ii=0; ii<bs; ii++,value+=stepval) {
1856           for (jj=0; jj<bs; jj++) {
1857             *bap++ = *value++;
1858           }
1859         }
1860       }
1861 noinsert2:;
1862       low = i;
1863     }
1864     ailen[row] = nrow;
1865   }
1866   PetscFunctionReturn(0);
1867 }
1868 
1869 #undef __FUNCT__
1870 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1871 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1872 {
1873   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1874   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1875   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1876   PetscErrorCode ierr;
1877   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1878   MatScalar      *aa  = a->a,*ap;
1879   PetscReal      ratio=0.6;
1880 
1881   PetscFunctionBegin;
1882   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1883 
1884   if (m) rmax = ailen[0];
1885   for (i=1; i<mbs; i++) {
1886     /* move each row back by the amount of empty slots (fshift) before it*/
1887     fshift += imax[i-1] - ailen[i-1];
1888     rmax    = PetscMax(rmax,ailen[i]);
1889     if (fshift) {
1890       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1891       N  = ailen[i];
1892       for (j=0; j<N; j++) {
1893         ip[j-fshift] = ip[j];
1894 
1895         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1896       }
1897     }
1898     ai[i] = ai[i-1] + ailen[i-1];
1899   }
1900   if (mbs) {
1901     fshift += imax[mbs-1] - ailen[mbs-1];
1902     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1903   }
1904 
1905   /* reset ilen and imax for each row */
1906   a->nonzerorowcnt = 0;
1907   for (i=0; i<mbs; i++) {
1908     ailen[i] = imax[i] = ai[i+1] - ai[i];
1909     a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1910   }
1911   a->nz = ai[mbs];
1912 
1913   /* diagonals may have moved, so kill the diagonal pointers */
1914   a->idiagvalid = PETSC_FALSE;
1915   if (fshift && a->diag) {
1916     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1917     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1918     a->diag = 0;
1919   }
1920   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1921   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1922   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1923   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1924 
1925   A->info.mallocs    += a->reallocs;
1926   a->reallocs         = 0;
1927   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1928 
1929   ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1930 
1931   A->same_nonzero = PETSC_TRUE;
1932   PetscFunctionReturn(0);
1933 }
1934 
1935 /*
1936    This function returns an array of flags which indicate the locations of contiguous
1937    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1938    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1939    Assume: sizes should be long enough to hold all the values.
1940 */
1941 #undef __FUNCT__
1942 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1943 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1944 {
1945   PetscInt  i,j,k,row;
1946   PetscBool flg;
1947 
1948   PetscFunctionBegin;
1949   for (i=0,j=0; i<n; j++) {
1950     row = idx[i];
1951     if (row%bs!=0) { /* Not the begining of a block */
1952       sizes[j] = 1;
1953       i++;
1954     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1955       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1956       i++;
1957     } else { /* Begining of the block, so check if the complete block exists */
1958       flg = PETSC_TRUE;
1959       for (k=1; k<bs; k++) {
1960         if (row+k != idx[i+k]) { /* break in the block */
1961           flg = PETSC_FALSE;
1962           break;
1963         }
1964       }
1965       if (flg) { /* No break in the bs */
1966         sizes[j] = bs;
1967         i       += bs;
1968       } else {
1969         sizes[j] = 1;
1970         i++;
1971       }
1972     }
1973   }
1974   *bs_max = j;
1975   PetscFunctionReturn(0);
1976 }
1977 
1978 #undef __FUNCT__
1979 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1980 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
1981 {
1982   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
1983   PetscErrorCode    ierr;
1984   PetscInt          i,j,k,count,*rows;
1985   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
1986   PetscScalar       zero = 0.0;
1987   MatScalar         *aa;
1988   const PetscScalar *xx;
1989   PetscScalar       *bb;
1990 
1991   PetscFunctionBegin;
1992   /* fix right hand side if needed */
1993   if (x && b) {
1994     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1995     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1996     for (i=0; i<is_n; i++) {
1997       bb[is_idx[i]] = diag*xx[is_idx[i]];
1998     }
1999     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2000     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2001   }
2002 
2003   /* Make a copy of the IS and  sort it */
2004   /* allocate memory for rows,sizes */
2005   ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2006 
2007   /* copy IS values to rows, and sort them */
2008   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2009   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2010 
2011   if (baij->keepnonzeropattern) {
2012     for (i=0; i<is_n; i++) sizes[i] = 1;
2013     bs_max          = is_n;
2014     A->same_nonzero = PETSC_TRUE;
2015   } else {
2016     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2017 
2018     A->same_nonzero = PETSC_FALSE;
2019   }
2020 
2021   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2022     row = rows[j];
2023     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2024     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2025     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2026     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2027       if (diag != (PetscScalar)0.0) {
2028         if (baij->ilen[row/bs] > 0) {
2029           baij->ilen[row/bs]       = 1;
2030           baij->j[baij->i[row/bs]] = row/bs;
2031 
2032           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2033         }
2034         /* Now insert all the diagonal values for this bs */
2035         for (k=0; k<bs; k++) {
2036           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2037         }
2038       } else { /* (diag == 0.0) */
2039         baij->ilen[row/bs] = 0;
2040       } /* end (diag == 0.0) */
2041     } else { /* (sizes[i] != bs) */
2042 #if defined(PETSC_USE_DEBUG)
2043       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2044 #endif
2045       for (k=0; k<count; k++) {
2046         aa[0] =  zero;
2047         aa   += bs;
2048       }
2049       if (diag != (PetscScalar)0.0) {
2050         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2051       }
2052     }
2053   }
2054 
2055   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2056   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2057   PetscFunctionReturn(0);
2058 }
2059 
2060 #undef __FUNCT__
2061 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2062 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2063 {
2064   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2065   PetscErrorCode    ierr;
2066   PetscInt          i,j,k,count;
2067   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2068   PetscScalar       zero = 0.0;
2069   MatScalar         *aa;
2070   const PetscScalar *xx;
2071   PetscScalar       *bb;
2072   PetscBool         *zeroed,vecs = PETSC_FALSE;
2073 
2074   PetscFunctionBegin;
2075   /* fix right hand side if needed */
2076   if (x && b) {
2077     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2078     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2079     vecs = PETSC_TRUE;
2080   }
2081   A->same_nonzero = PETSC_TRUE;
2082 
2083   /* zero the columns */
2084   ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr);
2085   ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr);
2086   for (i=0; i<is_n; i++) {
2087     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2088     zeroed[is_idx[i]] = PETSC_TRUE;
2089   }
2090   for (i=0; i<A->rmap->N; i++) {
2091     if (!zeroed[i]) {
2092       row = i/bs;
2093       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2094         for (k=0; k<bs; k++) {
2095           col = bs*baij->j[j] + k;
2096           if (zeroed[col]) {
2097             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2098             if (vecs) bb[i] -= aa[0]*xx[col];
2099             aa[0] = 0.0;
2100           }
2101         }
2102       }
2103     } else if (vecs) bb[i] = diag*xx[i];
2104   }
2105   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2106   if (vecs) {
2107     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2108     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2109   }
2110 
2111   /* zero the rows */
2112   for (i=0; i<is_n; i++) {
2113     row   = is_idx[i];
2114     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2115     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2116     for (k=0; k<count; k++) {
2117       aa[0] =  zero;
2118       aa   += bs;
2119     }
2120     if (diag != (PetscScalar)0.0) {
2121       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2122     }
2123   }
2124   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2125   PetscFunctionReturn(0);
2126 }
2127 
2128 #undef __FUNCT__
2129 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2130 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2131 {
2132   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2133   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2134   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2135   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2136   PetscErrorCode ierr;
2137   PetscInt       ridx,cidx,bs2=a->bs2;
2138   PetscBool      roworiented=a->roworiented;
2139   MatScalar      *ap,value,*aa=a->a,*bap;
2140 
2141   PetscFunctionBegin;
2142   if (v) PetscValidScalarPointer(v,6);
2143   for (k=0; k<m; k++) { /* loop over added rows */
2144     row  = im[k];
2145     brow = row/bs;
2146     if (row < 0) continue;
2147 #if defined(PETSC_USE_DEBUG)
2148     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2149 #endif
2150     rp   = aj + ai[brow];
2151     ap   = aa + bs2*ai[brow];
2152     rmax = imax[brow];
2153     nrow = ailen[brow];
2154     low  = 0;
2155     high = nrow;
2156     for (l=0; l<n; l++) { /* loop over added columns */
2157       if (in[l] < 0) continue;
2158 #if defined(PETSC_USE_DEBUG)
2159       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2160 #endif
2161       col  = in[l]; bcol = col/bs;
2162       ridx = row % bs; cidx = col % bs;
2163       if (roworiented) {
2164         value = v[l + k*n];
2165       } else {
2166         value = v[k + l*m];
2167       }
2168       if (col <= lastcol) low = 0; else high = nrow;
2169       lastcol = col;
2170       while (high-low > 7) {
2171         t = (low+high)/2;
2172         if (rp[t] > bcol) high = t;
2173         else              low  = t;
2174       }
2175       for (i=low; i<high; i++) {
2176         if (rp[i] > bcol) break;
2177         if (rp[i] == bcol) {
2178           bap = ap +  bs2*i + bs*cidx + ridx;
2179           if (is == ADD_VALUES) *bap += value;
2180           else                  *bap  = value;
2181           goto noinsert1;
2182         }
2183       }
2184       if (nonew == 1) goto noinsert1;
2185       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2186       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2187       N = nrow++ - 1; high++;
2188       /* shift up all the later entries in this row */
2189       for (ii=N; ii>=i; ii--) {
2190         rp[ii+1] = rp[ii];
2191         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2192       }
2193       if (N>=i) {
2194         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2195       }
2196       rp[i]                      = bcol;
2197       ap[bs2*i + bs*cidx + ridx] = value;
2198       a->nz++;
2199 noinsert1:;
2200       low = i;
2201     }
2202     ailen[brow] = nrow;
2203   }
2204   A->same_nonzero = PETSC_FALSE;
2205   PetscFunctionReturn(0);
2206 }
2207 
2208 #undef __FUNCT__
2209 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2210 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2211 {
2212   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2213   Mat            outA;
2214   PetscErrorCode ierr;
2215   PetscBool      row_identity,col_identity;
2216 
2217   PetscFunctionBegin;
2218   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2219   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2220   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2221   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2222 
2223   outA            = inA;
2224   inA->factortype = MAT_FACTOR_LU;
2225 
2226   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2227 
2228   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2229   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2230   a->row = row;
2231   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2232   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2233   a->col = col;
2234 
2235   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2236   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2237   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2238   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2239 
2240   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2241   if (!a->solve_work) {
2242     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2243     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2244   }
2245   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2246   PetscFunctionReturn(0);
2247 }
2248 
2249 #undef __FUNCT__
2250 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2251 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2252 {
2253   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2254   PetscInt    i,nz,mbs;
2255 
2256   PetscFunctionBegin;
2257   nz  = baij->maxnz;
2258   mbs = baij->mbs;
2259   for (i=0; i<nz; i++) {
2260     baij->j[i] = indices[i];
2261   }
2262   baij->nz = nz;
2263   for (i=0; i<mbs; i++) {
2264     baij->ilen[i] = baij->imax[i];
2265   }
2266   PetscFunctionReturn(0);
2267 }
2268 
2269 #undef __FUNCT__
2270 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2271 /*@
2272     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2273        in the matrix.
2274 
2275   Input Parameters:
2276 +  mat - the SeqBAIJ matrix
2277 -  indices - the column indices
2278 
2279   Level: advanced
2280 
2281   Notes:
2282     This can be called if you have precomputed the nonzero structure of the
2283   matrix and want to provide it to the matrix object to improve the performance
2284   of the MatSetValues() operation.
2285 
2286     You MUST have set the correct numbers of nonzeros per row in the call to
2287   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2288 
2289     MUST be called before any calls to MatSetValues();
2290 
2291 @*/
2292 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2293 {
2294   PetscErrorCode ierr;
2295 
2296   PetscFunctionBegin;
2297   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2298   PetscValidPointer(indices,2);
2299   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2300   PetscFunctionReturn(0);
2301 }
2302 
2303 #undef __FUNCT__
2304 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2305 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2306 {
2307   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2308   PetscErrorCode ierr;
2309   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2310   PetscReal      atmp;
2311   PetscScalar    *x,zero = 0.0;
2312   MatScalar      *aa;
2313   PetscInt       ncols,brow,krow,kcol;
2314 
2315   PetscFunctionBegin;
2316   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2317   bs  = A->rmap->bs;
2318   aa  = a->a;
2319   ai  = a->i;
2320   aj  = a->j;
2321   mbs = a->mbs;
2322 
2323   ierr = VecSet(v,zero);CHKERRQ(ierr);
2324   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2325   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2326   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2327   for (i=0; i<mbs; i++) {
2328     ncols = ai[1] - ai[0]; ai++;
2329     brow  = bs*i;
2330     for (j=0; j<ncols; j++) {
2331       for (kcol=0; kcol<bs; kcol++) {
2332         for (krow=0; krow<bs; krow++) {
2333           atmp = PetscAbsScalar(*aa);aa++;
2334           row  = brow + krow;   /* row index */
2335           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2336           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2337         }
2338       }
2339       aj++;
2340     }
2341   }
2342   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2343   PetscFunctionReturn(0);
2344 }
2345 
2346 #undef __FUNCT__
2347 #define __FUNCT__ "MatCopy_SeqBAIJ"
2348 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2349 {
2350   PetscErrorCode ierr;
2351 
2352   PetscFunctionBegin;
2353   /* If the two matrices have the same copy implementation, use fast copy. */
2354   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2355     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2356     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2357     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2358 
2359     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2360     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2361     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2362   } else {
2363     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2364   }
2365   PetscFunctionReturn(0);
2366 }
2367 
2368 #undef __FUNCT__
2369 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2370 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2371 {
2372   PetscErrorCode ierr;
2373 
2374   PetscFunctionBegin;
2375   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2376   PetscFunctionReturn(0);
2377 }
2378 
2379 #undef __FUNCT__
2380 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2381 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2382 {
2383   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2384 
2385   PetscFunctionBegin;
2386   *array = a->a;
2387   PetscFunctionReturn(0);
2388 }
2389 
2390 #undef __FUNCT__
2391 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2392 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2393 {
2394   PetscFunctionBegin;
2395   PetscFunctionReturn(0);
2396 }
2397 
2398 #undef __FUNCT__
2399 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2400 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2401 {
2402   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2403   PetscErrorCode ierr;
2404   PetscInt       i,bs=Y->rmap->bs,j,bs2=bs*bs;
2405   PetscBLASInt   one=1;
2406 
2407   PetscFunctionBegin;
2408   if (str == SAME_NONZERO_PATTERN) {
2409     PetscScalar  alpha = a;
2410     PetscBLASInt bnz;
2411     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2412     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2413   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2414     if (y->xtoy && y->XtoY != X) {
2415       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2416       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2417     }
2418     if (!y->xtoy) { /* get xtoy */
2419       ierr    = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,NULL, y->i,y->j,NULL, &y->xtoy);CHKERRQ(ierr);
2420       y->XtoY = X;
2421       ierr    = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2422     }
2423     for (i=0; i<x->nz; i++) {
2424       j = 0;
2425       while (j < bs2) {
2426         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2427         j++;
2428       }
2429     }
2430     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2431   } else {
2432     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2433   }
2434   PetscFunctionReturn(0);
2435 }
2436 
2437 #undef __FUNCT__
2438 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2439 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2440 {
2441   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2442   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2443   MatScalar   *aa = a->a;
2444 
2445   PetscFunctionBegin;
2446   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2447   PetscFunctionReturn(0);
2448 }
2449 
2450 #undef __FUNCT__
2451 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2452 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2453 {
2454   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2455   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2456   MatScalar   *aa = a->a;
2457 
2458   PetscFunctionBegin;
2459   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2460   PetscFunctionReturn(0);
2461 }
2462 
2463 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2464 
2465 #undef __FUNCT__
2466 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2467 /*
2468     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2469 */
2470 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2471 {
2472   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2473   PetscErrorCode ierr;
2474   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2475   PetscInt       nz = a->i[m],row,*jj,mr,col;
2476 
2477   PetscFunctionBegin;
2478   *nn = n;
2479   if (!ia) PetscFunctionReturn(0);
2480   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2481   else {
2482     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2483     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2484     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2485     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2486     jj   = a->j;
2487     for (i=0; i<nz; i++) {
2488       collengths[jj[i]]++;
2489     }
2490     cia[0] = oshift;
2491     for (i=0; i<n; i++) {
2492       cia[i+1] = cia[i] + collengths[i];
2493     }
2494     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2495     jj   = a->j;
2496     for (row=0; row<m; row++) {
2497       mr = a->i[row+1] - a->i[row];
2498       for (i=0; i<mr; i++) {
2499         col = *jj++;
2500 
2501         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2502       }
2503     }
2504     ierr = PetscFree(collengths);CHKERRQ(ierr);
2505     *ia  = cia; *ja = cja;
2506   }
2507   PetscFunctionReturn(0);
2508 }
2509 
2510 #undef __FUNCT__
2511 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2512 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2513 {
2514   PetscErrorCode ierr;
2515 
2516   PetscFunctionBegin;
2517   if (!ia) PetscFunctionReturn(0);
2518   ierr = PetscFree(*ia);CHKERRQ(ierr);
2519   ierr = PetscFree(*ja);CHKERRQ(ierr);
2520   PetscFunctionReturn(0);
2521 }
2522 
2523 #undef __FUNCT__
2524 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2525 PetscErrorCode  MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2526 {
2527   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
2528   PetscErrorCode ierr;
2529   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow;
2530   PetscScalar    dx,*y,*xx,*w3_array;
2531   PetscScalar    *vscale_array;
2532   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2533   Vec            w1      = coloring->w1,w2=coloring->w2,w3;
2534   void           *fctx   = coloring->fctx;
2535   PetscBool      flg     = PETSC_FALSE;
2536   PetscInt       ctype   = coloring->ctype,N,col_start=0,col_end=0;
2537   Vec            x1_tmp;
2538 
2539   PetscFunctionBegin;
2540   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2541   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2542   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2543   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2544 
2545   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2546   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2547   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
2548   if (flg) {
2549     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2550   } else {
2551     PetscBool assembled;
2552     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2553     if (assembled) {
2554       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2555     }
2556   }
2557 
2558   x1_tmp = x1;
2559   if (!coloring->vscale) {
2560     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2561   }
2562 
2563   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2564     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2565   }
2566   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2567 
2568   /* Set w1 = F(x1) */
2569   if (!coloring->fset) {
2570     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2571     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2572     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2573   } else {
2574     coloring->fset = PETSC_FALSE;
2575   }
2576 
2577   if (!coloring->w3) {
2578     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2579     ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
2580   }
2581   w3 = coloring->w3;
2582 
2583   /* Compute all the local scale factors, including ghost points */
2584   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2585   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2586   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2587   if (ctype == IS_COLORING_GHOSTED) {
2588     col_start = 0; col_end = N;
2589   } else if (ctype == IS_COLORING_GLOBAL) {
2590     xx           = xx - start;
2591     vscale_array = vscale_array - start;
2592     col_start    = start; col_end = N + start;
2593   }
2594   for (col=col_start; col<col_end; col++) {
2595     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2596     if (coloring->htype[0] == 'w') {
2597       dx = 1.0 + unorm;
2598     } else {
2599       dx = xx[col];
2600     }
2601     if (dx == (PetscScalar)0.0) dx = 1.0;
2602     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2603     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2604     dx               *= epsilon;
2605     vscale_array[col] = (PetscScalar)1.0/dx;
2606   }
2607   if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start;
2608   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2609   if (ctype == IS_COLORING_GLOBAL) {
2610     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2611     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2612   }
2613   if (coloring->vscaleforrow) {
2614     vscaleforrow = coloring->vscaleforrow;
2615   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2616 
2617   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2618   /*
2619     Loop over each color
2620   */
2621   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2622   for (k=0; k<coloring->ncolors; k++) {
2623     coloring->currentcolor = k;
2624     for (i=0; i<bs; i++) {
2625       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2626       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2627       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2628       /*
2629         Loop over each column associated with color
2630         adding the perturbation to the vector w3.
2631       */
2632       for (l=0; l<coloring->ncolumns[k]; l++) {
2633         col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2634         if (coloring->htype[0] == 'w') {
2635           dx = 1.0 + unorm;
2636         } else {
2637           dx = xx[col];
2638         }
2639         if (dx == (PetscScalar)0.0) dx = 1.0;
2640         if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2641         else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2642         dx *= epsilon;
2643         if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2644         w3_array[col] += dx;
2645       }
2646       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2647       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2648 
2649       /*
2650         Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2651         w2 = F(x1 + dx) - F(x1)
2652       */
2653       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2654       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2655       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2656       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2657 
2658       /*
2659         Loop over rows of vector, putting results into Jacobian matrix
2660       */
2661       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2662       for (l=0; l<coloring->nrows[k]; l++) {
2663         row = bs*coloring->rows[k][l];                /* local row index */
2664         col = i + bs*coloring->columnsforrow[k][l];       /* global column index */
2665         for (j=0; j<bs; j++) {
2666           y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2667           srows[j]  = row + start + j;
2668         }
2669         ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2670       }
2671       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2672     }
2673   } /* endof for each color */
2674   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2675   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2676   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2677   ierr = PetscFree(srows);CHKERRQ(ierr);
2678 
2679   coloring->currentcolor = -1;
2680 
2681   ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2682   ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2683   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2684   PetscFunctionReturn(0);
2685 }
2686 
2687 /* -------------------------------------------------------------------*/
2688 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2689                                        MatGetRow_SeqBAIJ,
2690                                        MatRestoreRow_SeqBAIJ,
2691                                        MatMult_SeqBAIJ_N,
2692                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2693                                        MatMultTranspose_SeqBAIJ,
2694                                        MatMultTransposeAdd_SeqBAIJ,
2695                                        0,
2696                                        0,
2697                                        0,
2698                                /* 10*/ 0,
2699                                        MatLUFactor_SeqBAIJ,
2700                                        0,
2701                                        0,
2702                                        MatTranspose_SeqBAIJ,
2703                                /* 15*/ MatGetInfo_SeqBAIJ,
2704                                        MatEqual_SeqBAIJ,
2705                                        MatGetDiagonal_SeqBAIJ,
2706                                        MatDiagonalScale_SeqBAIJ,
2707                                        MatNorm_SeqBAIJ,
2708                                /* 20*/ 0,
2709                                        MatAssemblyEnd_SeqBAIJ,
2710                                        MatSetOption_SeqBAIJ,
2711                                        MatZeroEntries_SeqBAIJ,
2712                                /* 24*/ MatZeroRows_SeqBAIJ,
2713                                        0,
2714                                        0,
2715                                        0,
2716                                        0,
2717                                /* 29*/ MatSetUp_SeqBAIJ,
2718                                        0,
2719                                        0,
2720                                        0,
2721                                        0,
2722                                /* 34*/ MatDuplicate_SeqBAIJ,
2723                                        0,
2724                                        0,
2725                                        MatILUFactor_SeqBAIJ,
2726                                        0,
2727                                /* 39*/ MatAXPY_SeqBAIJ,
2728                                        MatGetSubMatrices_SeqBAIJ,
2729                                        MatIncreaseOverlap_SeqBAIJ,
2730                                        MatGetValues_SeqBAIJ,
2731                                        MatCopy_SeqBAIJ,
2732                                /* 44*/ 0,
2733                                        MatScale_SeqBAIJ,
2734                                        0,
2735                                        0,
2736                                        MatZeroRowsColumns_SeqBAIJ,
2737                                /* 49*/ 0,
2738                                        MatGetRowIJ_SeqBAIJ,
2739                                        MatRestoreRowIJ_SeqBAIJ,
2740                                        MatGetColumnIJ_SeqBAIJ,
2741                                        MatRestoreColumnIJ_SeqBAIJ,
2742                                /* 54*/ MatFDColoringCreate_SeqAIJ,
2743                                        0,
2744                                        0,
2745                                        0,
2746                                        MatSetValuesBlocked_SeqBAIJ,
2747                                /* 59*/ MatGetSubMatrix_SeqBAIJ,
2748                                        MatDestroy_SeqBAIJ,
2749                                        MatView_SeqBAIJ,
2750                                        0,
2751                                        0,
2752                                /* 64*/ 0,
2753                                        0,
2754                                        0,
2755                                        0,
2756                                        0,
2757                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2758                                        0,
2759                                        MatConvert_Basic,
2760                                        0,
2761                                        0,
2762                                /* 74*/ 0,
2763                                        MatFDColoringApply_BAIJ,
2764                                        0,
2765                                        0,
2766                                        0,
2767                                /* 79*/ 0,
2768                                        0,
2769                                        0,
2770                                        0,
2771                                        MatLoad_SeqBAIJ,
2772                                /* 84*/ 0,
2773                                        0,
2774                                        0,
2775                                        0,
2776                                        0,
2777                                /* 89*/ 0,
2778                                        0,
2779                                        0,
2780                                        0,
2781                                        0,
2782                                /* 94*/ 0,
2783                                        0,
2784                                        0,
2785                                        0,
2786                                        0,
2787                                /* 99*/ 0,
2788                                        0,
2789                                        0,
2790                                        0,
2791                                        0,
2792                                /*104*/ 0,
2793                                        MatRealPart_SeqBAIJ,
2794                                        MatImaginaryPart_SeqBAIJ,
2795                                        0,
2796                                        0,
2797                                /*109*/ 0,
2798                                        0,
2799                                        0,
2800                                        0,
2801                                        MatMissingDiagonal_SeqBAIJ,
2802                                /*114*/ 0,
2803                                        0,
2804                                        0,
2805                                        0,
2806                                        0,
2807                                /*119*/ 0,
2808                                        0,
2809                                        MatMultHermitianTranspose_SeqBAIJ,
2810                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2811                                        0,
2812                                /*124*/ 0,
2813                                        0,
2814                                        MatInvertBlockDiagonal_SeqBAIJ,
2815                                        0,
2816                                        0,
2817                                /*129*/ 0,
2818                                        0,
2819                                        0,
2820                                        0,
2821                                        0,
2822                                /*134*/ 0,
2823                                        0,
2824                                        0,
2825                                        0,
2826                                        0,
2827                                /*139*/ 0,
2828                                        0,
2829                                        0
2830 };
2831 
2832 #undef __FUNCT__
2833 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2834 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2835 {
2836   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2837   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2838   PetscErrorCode ierr;
2839 
2840   PetscFunctionBegin;
2841   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2842 
2843   /* allocate space for values if not already there */
2844   if (!aij->saved_values) {
2845     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2846     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2847   }
2848 
2849   /* copy values over */
2850   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2851   PetscFunctionReturn(0);
2852 }
2853 
2854 #undef __FUNCT__
2855 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2856 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2857 {
2858   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2859   PetscErrorCode ierr;
2860   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2861 
2862   PetscFunctionBegin;
2863   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2864   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2865 
2866   /* copy values over */
2867   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2868   PetscFunctionReturn(0);
2869 }
2870 
2871 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2872 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2873 
2874 #undef __FUNCT__
2875 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2876 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2877 {
2878   Mat_SeqBAIJ    *b;
2879   PetscErrorCode ierr;
2880   PetscInt       i,mbs,nbs,bs2;
2881   PetscBool      flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2882 
2883   PetscFunctionBegin;
2884   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2885   if (nz == MAT_SKIP_ALLOCATION) {
2886     skipallocation = PETSC_TRUE;
2887     nz             = 0;
2888   }
2889 
2890   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2891   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2892   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2893   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2894   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2895 
2896   B->preallocated = PETSC_TRUE;
2897 
2898   mbs = B->rmap->n/bs;
2899   nbs = B->cmap->n/bs;
2900   bs2 = bs*bs;
2901 
2902   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2903 
2904   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2905   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2906   if (nnz) {
2907     for (i=0; i<mbs; i++) {
2908       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2909       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2910     }
2911   }
2912 
2913   b    = (Mat_SeqBAIJ*)B->data;
2914   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2915   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
2916   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2917 
2918   if (!flg) {
2919     switch (bs) {
2920     case 1:
2921       B->ops->mult    = MatMult_SeqBAIJ_1;
2922       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2923       break;
2924     case 2:
2925       B->ops->mult    = MatMult_SeqBAIJ_2;
2926       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2927       break;
2928     case 3:
2929       B->ops->mult    = MatMult_SeqBAIJ_3;
2930       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2931       break;
2932     case 4:
2933       B->ops->mult    = MatMult_SeqBAIJ_4;
2934       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2935       break;
2936     case 5:
2937       B->ops->mult    = MatMult_SeqBAIJ_5;
2938       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2939       break;
2940     case 6:
2941       B->ops->mult    = MatMult_SeqBAIJ_6;
2942       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2943       break;
2944     case 7:
2945       B->ops->mult    = MatMult_SeqBAIJ_7;
2946       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2947       break;
2948     case 15:
2949       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2950       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2951       break;
2952     default:
2953       B->ops->mult    = MatMult_SeqBAIJ_N;
2954       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2955       break;
2956     }
2957   }
2958   B->ops->sor = MatSOR_SeqBAIJ;
2959   b->mbs = mbs;
2960   b->nbs = nbs;
2961   if (!skipallocation) {
2962     if (!b->imax) {
2963       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2964       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2965 
2966       b->free_imax_ilen = PETSC_TRUE;
2967     }
2968     /* b->ilen will count nonzeros in each block row so far. */
2969     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2970     if (!nnz) {
2971       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2972       else if (nz < 0) nz = 1;
2973       for (i=0; i<mbs; i++) b->imax[i] = nz;
2974       nz = nz*mbs;
2975     } else {
2976       nz = 0;
2977       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2978     }
2979 
2980     /* allocate the matrix space */
2981     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2982     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
2983     ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2984     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2985     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2986 
2987     b->singlemalloc = PETSC_TRUE;
2988     b->i[0]         = 0;
2989     for (i=1; i<mbs+1; i++) {
2990       b->i[i] = b->i[i-1] + b->imax[i-1];
2991     }
2992     b->free_a  = PETSC_TRUE;
2993     b->free_ij = PETSC_TRUE;
2994 #if defined(PETSC_THREADCOMM_ACTIVE)
2995     ierr = MatZeroEntries_SeqBAIJ(B);CHKERRQ(ierr);
2996 #endif
2997   } else {
2998     b->free_a  = PETSC_FALSE;
2999     b->free_ij = PETSC_FALSE;
3000   }
3001 
3002   b->bs2              = bs2;
3003   b->mbs              = mbs;
3004   b->nz               = 0;
3005   b->maxnz            = nz;
3006   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3007   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
3008   PetscFunctionReturn(0);
3009 }
3010 
3011 #undef __FUNCT__
3012 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3013 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3014 {
3015   PetscInt       i,m,nz,nz_max=0,*nnz;
3016   PetscScalar    *values=0;
3017   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
3018   PetscErrorCode ierr;
3019 
3020   PetscFunctionBegin;
3021   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3022   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3023   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3024   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3025   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3026   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3027   m    = B->rmap->n/bs;
3028 
3029   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
3030   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3031   for (i=0; i<m; i++) {
3032     nz = ii[i+1]- ii[i];
3033     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
3034     nz_max = PetscMax(nz_max, nz);
3035     nnz[i] = nz;
3036   }
3037   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3038   ierr = PetscFree(nnz);CHKERRQ(ierr);
3039 
3040   values = (PetscScalar*)V;
3041   if (!values) {
3042     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3043     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3044   }
3045   for (i=0; i<m; i++) {
3046     PetscInt          ncols  = ii[i+1] - ii[i];
3047     const PetscInt    *icols = jj + ii[i];
3048     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3049     if (!roworiented) {
3050       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3051     } else {
3052       PetscInt j;
3053       for (j=0; j<ncols; j++) {
3054         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
3055         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
3056       }
3057     }
3058   }
3059   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3060   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3061   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3062   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3063   PetscFunctionReturn(0);
3064 }
3065 
3066 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3067 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
3068 #if defined(PETSC_HAVE_MUMPS)
3069 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3070 #endif
3071 extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,PetscBool*);
3072 
3073 /*MC
3074    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3075    block sparse compressed row format.
3076 
3077    Options Database Keys:
3078 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3079 
3080   Level: beginner
3081 
3082 .seealso: MatCreateSeqBAIJ()
3083 M*/
3084 
3085 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3086 
3087 #undef __FUNCT__
3088 #define __FUNCT__ "MatCreate_SeqBAIJ"
3089 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3090 {
3091   PetscErrorCode ierr;
3092   PetscMPIInt    size;
3093   Mat_SeqBAIJ    *b;
3094 
3095   PetscFunctionBegin;
3096   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3097   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3098 
3099   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3100   B->data = (void*)b;
3101   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3102 
3103   b->row          = 0;
3104   b->col          = 0;
3105   b->icol         = 0;
3106   b->reallocs     = 0;
3107   b->saved_values = 0;
3108 
3109   b->roworiented        = PETSC_TRUE;
3110   b->nonew              = 0;
3111   b->diag               = 0;
3112   b->solve_work         = 0;
3113   b->mult_work          = 0;
3114   B->spptr              = 0;
3115   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3116   b->keepnonzeropattern = PETSC_FALSE;
3117   b->xtoy               = 0;
3118   b->XtoY               = 0;
3119   B->same_nonzero       = PETSC_FALSE;
3120 
3121   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactorAvailable_petsc_C",MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3122   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_petsc_C",MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3123   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_bstrm_C",MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr);
3124 #if defined(PETSC_HAVE_MUMPS)
3125   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3126 #endif
3127   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3128   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3129   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3130   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3131   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3132   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3133   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3134   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3135   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3136   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3137   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3138   PetscFunctionReturn(0);
3139 }
3140 
3141 #undef __FUNCT__
3142 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3143 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3144 {
3145   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3146   PetscErrorCode ierr;
3147   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3148 
3149   PetscFunctionBegin;
3150   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3151 
3152   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3153     c->imax           = a->imax;
3154     c->ilen           = a->ilen;
3155     c->free_imax_ilen = PETSC_FALSE;
3156   } else {
3157     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3158     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3159     for (i=0; i<mbs; i++) {
3160       c->imax[i] = a->imax[i];
3161       c->ilen[i] = a->ilen[i];
3162     }
3163     c->free_imax_ilen = PETSC_TRUE;
3164   }
3165 
3166   /* allocate the matrix space */
3167   if (mallocmatspace) {
3168     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3169       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3170       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3171       ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr);
3172 
3173       c->i            = a->i;
3174       c->j            = a->j;
3175       c->singlemalloc = PETSC_FALSE;
3176       c->free_a       = PETSC_TRUE;
3177       c->free_ij      = PETSC_FALSE;
3178       c->parent       = A;
3179       C->preallocated = PETSC_TRUE;
3180       C->assembled    = PETSC_TRUE;
3181 
3182       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3183       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3184       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3185     } else {
3186       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3187       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3188 
3189       c->singlemalloc = PETSC_TRUE;
3190       c->free_a       = PETSC_TRUE;
3191       c->free_ij      = PETSC_TRUE;
3192 
3193       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3194       if (mbs > 0) {
3195         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3196         if (cpvalues == MAT_COPY_VALUES) {
3197           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3198         } else {
3199           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3200         }
3201       }
3202       C->preallocated = PETSC_TRUE;
3203       C->assembled    = PETSC_TRUE;
3204     }
3205   }
3206 
3207   c->roworiented = a->roworiented;
3208   c->nonew       = a->nonew;
3209 
3210   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3211   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3212 
3213   c->bs2         = a->bs2;
3214   c->mbs         = a->mbs;
3215   c->nbs         = a->nbs;
3216 
3217   if (a->diag) {
3218     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3219       c->diag      = a->diag;
3220       c->free_diag = PETSC_FALSE;
3221     } else {
3222       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3223       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3224       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3225       c->free_diag = PETSC_TRUE;
3226     }
3227   } else c->diag = 0;
3228 
3229   c->nz         = a->nz;
3230   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3231   c->solve_work = 0;
3232   c->mult_work  = 0;
3233 
3234   c->compressedrow.use   = a->compressedrow.use;
3235   c->compressedrow.nrows = a->compressedrow.nrows;
3236   if (a->compressedrow.use) {
3237     i    = a->compressedrow.nrows;
3238     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3239     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3240     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3241     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3242   } else {
3243     c->compressedrow.use    = PETSC_FALSE;
3244     c->compressedrow.i      = NULL;
3245     c->compressedrow.rindex = NULL;
3246   }
3247   C->same_nonzero = A->same_nonzero;
3248 
3249   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3250   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3251   PetscFunctionReturn(0);
3252 }
3253 
3254 #undef __FUNCT__
3255 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3256 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3257 {
3258   PetscErrorCode ierr;
3259 
3260   PetscFunctionBegin;
3261   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3262   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3263   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3264   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3265   PetscFunctionReturn(0);
3266 }
3267 
3268 #undef __FUNCT__
3269 #define __FUNCT__ "MatLoad_SeqBAIJ"
3270 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3271 {
3272   Mat_SeqBAIJ    *a;
3273   PetscErrorCode ierr;
3274   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3275   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3276   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3277   PetscInt       *masked,nmask,tmp,bs2,ishift;
3278   PetscMPIInt    size;
3279   int            fd;
3280   PetscScalar    *aa;
3281   MPI_Comm       comm;
3282 
3283   PetscFunctionBegin;
3284   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3285   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3286   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3287   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3288   bs2  = bs*bs;
3289 
3290   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3291   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3292   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3293   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3294   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3295   M = header[1]; N = header[2]; nz = header[3];
3296 
3297   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3298   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3299 
3300   /*
3301      This code adds extra rows to make sure the number of rows is
3302     divisible by the blocksize
3303   */
3304   mbs        = M/bs;
3305   extra_rows = bs - M + bs*(mbs);
3306   if (extra_rows == bs) extra_rows = 0;
3307   else mbs++;
3308   if (extra_rows) {
3309     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3310   }
3311 
3312   /* Set global sizes if not already set */
3313   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3314     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3315   } else { /* Check if the matrix global sizes are correct */
3316     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3317     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3318       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3319     }
3320     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3321   }
3322 
3323   /* read in row lengths */
3324   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3325   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3326   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3327 
3328   /* read in column indices */
3329   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3330   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3331   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3332 
3333   /* loop over row lengths determining block row lengths */
3334   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3335   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3336   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3337   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3338   rowcount = 0;
3339   nzcount  = 0;
3340   for (i=0; i<mbs; i++) {
3341     nmask = 0;
3342     for (j=0; j<bs; j++) {
3343       kmax = rowlengths[rowcount];
3344       for (k=0; k<kmax; k++) {
3345         tmp = jj[nzcount++]/bs;
3346         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3347       }
3348       rowcount++;
3349     }
3350     browlengths[i] += nmask;
3351     /* zero out the mask elements we set */
3352     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3353   }
3354 
3355   /* Do preallocation  */
3356   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3357   a    = (Mat_SeqBAIJ*)newmat->data;
3358 
3359   /* set matrix "i" values */
3360   a->i[0] = 0;
3361   for (i=1; i<= mbs; i++) {
3362     a->i[i]      = a->i[i-1] + browlengths[i-1];
3363     a->ilen[i-1] = browlengths[i-1];
3364   }
3365   a->nz = 0;
3366   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3367 
3368   /* read in nonzero values */
3369   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3370   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3371   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3372 
3373   /* set "a" and "j" values into matrix */
3374   nzcount = 0; jcount = 0;
3375   for (i=0; i<mbs; i++) {
3376     nzcountb = nzcount;
3377     nmask    = 0;
3378     for (j=0; j<bs; j++) {
3379       kmax = rowlengths[i*bs+j];
3380       for (k=0; k<kmax; k++) {
3381         tmp = jj[nzcount++]/bs;
3382         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3383       }
3384     }
3385     /* sort the masked values */
3386     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3387 
3388     /* set "j" values into matrix */
3389     maskcount = 1;
3390     for (j=0; j<nmask; j++) {
3391       a->j[jcount++]  = masked[j];
3392       mask[masked[j]] = maskcount++;
3393     }
3394     /* set "a" values into matrix */
3395     ishift = bs2*a->i[i];
3396     for (j=0; j<bs; j++) {
3397       kmax = rowlengths[i*bs+j];
3398       for (k=0; k<kmax; k++) {
3399         tmp       = jj[nzcountb]/bs;
3400         block     = mask[tmp] - 1;
3401         point     = jj[nzcountb] - bs*tmp;
3402         idx       = ishift + bs2*block + j + bs*point;
3403         a->a[idx] = (MatScalar)aa[nzcountb++];
3404       }
3405     }
3406     /* zero out the mask elements we set */
3407     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3408   }
3409   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3410 
3411   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3412   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3413   ierr = PetscFree(aa);CHKERRQ(ierr);
3414   ierr = PetscFree(jj);CHKERRQ(ierr);
3415   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3416 
3417   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3418   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3419   PetscFunctionReturn(0);
3420 }
3421 
3422 #undef __FUNCT__
3423 #define __FUNCT__ "MatCreateSeqBAIJ"
3424 /*@C
3425    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3426    compressed row) format.  For good matrix assembly performance the
3427    user should preallocate the matrix storage by setting the parameter nz
3428    (or the array nnz).  By setting these parameters accurately, performance
3429    during matrix assembly can be increased by more than a factor of 50.
3430 
3431    Collective on MPI_Comm
3432 
3433    Input Parameters:
3434 +  comm - MPI communicator, set to PETSC_COMM_SELF
3435 .  bs - size of block
3436 .  m - number of rows
3437 .  n - number of columns
3438 .  nz - number of nonzero blocks  per block row (same for all rows)
3439 -  nnz - array containing the number of nonzero blocks in the various block rows
3440          (possibly different for each block row) or NULL
3441 
3442    Output Parameter:
3443 .  A - the matrix
3444 
3445    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3446    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3447    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3448 
3449    Options Database Keys:
3450 .   -mat_no_unroll - uses code that does not unroll the loops in the
3451                      block calculations (much slower)
3452 .    -mat_block_size - size of the blocks to use
3453 
3454    Level: intermediate
3455 
3456    Notes:
3457    The number of rows and columns must be divisible by blocksize.
3458 
3459    If the nnz parameter is given then the nz parameter is ignored
3460 
3461    A nonzero block is any block that as 1 or more nonzeros in it
3462 
3463    The block AIJ format is fully compatible with standard Fortran 77
3464    storage.  That is, the stored row and column indices can begin at
3465    either one (as in Fortran) or zero.  See the users' manual for details.
3466 
3467    Specify the preallocated storage with either nz or nnz (not both).
3468    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3469    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3470    matrices.
3471 
3472 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3473 @*/
3474 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3475 {
3476   PetscErrorCode ierr;
3477 
3478   PetscFunctionBegin;
3479   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3480   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3481   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3482   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3483   PetscFunctionReturn(0);
3484 }
3485 
3486 #undef __FUNCT__
3487 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3488 /*@C
3489    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3490    per row in the matrix. For good matrix assembly performance the
3491    user should preallocate the matrix storage by setting the parameter nz
3492    (or the array nnz).  By setting these parameters accurately, performance
3493    during matrix assembly can be increased by more than a factor of 50.
3494 
3495    Collective on MPI_Comm
3496 
3497    Input Parameters:
3498 +  A - the matrix
3499 .  bs - size of block
3500 .  nz - number of block nonzeros per block row (same for all rows)
3501 -  nnz - array containing the number of block nonzeros in the various block rows
3502          (possibly different for each block row) or NULL
3503 
3504    Options Database Keys:
3505 .   -mat_no_unroll - uses code that does not unroll the loops in the
3506                      block calculations (much slower)
3507 .    -mat_block_size - size of the blocks to use
3508 
3509    Level: intermediate
3510 
3511    Notes:
3512    If the nnz parameter is given then the nz parameter is ignored
3513 
3514    You can call MatGetInfo() to get information on how effective the preallocation was;
3515    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3516    You can also run with the option -info and look for messages with the string
3517    malloc in them to see if additional memory allocation was needed.
3518 
3519    The block AIJ format is fully compatible with standard Fortran 77
3520    storage.  That is, the stored row and column indices can begin at
3521    either one (as in Fortran) or zero.  See the users' manual for details.
3522 
3523    Specify the preallocated storage with either nz or nnz (not both).
3524    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3525    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3526 
3527 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3528 @*/
3529 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3530 {
3531   PetscErrorCode ierr;
3532 
3533   PetscFunctionBegin;
3534   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3535   PetscValidType(B,1);
3536   PetscValidLogicalCollectiveInt(B,bs,2);
3537   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3538   PetscFunctionReturn(0);
3539 }
3540 
3541 #undef __FUNCT__
3542 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3543 /*@C
3544    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3545    (the default sequential PETSc format).
3546 
3547    Collective on MPI_Comm
3548 
3549    Input Parameters:
3550 +  A - the matrix
3551 .  i - the indices into j for the start of each local row (starts with zero)
3552 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3553 -  v - optional values in the matrix
3554 
3555    Level: developer
3556 
3557    Notes:
3558    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3559    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3560    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3561    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3562    block column and the second index is over columns within a block.
3563 
3564 .keywords: matrix, aij, compressed row, sparse
3565 
3566 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3567 @*/
3568 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3569 {
3570   PetscErrorCode ierr;
3571 
3572   PetscFunctionBegin;
3573   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3574   PetscValidType(B,1);
3575   PetscValidLogicalCollectiveInt(B,bs,2);
3576   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3577   PetscFunctionReturn(0);
3578 }
3579 
3580 
3581 #undef __FUNCT__
3582 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3583 /*@
3584      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3585 
3586      Collective on MPI_Comm
3587 
3588    Input Parameters:
3589 +  comm - must be an MPI communicator of size 1
3590 .  bs - size of block
3591 .  m - number of rows
3592 .  n - number of columns
3593 .  i - row indices
3594 .  j - column indices
3595 -  a - matrix values
3596 
3597    Output Parameter:
3598 .  mat - the matrix
3599 
3600    Level: advanced
3601 
3602    Notes:
3603        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3604     once the matrix is destroyed
3605 
3606        You cannot set new nonzero locations into this matrix, that will generate an error.
3607 
3608        The i and j indices are 0 based
3609 
3610        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3611 
3612       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3613       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3614       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3615       with column-major ordering within blocks.
3616 
3617 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3618 
3619 @*/
3620 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3621 {
3622   PetscErrorCode ierr;
3623   PetscInt       ii;
3624   Mat_SeqBAIJ    *baij;
3625 
3626   PetscFunctionBegin;
3627   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3628   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3629 
3630   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3631   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3632   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3633   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3634   baij = (Mat_SeqBAIJ*)(*mat)->data;
3635   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3636   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3637 
3638   baij->i = i;
3639   baij->j = j;
3640   baij->a = a;
3641 
3642   baij->singlemalloc = PETSC_FALSE;
3643   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3644   baij->free_a       = PETSC_FALSE;
3645   baij->free_ij      = PETSC_FALSE;
3646 
3647   for (ii=0; ii<m; ii++) {
3648     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3649 #if defined(PETSC_USE_DEBUG)
3650     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3651 #endif
3652   }
3653 #if defined(PETSC_USE_DEBUG)
3654   for (ii=0; ii<baij->i[m]; ii++) {
3655     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3656     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3657   }
3658 #endif
3659 
3660   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3661   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3662   PetscFunctionReturn(0);
3663 }
3664