xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 609bdbee21ea3be08735c64dbe00a9ab27759925)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13 #endif
14 
15 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
16 {
17   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
18   PetscErrorCode ierr;
19   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
20   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
21   PetscReal      shift = 0.0;
22   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
23 
24   PetscFunctionBegin;
25   allowzeropivot = PetscNot(A->erroriffailure);
26 
27   if (a->idiagvalid) {
28     if (values) *values = a->idiag;
29     PetscFunctionReturn(0);
30   }
31   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
32   diag_offset = a->diag;
33   if (!a->idiag) {
34     ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr);
35     ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
36   }
37   diag  = a->idiag;
38   mdiag = a->idiag+bs2*mbs;
39   if (values) *values = a->idiag;
40   /* factor and invert each block */
41   switch (bs) {
42   case 1:
43     for (i=0; i<mbs; i++) {
44       odiag    = v + 1*diag_offset[i];
45       diag[0]  = odiag[0];
46       mdiag[0] = odiag[0];
47 
48       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
49         if (allowzeropivot) {
50           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
51           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
52           A->factorerror_zeropivot_row   = i;
53           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
54         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
55       }
56 
57       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
58       diag    += 1;
59       mdiag   += 1;
60     }
61     break;
62   case 2:
63     for (i=0; i<mbs; i++) {
64       odiag    = v + 4*diag_offset[i];
65       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
66       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
67       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
68       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
69       diag    += 4;
70       mdiag   += 4;
71     }
72     break;
73   case 3:
74     for (i=0; i<mbs; i++) {
75       odiag    = v + 9*diag_offset[i];
76       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
77       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
78       diag[8]  = odiag[8];
79       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
80       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
81       mdiag[8] = odiag[8];
82       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
83       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
84       diag    += 9;
85       mdiag   += 9;
86     }
87     break;
88   case 4:
89     for (i=0; i<mbs; i++) {
90       odiag  = v + 16*diag_offset[i];
91       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
92       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
94       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
95       diag  += 16;
96       mdiag += 16;
97     }
98     break;
99   case 5:
100     for (i=0; i<mbs; i++) {
101       odiag  = v + 25*diag_offset[i];
102       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
104       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
105       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
106       diag  += 25;
107       mdiag += 25;
108     }
109     break;
110   case 6:
111     for (i=0; i<mbs; i++) {
112       odiag  = v + 36*diag_offset[i];
113       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
114       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
115       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
116       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
117       diag  += 36;
118       mdiag += 36;
119     }
120     break;
121   case 7:
122     for (i=0; i<mbs; i++) {
123       odiag  = v + 49*diag_offset[i];
124       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
125       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
126       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
127       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
128       diag  += 49;
129       mdiag += 49;
130     }
131     break;
132   default:
133     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
134     for (i=0; i<mbs; i++) {
135       odiag  = v + bs2*diag_offset[i];
136       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
137       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
138       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
139       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
140       diag  += bs2;
141       mdiag += bs2;
142     }
143     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
144   }
145   a->idiagvalid = PETSC_TRUE;
146   PetscFunctionReturn(0);
147 }
148 
149 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
150 {
151   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
152   PetscScalar       *x,*work,*w,*workt,*t;
153   const MatScalar   *v,*aa = a->a, *idiag;
154   const PetscScalar *b,*xb;
155   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
156   PetscErrorCode    ierr;
157   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
158   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
159 
160   PetscFunctionBegin;
161   if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */
162   its = its*lits;
163   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
164   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
165   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
166   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
167   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
168 
169   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
170 
171   if (!m) PetscFunctionReturn(0);
172   diag  = a->diag;
173   idiag = a->idiag;
174   k    = PetscMax(A->rmap->n,A->cmap->n);
175   if (!a->mult_work) {
176     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
177   }
178   if (!a->sor_workt) {
179     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
180   }
181   if (!a->sor_work) {
182     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
183   }
184   work = a->mult_work;
185   t    = a->sor_workt;
186   w    = a->sor_work;
187 
188   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
189   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
190 
191   if (flag & SOR_ZERO_INITIAL_GUESS) {
192     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
193       switch (bs) {
194       case 1:
195         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
196         t[0] = b[0];
197         i2     = 1;
198         idiag += 1;
199         for (i=1; i<m; i++) {
200           v  = aa + ai[i];
201           vi = aj + ai[i];
202           nz = diag[i] - ai[i];
203           s[0] = b[i2];
204           for (j=0; j<nz; j++) {
205             xw[0] = x[vi[j]];
206             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
207           }
208           t[i2] = s[0];
209           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
210           x[i2]  = xw[0];
211           idiag += 1;
212           i2    += 1;
213         }
214         break;
215       case 2:
216         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
217         t[0] = b[0]; t[1] = b[1];
218         i2     = 2;
219         idiag += 4;
220         for (i=1; i<m; i++) {
221           v  = aa + 4*ai[i];
222           vi = aj + ai[i];
223           nz = diag[i] - ai[i];
224           s[0] = b[i2]; s[1] = b[i2+1];
225           for (j=0; j<nz; j++) {
226             idx = 2*vi[j];
227             it  = 4*j;
228             xw[0] = x[idx]; xw[1] = x[1+idx];
229             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
230           }
231           t[i2] = s[0]; t[i2+1] = s[1];
232           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
233           x[i2]   = xw[0]; x[i2+1] = xw[1];
234           idiag  += 4;
235           i2     += 2;
236         }
237         break;
238       case 3:
239         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
240         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
241         i2     = 3;
242         idiag += 9;
243         for (i=1; i<m; i++) {
244           v  = aa + 9*ai[i];
245           vi = aj + ai[i];
246           nz = diag[i] - ai[i];
247           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
248           while (nz--) {
249             idx = 3*(*vi++);
250             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
251             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
252             v  += 9;
253           }
254           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
255           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
256           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
257           idiag  += 9;
258           i2     += 3;
259         }
260         break;
261       case 4:
262         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
263         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
264         i2     = 4;
265         idiag += 16;
266         for (i=1; i<m; i++) {
267           v  = aa + 16*ai[i];
268           vi = aj + ai[i];
269           nz = diag[i] - ai[i];
270           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
271           while (nz--) {
272             idx = 4*(*vi++);
273             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
274             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
275             v  += 16;
276           }
277           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
278           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
279           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
280           idiag  += 16;
281           i2     += 4;
282         }
283         break;
284       case 5:
285         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
286         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
287         i2     = 5;
288         idiag += 25;
289         for (i=1; i<m; i++) {
290           v  = aa + 25*ai[i];
291           vi = aj + ai[i];
292           nz = diag[i] - ai[i];
293           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
294           while (nz--) {
295             idx = 5*(*vi++);
296             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
297             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
298             v  += 25;
299           }
300           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
301           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
302           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
303           idiag  += 25;
304           i2     += 5;
305         }
306         break;
307       case 6:
308         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
309         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
310         i2     = 6;
311         idiag += 36;
312         for (i=1; i<m; i++) {
313           v  = aa + 36*ai[i];
314           vi = aj + ai[i];
315           nz = diag[i] - ai[i];
316           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
317           while (nz--) {
318             idx = 6*(*vi++);
319             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
320             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
321             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
322             v  += 36;
323           }
324           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
325           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
326           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
327           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
328           idiag  += 36;
329           i2     += 6;
330         }
331         break;
332       case 7:
333         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
334         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
335         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
336         i2     = 7;
337         idiag += 49;
338         for (i=1; i<m; i++) {
339           v  = aa + 49*ai[i];
340           vi = aj + ai[i];
341           nz = diag[i] - ai[i];
342           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
343           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
344           while (nz--) {
345             idx = 7*(*vi++);
346             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
347             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
348             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
349             v  += 49;
350           }
351           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
352           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
353           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
354           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
355           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
356           idiag  += 49;
357           i2     += 7;
358         }
359         break;
360       default:
361         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
362         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
363         i2     = bs;
364         idiag += bs2;
365         for (i=1; i<m; i++) {
366           v  = aa + bs2*ai[i];
367           vi = aj + ai[i];
368           nz = diag[i] - ai[i];
369 
370           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
371           /* copy all rows of x that are needed into contiguous space */
372           workt = work;
373           for (j=0; j<nz; j++) {
374             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
375             workt += bs;
376           }
377           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
378           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
379           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
380 
381           idiag += bs2;
382           i2    += bs;
383         }
384         break;
385       }
386       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
387       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
388       xb = t;
389     }
390     else xb = b;
391     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
392       idiag = a->idiag+bs2*(a->mbs-1);
393       i2 = bs * (m-1);
394       switch (bs) {
395       case 1:
396         s[0]  = xb[i2];
397         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
398         x[i2] = xw[0];
399         i2   -= 1;
400         for (i=m-2; i>=0; i--) {
401           v  = aa + (diag[i]+1);
402           vi = aj + diag[i] + 1;
403           nz = ai[i+1] - diag[i] - 1;
404           s[0] = xb[i2];
405           for (j=0; j<nz; j++) {
406             xw[0] = x[vi[j]];
407             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
408           }
409           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
410           x[i2]  = xw[0];
411           idiag -= 1;
412           i2    -= 1;
413         }
414         break;
415       case 2:
416         s[0]  = xb[i2]; s[1] = xb[i2+1];
417         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
418         x[i2] = xw[0]; x[i2+1] = xw[1];
419         i2    -= 2;
420         idiag -= 4;
421         for (i=m-2; i>=0; i--) {
422           v  = aa + 4*(diag[i] + 1);
423           vi = aj + diag[i] + 1;
424           nz = ai[i+1] - diag[i] - 1;
425           s[0] = xb[i2]; s[1] = xb[i2+1];
426           for (j=0; j<nz; j++) {
427             idx = 2*vi[j];
428             it  = 4*j;
429             xw[0] = x[idx]; xw[1] = x[1+idx];
430             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
431           }
432           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
433           x[i2]   = xw[0]; x[i2+1] = xw[1];
434           idiag  -= 4;
435           i2     -= 2;
436         }
437         break;
438       case 3:
439         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
440         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
441         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
442         i2    -= 3;
443         idiag -= 9;
444         for (i=m-2; i>=0; i--) {
445           v  = aa + 9*(diag[i]+1);
446           vi = aj + diag[i] + 1;
447           nz = ai[i+1] - diag[i] - 1;
448           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
449           while (nz--) {
450             idx = 3*(*vi++);
451             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
452             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
453             v  += 9;
454           }
455           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
456           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
457           idiag  -= 9;
458           i2     -= 3;
459         }
460         break;
461       case 4:
462         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
463         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
464         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
465         i2    -= 4;
466         idiag -= 16;
467         for (i=m-2; i>=0; i--) {
468           v  = aa + 16*(diag[i]+1);
469           vi = aj + diag[i] + 1;
470           nz = ai[i+1] - diag[i] - 1;
471           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
472           while (nz--) {
473             idx = 4*(*vi++);
474             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
475             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
476             v  += 16;
477           }
478           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
479           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
480           idiag  -= 16;
481           i2     -= 4;
482         }
483         break;
484       case 5:
485         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
486         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
487         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
488         i2    -= 5;
489         idiag -= 25;
490         for (i=m-2; i>=0; i--) {
491           v  = aa + 25*(diag[i]+1);
492           vi = aj + diag[i] + 1;
493           nz = ai[i+1] - diag[i] - 1;
494           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
495           while (nz--) {
496             idx = 5*(*vi++);
497             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
498             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
499             v  += 25;
500           }
501           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
502           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
503           idiag  -= 25;
504           i2     -= 5;
505         }
506         break;
507       case 6:
508         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
509         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
510         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
511         i2    -= 6;
512         idiag -= 36;
513         for (i=m-2; i>=0; i--) {
514           v  = aa + 36*(diag[i]+1);
515           vi = aj + diag[i] + 1;
516           nz = ai[i+1] - diag[i] - 1;
517           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
518           while (nz--) {
519             idx = 6*(*vi++);
520             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
521             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
522             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
523             v  += 36;
524           }
525           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
526           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
527           idiag  -= 36;
528           i2     -= 6;
529         }
530         break;
531       case 7:
532         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
533         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
534         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
535         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
536         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
537         i2    -= 7;
538         idiag -= 49;
539         for (i=m-2; i>=0; i--) {
540           v  = aa + 49*(diag[i]+1);
541           vi = aj + diag[i] + 1;
542           nz = ai[i+1] - diag[i] - 1;
543           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
544           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
545           while (nz--) {
546             idx = 7*(*vi++);
547             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
548             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
549             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
550             v  += 49;
551           }
552           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
553           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
554           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
555           idiag  -= 49;
556           i2     -= 7;
557         }
558         break;
559       default:
560         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
561         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
562         i2    -= bs;
563         idiag -= bs2;
564         for (i=m-2; i>=0; i--) {
565           v  = aa + bs2*(diag[i]+1);
566           vi = aj + diag[i] + 1;
567           nz = ai[i+1] - diag[i] - 1;
568 
569           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
570           /* copy all rows of x that are needed into contiguous space */
571           workt = work;
572           for (j=0; j<nz; j++) {
573             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
574             workt += bs;
575           }
576           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
577           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
578 
579           idiag -= bs2;
580           i2    -= bs;
581         }
582         break;
583       }
584       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
585     }
586     its--;
587   }
588   while (its--) {
589     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
590       idiag = a->idiag;
591       i2 = 0;
592       switch (bs) {
593       case 1:
594         for (i=0; i<m; i++) {
595           v  = aa + ai[i];
596           vi = aj + ai[i];
597           nz = ai[i+1] - ai[i];
598           s[0] = b[i2];
599           for (j=0; j<nz; j++) {
600             xw[0] = x[vi[j]];
601             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
602           }
603           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
604           x[i2] += xw[0];
605           idiag += 1;
606           i2    += 1;
607         }
608         break;
609       case 2:
610         for (i=0; i<m; i++) {
611           v  = aa + 4*ai[i];
612           vi = aj + ai[i];
613           nz = ai[i+1] - ai[i];
614           s[0] = b[i2]; s[1] = b[i2+1];
615           for (j=0; j<nz; j++) {
616             idx = 2*vi[j];
617             it  = 4*j;
618             xw[0] = x[idx]; xw[1] = x[1+idx];
619             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
620           }
621           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
622           x[i2]  += xw[0]; x[i2+1] += xw[1];
623           idiag  += 4;
624           i2     += 2;
625         }
626         break;
627       case 3:
628         for (i=0; i<m; i++) {
629           v  = aa + 9*ai[i];
630           vi = aj + ai[i];
631           nz = ai[i+1] - ai[i];
632           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
633           while (nz--) {
634             idx = 3*(*vi++);
635             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
636             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
637             v  += 9;
638           }
639           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
640           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
641           idiag  += 9;
642           i2     += 3;
643         }
644         break;
645       case 4:
646         for (i=0; i<m; i++) {
647           v  = aa + 16*ai[i];
648           vi = aj + ai[i];
649           nz = ai[i+1] - ai[i];
650           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
651           while (nz--) {
652             idx = 4*(*vi++);
653             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
654             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
655             v  += 16;
656           }
657           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
658           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
659           idiag  += 16;
660           i2     += 4;
661         }
662         break;
663       case 5:
664         for (i=0; i<m; i++) {
665           v  = aa + 25*ai[i];
666           vi = aj + ai[i];
667           nz = ai[i+1] - ai[i];
668           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
669           while (nz--) {
670             idx = 5*(*vi++);
671             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
672             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
673             v  += 25;
674           }
675           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
676           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
677           idiag  += 25;
678           i2     += 5;
679         }
680         break;
681       case 6:
682         for (i=0; i<m; i++) {
683           v  = aa + 36*ai[i];
684           vi = aj + ai[i];
685           nz = ai[i+1] - ai[i];
686           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
687           while (nz--) {
688             idx = 6*(*vi++);
689             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
690             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
691             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
692             v  += 36;
693           }
694           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
695           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
696           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
697           idiag  += 36;
698           i2     += 6;
699         }
700         break;
701       case 7:
702         for (i=0; i<m; i++) {
703           v  = aa + 49*ai[i];
704           vi = aj + ai[i];
705           nz = ai[i+1] - ai[i];
706           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
707           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
708           while (nz--) {
709             idx = 7*(*vi++);
710             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
711             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
712             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
713             v  += 49;
714           }
715           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
716           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
717           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
718           idiag  += 49;
719           i2     += 7;
720         }
721         break;
722       default:
723         for (i=0; i<m; i++) {
724           v  = aa + bs2*ai[i];
725           vi = aj + ai[i];
726           nz = ai[i+1] - ai[i];
727 
728           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
729           /* copy all rows of x that are needed into contiguous space */
730           workt = work;
731           for (j=0; j<nz; j++) {
732             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
733             workt += bs;
734           }
735           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
736           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
737 
738           idiag += bs2;
739           i2    += bs;
740         }
741         break;
742       }
743       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
744     }
745     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
746       idiag = a->idiag+bs2*(a->mbs-1);
747       i2 = bs * (m-1);
748       switch (bs) {
749       case 1:
750         for (i=m-1; i>=0; i--) {
751           v  = aa + ai[i];
752           vi = aj + ai[i];
753           nz = ai[i+1] - ai[i];
754           s[0] = b[i2];
755           for (j=0; j<nz; j++) {
756             xw[0] = x[vi[j]];
757             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
758           }
759           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
760           x[i2] += xw[0];
761           idiag -= 1;
762           i2    -= 1;
763         }
764         break;
765       case 2:
766         for (i=m-1; i>=0; i--) {
767           v  = aa + 4*ai[i];
768           vi = aj + ai[i];
769           nz = ai[i+1] - ai[i];
770           s[0] = b[i2]; s[1] = b[i2+1];
771           for (j=0; j<nz; j++) {
772             idx = 2*vi[j];
773             it  = 4*j;
774             xw[0] = x[idx]; xw[1] = x[1+idx];
775             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
776           }
777           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
778           x[i2]  += xw[0]; x[i2+1] += xw[1];
779           idiag  -= 4;
780           i2     -= 2;
781         }
782         break;
783       case 3:
784         for (i=m-1; i>=0; i--) {
785           v  = aa + 9*ai[i];
786           vi = aj + ai[i];
787           nz = ai[i+1] - ai[i];
788           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
789           while (nz--) {
790             idx = 3*(*vi++);
791             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
792             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
793             v  += 9;
794           }
795           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
796           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
797           idiag  -= 9;
798           i2     -= 3;
799         }
800         break;
801       case 4:
802         for (i=m-1; i>=0; i--) {
803           v  = aa + 16*ai[i];
804           vi = aj + ai[i];
805           nz = ai[i+1] - ai[i];
806           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
807           while (nz--) {
808             idx = 4*(*vi++);
809             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
810             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
811             v  += 16;
812           }
813           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
814           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
815           idiag  -= 16;
816           i2     -= 4;
817         }
818         break;
819       case 5:
820         for (i=m-1; i>=0; i--) {
821           v  = aa + 25*ai[i];
822           vi = aj + ai[i];
823           nz = ai[i+1] - ai[i];
824           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
825           while (nz--) {
826             idx = 5*(*vi++);
827             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
828             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
829             v  += 25;
830           }
831           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
832           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
833           idiag  -= 25;
834           i2     -= 5;
835         }
836         break;
837       case 6:
838         for (i=m-1; i>=0; i--) {
839           v  = aa + 36*ai[i];
840           vi = aj + ai[i];
841           nz = ai[i+1] - ai[i];
842           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
843           while (nz--) {
844             idx = 6*(*vi++);
845             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
846             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
847             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
848             v  += 36;
849           }
850           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
851           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
852           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
853           idiag  -= 36;
854           i2     -= 6;
855         }
856         break;
857       case 7:
858         for (i=m-1; i>=0; i--) {
859           v  = aa + 49*ai[i];
860           vi = aj + ai[i];
861           nz = ai[i+1] - ai[i];
862           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
863           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
864           while (nz--) {
865             idx = 7*(*vi++);
866             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
867             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
868             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
869             v  += 49;
870           }
871           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
872           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
873           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
874           idiag  -= 49;
875           i2     -= 7;
876         }
877         break;
878       default:
879         for (i=m-1; i>=0; i--) {
880           v  = aa + bs2*ai[i];
881           vi = aj + ai[i];
882           nz = ai[i+1] - ai[i];
883 
884           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
885           /* copy all rows of x that are needed into contiguous space */
886           workt = work;
887           for (j=0; j<nz; j++) {
888             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
889             workt += bs;
890           }
891           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
892           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
893 
894           idiag -= bs2;
895           i2    -= bs;
896         }
897         break;
898       }
899       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
900     }
901   }
902   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
903   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
904   PetscFunctionReturn(0);
905 }
906 
907 
908 /*
909     Special version for direct calls from Fortran (Used in PETSc-fun3d)
910 */
911 #if defined(PETSC_HAVE_FORTRAN_CAPS)
912 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
913 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
914 #define matsetvaluesblocked4_ matsetvaluesblocked4
915 #endif
916 
917 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
918 {
919   Mat               A  = *AA;
920   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
921   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
922   PetscInt          *ai    =a->i,*ailen=a->ilen;
923   PetscInt          *aj    =a->j,stepval,lastcol = -1;
924   const PetscScalar *value = v;
925   MatScalar         *ap,*aa = a->a,*bap;
926 
927   PetscFunctionBegin;
928   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
929   stepval = (n-1)*4;
930   for (k=0; k<m; k++) { /* loop over added rows */
931     row  = im[k];
932     rp   = aj + ai[row];
933     ap   = aa + 16*ai[row];
934     nrow = ailen[row];
935     low  = 0;
936     high = nrow;
937     for (l=0; l<n; l++) { /* loop over added columns */
938       col = in[l];
939       if (col <= lastcol)  low = 0;
940       else                high = nrow;
941       lastcol = col;
942       value   = v + k*(stepval+4 + l)*4;
943       while (high-low > 7) {
944         t = (low+high)/2;
945         if (rp[t] > col) high = t;
946         else             low  = t;
947       }
948       for (i=low; i<high; i++) {
949         if (rp[i] > col) break;
950         if (rp[i] == col) {
951           bap = ap +  16*i;
952           for (ii=0; ii<4; ii++,value+=stepval) {
953             for (jj=ii; jj<16; jj+=4) {
954               bap[jj] += *value++;
955             }
956           }
957           goto noinsert2;
958         }
959       }
960       N = nrow++ - 1;
961       high++; /* added new column index thus must search to one higher than before */
962       /* shift up all the later entries in this row */
963       for (ii=N; ii>=i; ii--) {
964         rp[ii+1] = rp[ii];
965         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
966       }
967       if (N >= i) {
968         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
969       }
970       rp[i] = col;
971       bap   = ap +  16*i;
972       for (ii=0; ii<4; ii++,value+=stepval) {
973         for (jj=ii; jj<16; jj+=4) {
974           bap[jj] = *value++;
975         }
976       }
977       noinsert2:;
978       low = i;
979     }
980     ailen[row] = nrow;
981   }
982   PetscFunctionReturnVoid();
983 }
984 
985 #if defined(PETSC_HAVE_FORTRAN_CAPS)
986 #define matsetvalues4_ MATSETVALUES4
987 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
988 #define matsetvalues4_ matsetvalues4
989 #endif
990 
991 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
992 {
993   Mat         A  = *AA;
994   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
995   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
996   PetscInt    *ai=a->i,*ailen=a->ilen;
997   PetscInt    *aj=a->j,brow,bcol;
998   PetscInt    ridx,cidx,lastcol = -1;
999   MatScalar   *ap,value,*aa=a->a,*bap;
1000 
1001   PetscFunctionBegin;
1002   for (k=0; k<m; k++) { /* loop over added rows */
1003     row  = im[k]; brow = row/4;
1004     rp   = aj + ai[brow];
1005     ap   = aa + 16*ai[brow];
1006     nrow = ailen[brow];
1007     low  = 0;
1008     high = nrow;
1009     for (l=0; l<n; l++) { /* loop over added columns */
1010       col   = in[l]; bcol = col/4;
1011       ridx  = row % 4; cidx = col % 4;
1012       value = v[l + k*n];
1013       if (col <= lastcol)  low = 0;
1014       else                high = nrow;
1015       lastcol = col;
1016       while (high-low > 7) {
1017         t = (low+high)/2;
1018         if (rp[t] > bcol) high = t;
1019         else              low  = t;
1020       }
1021       for (i=low; i<high; i++) {
1022         if (rp[i] > bcol) break;
1023         if (rp[i] == bcol) {
1024           bap   = ap +  16*i + 4*cidx + ridx;
1025           *bap += value;
1026           goto noinsert1;
1027         }
1028       }
1029       N = nrow++ - 1;
1030       high++; /* added new column thus must search to one higher than before */
1031       /* shift up all the later entries in this row */
1032       for (ii=N; ii>=i; ii--) {
1033         rp[ii+1] = rp[ii];
1034         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1035       }
1036       if (N>=i) {
1037         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1038       }
1039       rp[i]                    = bcol;
1040       ap[16*i + 4*cidx + ridx] = value;
1041 noinsert1:;
1042       low = i;
1043     }
1044     ailen[brow] = nrow;
1045   }
1046   PetscFunctionReturnVoid();
1047 }
1048 
1049 /*
1050      Checks for missing diagonals
1051 */
1052 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1053 {
1054   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1055   PetscErrorCode ierr;
1056   PetscInt       *diag,*ii = a->i,i;
1057 
1058   PetscFunctionBegin;
1059   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1060   *missing = PETSC_FALSE;
1061   if (A->rmap->n > 0 && !ii) {
1062     *missing = PETSC_TRUE;
1063     if (d) *d = 0;
1064     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1065   } else {
1066     diag = a->diag;
1067     for (i=0; i<a->mbs; i++) {
1068       if (diag[i] >= ii[i+1]) {
1069         *missing = PETSC_TRUE;
1070         if (d) *d = i;
1071         PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);
1072         break;
1073       }
1074     }
1075   }
1076   PetscFunctionReturn(0);
1077 }
1078 
1079 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1080 {
1081   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1082   PetscErrorCode ierr;
1083   PetscInt       i,j,m = a->mbs;
1084 
1085   PetscFunctionBegin;
1086   if (!a->diag) {
1087     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1088     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1089     a->free_diag = PETSC_TRUE;
1090   }
1091   for (i=0; i<m; i++) {
1092     a->diag[i] = a->i[i+1];
1093     for (j=a->i[i]; j<a->i[i+1]; j++) {
1094       if (a->j[j] == i) {
1095         a->diag[i] = j;
1096         break;
1097       }
1098     }
1099   }
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 
1104 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1105 {
1106   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1107   PetscErrorCode ierr;
1108   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1109   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1110 
1111   PetscFunctionBegin;
1112   *nn = n;
1113   if (!ia) PetscFunctionReturn(0);
1114   if (symmetric) {
1115     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1116     nz   = tia[n];
1117   } else {
1118     tia = a->i; tja = a->j;
1119   }
1120 
1121   if (!blockcompressed && bs > 1) {
1122     (*nn) *= bs;
1123     /* malloc & create the natural set of indices */
1124     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1125     if (n) {
1126       (*ia)[0] = oshift;
1127       for (j=1; j<bs; j++) {
1128         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1129       }
1130     }
1131 
1132     for (i=1; i<n; i++) {
1133       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1134       for (j=1; j<bs; j++) {
1135         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1136       }
1137     }
1138     if (n) {
1139       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1140     }
1141 
1142     if (inja) {
1143       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1144       cnt = 0;
1145       for (i=0; i<n; i++) {
1146         for (j=0; j<bs; j++) {
1147           for (k=tia[i]; k<tia[i+1]; k++) {
1148             for (l=0; l<bs; l++) {
1149               (*ja)[cnt++] = bs*tja[k] + l;
1150             }
1151           }
1152         }
1153       }
1154     }
1155 
1156     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1157       ierr = PetscFree(tia);CHKERRQ(ierr);
1158       ierr = PetscFree(tja);CHKERRQ(ierr);
1159     }
1160   } else if (oshift == 1) {
1161     if (symmetric) {
1162       nz = tia[A->rmap->n/bs];
1163       /*  add 1 to i and j indices */
1164       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1165       *ia = tia;
1166       if (ja) {
1167         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1168         *ja = tja;
1169       }
1170     } else {
1171       nz = a->i[A->rmap->n/bs];
1172       /* malloc space and  add 1 to i and j indices */
1173       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1174       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1175       if (ja) {
1176         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1177         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1178       }
1179     }
1180   } else {
1181     *ia = tia;
1182     if (ja) *ja = tja;
1183   }
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1188 {
1189   PetscErrorCode ierr;
1190 
1191   PetscFunctionBegin;
1192   if (!ia) PetscFunctionReturn(0);
1193   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1194     ierr = PetscFree(*ia);CHKERRQ(ierr);
1195     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1196   }
1197   PetscFunctionReturn(0);
1198 }
1199 
1200 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1201 {
1202   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1203   PetscErrorCode ierr;
1204 
1205   PetscFunctionBegin;
1206 #if defined(PETSC_USE_LOG)
1207   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1208 #endif
1209   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1210   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1211   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1212   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1213   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1214   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1215   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1216   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1217   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1218   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1219   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1220   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1221   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1222 
1223   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1224   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1225   ierr = PetscFree(A->data);CHKERRQ(ierr);
1226 
1227   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1228   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1229   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1230   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1231   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1232   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1233   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1234   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1235   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1236   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1237   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1238 #if defined(PETSC_HAVE_HYPRE)
1239   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1240 #endif
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1245 {
1246   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1247   PetscErrorCode ierr;
1248 
1249   PetscFunctionBegin;
1250   switch (op) {
1251   case MAT_ROW_ORIENTED:
1252     a->roworiented = flg;
1253     break;
1254   case MAT_KEEP_NONZERO_PATTERN:
1255     a->keepnonzeropattern = flg;
1256     break;
1257   case MAT_NEW_NONZERO_LOCATIONS:
1258     a->nonew = (flg ? 0 : 1);
1259     break;
1260   case MAT_NEW_NONZERO_LOCATION_ERR:
1261     a->nonew = (flg ? -1 : 0);
1262     break;
1263   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1264     a->nonew = (flg ? -2 : 0);
1265     break;
1266   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1267     a->nounused = (flg ? -1 : 0);
1268     break;
1269   case MAT_NEW_DIAGONALS:
1270   case MAT_IGNORE_OFF_PROC_ENTRIES:
1271   case MAT_USE_HASH_TABLE:
1272     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1273     break;
1274   case MAT_SPD:
1275   case MAT_SYMMETRIC:
1276   case MAT_STRUCTURALLY_SYMMETRIC:
1277   case MAT_HERMITIAN:
1278   case MAT_SYMMETRY_ETERNAL:
1279   case MAT_SUBMAT_SINGLEIS:
1280     /* These options are handled directly by MatSetOption() */
1281     break;
1282   default:
1283     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1284   }
1285   PetscFunctionReturn(0);
1286 }
1287 
1288 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1289 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1290 {
1291   PetscErrorCode ierr;
1292   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1293   MatScalar      *aa_i;
1294   PetscScalar    *v_i;
1295 
1296   PetscFunctionBegin;
1297   bs  = A->rmap->bs;
1298   bs2 = bs*bs;
1299   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1300 
1301   bn  = row/bs;   /* Block number */
1302   bp  = row % bs; /* Block Position */
1303   M   = ai[bn+1] - ai[bn];
1304   *nz = bs*M;
1305 
1306   if (v) {
1307     *v = 0;
1308     if (*nz) {
1309       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1310       for (i=0; i<M; i++) { /* for each block in the block row */
1311         v_i  = *v + i*bs;
1312         aa_i = aa + bs2*(ai[bn] + i);
1313         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1314       }
1315     }
1316   }
1317 
1318   if (idx) {
1319     *idx = 0;
1320     if (*nz) {
1321       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1322       for (i=0; i<M; i++) { /* for each block in the block row */
1323         idx_i = *idx + i*bs;
1324         itmp  = bs*aj[ai[bn] + i];
1325         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1326       }
1327     }
1328   }
1329   PetscFunctionReturn(0);
1330 }
1331 
1332 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1333 {
1334   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1335   PetscErrorCode ierr;
1336 
1337   PetscFunctionBegin;
1338   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1339   PetscFunctionReturn(0);
1340 }
1341 
1342 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1343 {
1344   PetscErrorCode ierr;
1345 
1346   PetscFunctionBegin;
1347   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1348   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1349   PetscFunctionReturn(0);
1350 }
1351 
1352 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1353 
1354 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1355 {
1356   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1357   Mat            C;
1358   PetscErrorCode ierr;
1359   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1360   PetscInt       *rows,*cols,bs2=a->bs2;
1361   MatScalar      *array;
1362 
1363   PetscFunctionBegin;
1364   if (reuse == MAT_INPLACE_MATRIX && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1365   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1366     ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr);
1367 
1368     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1369     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1370     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1371     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1372     ierr = MatSeqBAIJSetPreallocation(C,bs,0,col);CHKERRQ(ierr);
1373     ierr = PetscFree(col);CHKERRQ(ierr);
1374   } else {
1375     C = *B;
1376   }
1377 
1378   array = a->a;
1379   ierr  = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr);
1380   for (i=0; i<mbs; i++) {
1381     cols[0] = i*bs;
1382     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1383     len = ai[i+1] - ai[i];
1384     for (j=0; j<len; j++) {
1385       rows[0] = (*aj++)*bs;
1386       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1387       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1388       array += bs2;
1389     }
1390   }
1391   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1392 
1393   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1394   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1395 
1396   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1397     *B = C;
1398   } else {
1399     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1400   }
1401   PetscFunctionReturn(0);
1402 }
1403 
1404 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1405 {
1406   PetscErrorCode ierr;
1407   Mat            Btrans;
1408 
1409   PetscFunctionBegin;
1410   *f   = PETSC_FALSE;
1411   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1412   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1413   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1414   PetscFunctionReturn(0);
1415 }
1416 
1417 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1418 {
1419   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1420   PetscErrorCode ierr;
1421   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1422   int            fd;
1423   PetscScalar    *aa;
1424   FILE           *file;
1425 
1426   PetscFunctionBegin;
1427   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1428   ierr        = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr);
1429   col_lens[0] = MAT_FILE_CLASSID;
1430 
1431   col_lens[1] = A->rmap->N;
1432   col_lens[2] = A->cmap->n;
1433   col_lens[3] = a->nz*bs2;
1434 
1435   /* store lengths of each row and write (including header) to file */
1436   count = 0;
1437   for (i=0; i<a->mbs; i++) {
1438     for (j=0; j<bs; j++) {
1439       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1440     }
1441   }
1442   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1443   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1444 
1445   /* store column indices (zero start index) */
1446   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1447   count = 0;
1448   for (i=0; i<a->mbs; i++) {
1449     for (j=0; j<bs; j++) {
1450       for (k=a->i[i]; k<a->i[i+1]; k++) {
1451         for (l=0; l<bs; l++) {
1452           jj[count++] = bs*a->j[k] + l;
1453         }
1454       }
1455     }
1456   }
1457   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1458   ierr = PetscFree(jj);CHKERRQ(ierr);
1459 
1460   /* store nonzero values */
1461   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1462   count = 0;
1463   for (i=0; i<a->mbs; i++) {
1464     for (j=0; j<bs; j++) {
1465       for (k=a->i[i]; k<a->i[i+1]; k++) {
1466         for (l=0; l<bs; l++) {
1467           aa[count++] = a->a[bs2*k + l*bs + j];
1468         }
1469       }
1470     }
1471   }
1472   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1473   ierr = PetscFree(aa);CHKERRQ(ierr);
1474 
1475   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1476   if (file) {
1477     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1478   }
1479   PetscFunctionReturn(0);
1480 }
1481 
1482 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1483 {
1484   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1485   PetscErrorCode    ierr;
1486   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1487   PetscViewerFormat format;
1488 
1489   PetscFunctionBegin;
1490   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1491   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1492     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1493   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1494     const char *matname;
1495     Mat        aij;
1496     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1497     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1498     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1499     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1500     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1501   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1502       PetscFunctionReturn(0);
1503   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1504     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1505     for (i=0; i<a->mbs; i++) {
1506       for (j=0; j<bs; j++) {
1507         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1508         for (k=a->i[i]; k<a->i[i+1]; k++) {
1509           for (l=0; l<bs; l++) {
1510 #if defined(PETSC_USE_COMPLEX)
1511             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1512               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1513                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1514             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1515               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1516                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1517             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1518               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1519             }
1520 #else
1521             if (a->a[bs2*k + l*bs + j] != 0.0) {
1522               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1523             }
1524 #endif
1525           }
1526         }
1527         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1528       }
1529     }
1530     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1531   } else {
1532     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1533     for (i=0; i<a->mbs; i++) {
1534       for (j=0; j<bs; j++) {
1535         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1536         for (k=a->i[i]; k<a->i[i+1]; k++) {
1537           for (l=0; l<bs; l++) {
1538 #if defined(PETSC_USE_COMPLEX)
1539             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1540               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1541                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1542             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1543               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1544                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1545             } else {
1546               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1547             }
1548 #else
1549             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1550 #endif
1551           }
1552         }
1553         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1554       }
1555     }
1556     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1557   }
1558   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1559   PetscFunctionReturn(0);
1560 }
1561 
1562 #include <petscdraw.h>
1563 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1564 {
1565   Mat               A = (Mat) Aa;
1566   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1567   PetscErrorCode    ierr;
1568   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1569   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1570   MatScalar         *aa;
1571   PetscViewer       viewer;
1572   PetscViewerFormat format;
1573 
1574   PetscFunctionBegin;
1575   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1576   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1577   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1578 
1579   /* loop over matrix elements drawing boxes */
1580 
1581   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1582     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1583     /* Blue for negative, Cyan for zero and  Red for positive */
1584     color = PETSC_DRAW_BLUE;
1585     for (i=0,row=0; i<mbs; i++,row+=bs) {
1586       for (j=a->i[i]; j<a->i[i+1]; j++) {
1587         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1588         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1589         aa  = a->a + j*bs2;
1590         for (k=0; k<bs; k++) {
1591           for (l=0; l<bs; l++) {
1592             if (PetscRealPart(*aa++) >=  0.) continue;
1593             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1594           }
1595         }
1596       }
1597     }
1598     color = PETSC_DRAW_CYAN;
1599     for (i=0,row=0; i<mbs; i++,row+=bs) {
1600       for (j=a->i[i]; j<a->i[i+1]; j++) {
1601         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1602         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1603         aa  = a->a + j*bs2;
1604         for (k=0; k<bs; k++) {
1605           for (l=0; l<bs; l++) {
1606             if (PetscRealPart(*aa++) != 0.) continue;
1607             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1608           }
1609         }
1610       }
1611     }
1612     color = PETSC_DRAW_RED;
1613     for (i=0,row=0; i<mbs; i++,row+=bs) {
1614       for (j=a->i[i]; j<a->i[i+1]; j++) {
1615         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1616         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1617         aa  = a->a + j*bs2;
1618         for (k=0; k<bs; k++) {
1619           for (l=0; l<bs; l++) {
1620             if (PetscRealPart(*aa++) <= 0.) continue;
1621             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1622           }
1623         }
1624       }
1625     }
1626     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1627   } else {
1628     /* use contour shading to indicate magnitude of values */
1629     /* first determine max of all nonzero values */
1630     PetscReal minv = 0.0, maxv = 0.0;
1631     PetscDraw popup;
1632 
1633     for (i=0; i<a->nz*a->bs2; i++) {
1634       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1635     }
1636     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1637     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1638     ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1639 
1640     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1641     for (i=0,row=0; i<mbs; i++,row+=bs) {
1642       for (j=a->i[i]; j<a->i[i+1]; j++) {
1643         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1644         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1645         aa  = a->a + j*bs2;
1646         for (k=0; k<bs; k++) {
1647           for (l=0; l<bs; l++) {
1648             MatScalar v = *aa++;
1649             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1650             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1651           }
1652         }
1653       }
1654     }
1655     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1656   }
1657   PetscFunctionReturn(0);
1658 }
1659 
1660 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1661 {
1662   PetscErrorCode ierr;
1663   PetscReal      xl,yl,xr,yr,w,h;
1664   PetscDraw      draw;
1665   PetscBool      isnull;
1666 
1667   PetscFunctionBegin;
1668   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1669   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1670   if (isnull) PetscFunctionReturn(0);
1671 
1672   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1673   xr  += w;          yr += h;        xl = -w;     yl = -h;
1674   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1675   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1676   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1677   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1678   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1679   PetscFunctionReturn(0);
1680 }
1681 
1682 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1683 {
1684   PetscErrorCode ierr;
1685   PetscBool      iascii,isbinary,isdraw;
1686 
1687   PetscFunctionBegin;
1688   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1689   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1690   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1691   if (iascii) {
1692     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1693   } else if (isbinary) {
1694     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1695   } else if (isdraw) {
1696     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1697   } else {
1698     Mat B;
1699     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1700     ierr = MatView(B,viewer);CHKERRQ(ierr);
1701     ierr = MatDestroy(&B);CHKERRQ(ierr);
1702   }
1703   PetscFunctionReturn(0);
1704 }
1705 
1706 
1707 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1708 {
1709   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1710   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1711   PetscInt    *ai = a->i,*ailen = a->ilen;
1712   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1713   MatScalar   *ap,*aa = a->a;
1714 
1715   PetscFunctionBegin;
1716   for (k=0; k<m; k++) { /* loop over rows */
1717     row = im[k]; brow = row/bs;
1718     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1719     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1720     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1721     nrow = ailen[brow];
1722     for (l=0; l<n; l++) { /* loop over columns */
1723       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1724       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1725       col  = in[l];
1726       bcol = col/bs;
1727       cidx = col%bs;
1728       ridx = row%bs;
1729       high = nrow;
1730       low  = 0; /* assume unsorted */
1731       while (high-low > 5) {
1732         t = (low+high)/2;
1733         if (rp[t] > bcol) high = t;
1734         else             low  = t;
1735       }
1736       for (i=low; i<high; i++) {
1737         if (rp[i] > bcol) break;
1738         if (rp[i] == bcol) {
1739           *v++ = ap[bs2*i+bs*cidx+ridx];
1740           goto finished;
1741         }
1742       }
1743       *v++ = 0.0;
1744 finished:;
1745     }
1746   }
1747   PetscFunctionReturn(0);
1748 }
1749 
1750 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1751 {
1752   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1753   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1754   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1755   PetscErrorCode    ierr;
1756   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1757   PetscBool         roworiented=a->roworiented;
1758   const PetscScalar *value     = v;
1759   MatScalar         *ap,*aa = a->a,*bap;
1760 
1761   PetscFunctionBegin;
1762   if (roworiented) {
1763     stepval = (n-1)*bs;
1764   } else {
1765     stepval = (m-1)*bs;
1766   }
1767   for (k=0; k<m; k++) { /* loop over added rows */
1768     row = im[k];
1769     if (row < 0) continue;
1770 #if defined(PETSC_USE_DEBUG)
1771     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1772 #endif
1773     rp   = aj + ai[row];
1774     ap   = aa + bs2*ai[row];
1775     rmax = imax[row];
1776     nrow = ailen[row];
1777     low  = 0;
1778     high = nrow;
1779     for (l=0; l<n; l++) { /* loop over added columns */
1780       if (in[l] < 0) continue;
1781 #if defined(PETSC_USE_DEBUG)
1782       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1783 #endif
1784       col = in[l];
1785       if (roworiented) {
1786         value = v + (k*(stepval+bs) + l)*bs;
1787       } else {
1788         value = v + (l*(stepval+bs) + k)*bs;
1789       }
1790       if (col <= lastcol) low = 0;
1791       else high = nrow;
1792       lastcol = col;
1793       while (high-low > 7) {
1794         t = (low+high)/2;
1795         if (rp[t] > col) high = t;
1796         else             low  = t;
1797       }
1798       for (i=low; i<high; i++) {
1799         if (rp[i] > col) break;
1800         if (rp[i] == col) {
1801           bap = ap +  bs2*i;
1802           if (roworiented) {
1803             if (is == ADD_VALUES) {
1804               for (ii=0; ii<bs; ii++,value+=stepval) {
1805                 for (jj=ii; jj<bs2; jj+=bs) {
1806                   bap[jj] += *value++;
1807                 }
1808               }
1809             } else {
1810               for (ii=0; ii<bs; ii++,value+=stepval) {
1811                 for (jj=ii; jj<bs2; jj+=bs) {
1812                   bap[jj] = *value++;
1813                 }
1814               }
1815             }
1816           } else {
1817             if (is == ADD_VALUES) {
1818               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1819                 for (jj=0; jj<bs; jj++) {
1820                   bap[jj] += value[jj];
1821                 }
1822                 bap += bs;
1823               }
1824             } else {
1825               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1826                 for (jj=0; jj<bs; jj++) {
1827                   bap[jj]  = value[jj];
1828                 }
1829                 bap += bs;
1830               }
1831             }
1832           }
1833           goto noinsert2;
1834         }
1835       }
1836       if (nonew == 1) goto noinsert2;
1837       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1838       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1839       N = nrow++ - 1; high++;
1840       /* shift up all the later entries in this row */
1841       for (ii=N; ii>=i; ii--) {
1842         rp[ii+1] = rp[ii];
1843         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1844       }
1845       if (N >= i) {
1846         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1847       }
1848       rp[i] = col;
1849       bap   = ap +  bs2*i;
1850       if (roworiented) {
1851         for (ii=0; ii<bs; ii++,value+=stepval) {
1852           for (jj=ii; jj<bs2; jj+=bs) {
1853             bap[jj] = *value++;
1854           }
1855         }
1856       } else {
1857         for (ii=0; ii<bs; ii++,value+=stepval) {
1858           for (jj=0; jj<bs; jj++) {
1859             *bap++ = *value++;
1860           }
1861         }
1862       }
1863 noinsert2:;
1864       low = i;
1865     }
1866     ailen[row] = nrow;
1867   }
1868   PetscFunctionReturn(0);
1869 }
1870 
1871 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1872 {
1873   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1874   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1875   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1876   PetscErrorCode ierr;
1877   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1878   MatScalar      *aa  = a->a,*ap;
1879   PetscReal      ratio=0.6;
1880 
1881   PetscFunctionBegin;
1882   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1883 
1884   if (m) rmax = ailen[0];
1885   for (i=1; i<mbs; i++) {
1886     /* move each row back by the amount of empty slots (fshift) before it*/
1887     fshift += imax[i-1] - ailen[i-1];
1888     rmax    = PetscMax(rmax,ailen[i]);
1889     if (fshift) {
1890       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1891       N  = ailen[i];
1892       for (j=0; j<N; j++) {
1893         ip[j-fshift] = ip[j];
1894 
1895         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1896       }
1897     }
1898     ai[i] = ai[i-1] + ailen[i-1];
1899   }
1900   if (mbs) {
1901     fshift += imax[mbs-1] - ailen[mbs-1];
1902     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1903   }
1904 
1905   /* reset ilen and imax for each row */
1906   a->nonzerorowcnt = 0;
1907   for (i=0; i<mbs; i++) {
1908     ailen[i] = imax[i] = ai[i+1] - ai[i];
1909     a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1910   }
1911   a->nz = ai[mbs];
1912 
1913   /* diagonals may have moved, so kill the diagonal pointers */
1914   a->idiagvalid = PETSC_FALSE;
1915   if (fshift && a->diag) {
1916     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1917     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1918     a->diag = 0;
1919   }
1920   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1921   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1922   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1923   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1924 
1925   A->info.mallocs    += a->reallocs;
1926   a->reallocs         = 0;
1927   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1928   a->rmax             = rmax;
1929 
1930   ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1931   PetscFunctionReturn(0);
1932 }
1933 
1934 /*
1935    This function returns an array of flags which indicate the locations of contiguous
1936    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1937    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1938    Assume: sizes should be long enough to hold all the values.
1939 */
1940 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1941 {
1942   PetscInt  i,j,k,row;
1943   PetscBool flg;
1944 
1945   PetscFunctionBegin;
1946   for (i=0,j=0; i<n; j++) {
1947     row = idx[i];
1948     if (row%bs!=0) { /* Not the begining of a block */
1949       sizes[j] = 1;
1950       i++;
1951     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1952       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1953       i++;
1954     } else { /* Begining of the block, so check if the complete block exists */
1955       flg = PETSC_TRUE;
1956       for (k=1; k<bs; k++) {
1957         if (row+k != idx[i+k]) { /* break in the block */
1958           flg = PETSC_FALSE;
1959           break;
1960         }
1961       }
1962       if (flg) { /* No break in the bs */
1963         sizes[j] = bs;
1964         i       += bs;
1965       } else {
1966         sizes[j] = 1;
1967         i++;
1968       }
1969     }
1970   }
1971   *bs_max = j;
1972   PetscFunctionReturn(0);
1973 }
1974 
1975 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
1976 {
1977   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
1978   PetscErrorCode    ierr;
1979   PetscInt          i,j,k,count,*rows;
1980   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
1981   PetscScalar       zero = 0.0;
1982   MatScalar         *aa;
1983   const PetscScalar *xx;
1984   PetscScalar       *bb;
1985 
1986   PetscFunctionBegin;
1987   /* fix right hand side if needed */
1988   if (x && b) {
1989     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1990     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1991     for (i=0; i<is_n; i++) {
1992       bb[is_idx[i]] = diag*xx[is_idx[i]];
1993     }
1994     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1995     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1996   }
1997 
1998   /* Make a copy of the IS and  sort it */
1999   /* allocate memory for rows,sizes */
2000   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2001 
2002   /* copy IS values to rows, and sort them */
2003   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2004   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2005 
2006   if (baij->keepnonzeropattern) {
2007     for (i=0; i<is_n; i++) sizes[i] = 1;
2008     bs_max          = is_n;
2009   } else {
2010     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2011     A->nonzerostate++;
2012   }
2013 
2014   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2015     row = rows[j];
2016     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2017     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2018     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2019     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2020       if (diag != (PetscScalar)0.0) {
2021         if (baij->ilen[row/bs] > 0) {
2022           baij->ilen[row/bs]       = 1;
2023           baij->j[baij->i[row/bs]] = row/bs;
2024 
2025           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2026         }
2027         /* Now insert all the diagonal values for this bs */
2028         for (k=0; k<bs; k++) {
2029           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2030         }
2031       } else { /* (diag == 0.0) */
2032         baij->ilen[row/bs] = 0;
2033       } /* end (diag == 0.0) */
2034     } else { /* (sizes[i] != bs) */
2035 #if defined(PETSC_USE_DEBUG)
2036       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2037 #endif
2038       for (k=0; k<count; k++) {
2039         aa[0] =  zero;
2040         aa   += bs;
2041       }
2042       if (diag != (PetscScalar)0.0) {
2043         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2044       }
2045     }
2046   }
2047 
2048   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2049   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2050   PetscFunctionReturn(0);
2051 }
2052 
2053 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2054 {
2055   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2056   PetscErrorCode    ierr;
2057   PetscInt          i,j,k,count;
2058   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2059   PetscScalar       zero = 0.0;
2060   MatScalar         *aa;
2061   const PetscScalar *xx;
2062   PetscScalar       *bb;
2063   PetscBool         *zeroed,vecs = PETSC_FALSE;
2064 
2065   PetscFunctionBegin;
2066   /* fix right hand side if needed */
2067   if (x && b) {
2068     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2069     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2070     vecs = PETSC_TRUE;
2071   }
2072 
2073   /* zero the columns */
2074   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2075   for (i=0; i<is_n; i++) {
2076     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2077     zeroed[is_idx[i]] = PETSC_TRUE;
2078   }
2079   for (i=0; i<A->rmap->N; i++) {
2080     if (!zeroed[i]) {
2081       row = i/bs;
2082       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2083         for (k=0; k<bs; k++) {
2084           col = bs*baij->j[j] + k;
2085           if (zeroed[col]) {
2086             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2087             if (vecs) bb[i] -= aa[0]*xx[col];
2088             aa[0] = 0.0;
2089           }
2090         }
2091       }
2092     } else if (vecs) bb[i] = diag*xx[i];
2093   }
2094   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2095   if (vecs) {
2096     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2097     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2098   }
2099 
2100   /* zero the rows */
2101   for (i=0; i<is_n; i++) {
2102     row   = is_idx[i];
2103     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2104     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2105     for (k=0; k<count; k++) {
2106       aa[0] =  zero;
2107       aa   += bs;
2108     }
2109     if (diag != (PetscScalar)0.0) {
2110       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2111     }
2112   }
2113   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2114   PetscFunctionReturn(0);
2115 }
2116 
2117 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2118 {
2119   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2120   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2121   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2122   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2123   PetscErrorCode ierr;
2124   PetscInt       ridx,cidx,bs2=a->bs2;
2125   PetscBool      roworiented=a->roworiented;
2126   MatScalar      *ap,value,*aa=a->a,*bap;
2127 
2128   PetscFunctionBegin;
2129   for (k=0; k<m; k++) { /* loop over added rows */
2130     row  = im[k];
2131     brow = row/bs;
2132     if (row < 0) continue;
2133 #if defined(PETSC_USE_DEBUG)
2134     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2135 #endif
2136     rp   = aj + ai[brow];
2137     ap   = aa + bs2*ai[brow];
2138     rmax = imax[brow];
2139     nrow = ailen[brow];
2140     low  = 0;
2141     high = nrow;
2142     for (l=0; l<n; l++) { /* loop over added columns */
2143       if (in[l] < 0) continue;
2144 #if defined(PETSC_USE_DEBUG)
2145       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2146 #endif
2147       col  = in[l]; bcol = col/bs;
2148       ridx = row % bs; cidx = col % bs;
2149       if (roworiented) {
2150         value = v[l + k*n];
2151       } else {
2152         value = v[k + l*m];
2153       }
2154       if (col <= lastcol) low = 0; else high = nrow;
2155       lastcol = col;
2156       while (high-low > 7) {
2157         t = (low+high)/2;
2158         if (rp[t] > bcol) high = t;
2159         else              low  = t;
2160       }
2161       for (i=low; i<high; i++) {
2162         if (rp[i] > bcol) break;
2163         if (rp[i] == bcol) {
2164           bap = ap +  bs2*i + bs*cidx + ridx;
2165           if (is == ADD_VALUES) *bap += value;
2166           else                  *bap  = value;
2167           goto noinsert1;
2168         }
2169       }
2170       if (nonew == 1) goto noinsert1;
2171       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2172       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2173       N = nrow++ - 1; high++;
2174       /* shift up all the later entries in this row */
2175       for (ii=N; ii>=i; ii--) {
2176         rp[ii+1] = rp[ii];
2177         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2178       }
2179       if (N>=i) {
2180         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2181       }
2182       rp[i]                      = bcol;
2183       ap[bs2*i + bs*cidx + ridx] = value;
2184       a->nz++;
2185       A->nonzerostate++;
2186 noinsert1:;
2187       low = i;
2188     }
2189     ailen[brow] = nrow;
2190   }
2191   PetscFunctionReturn(0);
2192 }
2193 
2194 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2195 {
2196   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2197   Mat            outA;
2198   PetscErrorCode ierr;
2199   PetscBool      row_identity,col_identity;
2200 
2201   PetscFunctionBegin;
2202   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2203   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2204   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2205   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2206 
2207   outA            = inA;
2208   inA->factortype = MAT_FACTOR_LU;
2209   ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2210   ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
2211 
2212   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2213 
2214   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2215   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2216   a->row = row;
2217   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2218   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2219   a->col = col;
2220 
2221   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2222   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2223   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2224   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2225 
2226   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2227   if (!a->solve_work) {
2228     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2229     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2230   }
2231   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2232   PetscFunctionReturn(0);
2233 }
2234 
2235 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2236 {
2237   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2238   PetscInt    i,nz,mbs;
2239 
2240   PetscFunctionBegin;
2241   nz  = baij->maxnz;
2242   mbs = baij->mbs;
2243   for (i=0; i<nz; i++) {
2244     baij->j[i] = indices[i];
2245   }
2246   baij->nz = nz;
2247   for (i=0; i<mbs; i++) {
2248     baij->ilen[i] = baij->imax[i];
2249   }
2250   PetscFunctionReturn(0);
2251 }
2252 
2253 /*@
2254     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2255        in the matrix.
2256 
2257   Input Parameters:
2258 +  mat - the SeqBAIJ matrix
2259 -  indices - the column indices
2260 
2261   Level: advanced
2262 
2263   Notes:
2264     This can be called if you have precomputed the nonzero structure of the
2265   matrix and want to provide it to the matrix object to improve the performance
2266   of the MatSetValues() operation.
2267 
2268     You MUST have set the correct numbers of nonzeros per row in the call to
2269   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2270 
2271     MUST be called before any calls to MatSetValues();
2272 
2273 @*/
2274 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2275 {
2276   PetscErrorCode ierr;
2277 
2278   PetscFunctionBegin;
2279   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2280   PetscValidPointer(indices,2);
2281   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2282   PetscFunctionReturn(0);
2283 }
2284 
2285 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2286 {
2287   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2288   PetscErrorCode ierr;
2289   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2290   PetscReal      atmp;
2291   PetscScalar    *x,zero = 0.0;
2292   MatScalar      *aa;
2293   PetscInt       ncols,brow,krow,kcol;
2294 
2295   PetscFunctionBegin;
2296   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2297   bs  = A->rmap->bs;
2298   aa  = a->a;
2299   ai  = a->i;
2300   aj  = a->j;
2301   mbs = a->mbs;
2302 
2303   ierr = VecSet(v,zero);CHKERRQ(ierr);
2304   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2305   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2306   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2307   for (i=0; i<mbs; i++) {
2308     ncols = ai[1] - ai[0]; ai++;
2309     brow  = bs*i;
2310     for (j=0; j<ncols; j++) {
2311       for (kcol=0; kcol<bs; kcol++) {
2312         for (krow=0; krow<bs; krow++) {
2313           atmp = PetscAbsScalar(*aa);aa++;
2314           row  = brow + krow;   /* row index */
2315           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2316         }
2317       }
2318       aj++;
2319     }
2320   }
2321   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2322   PetscFunctionReturn(0);
2323 }
2324 
2325 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2326 {
2327   PetscErrorCode ierr;
2328 
2329   PetscFunctionBegin;
2330   /* If the two matrices have the same copy implementation, use fast copy. */
2331   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2332     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2333     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2334     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2335 
2336     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2337     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2338     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2339   } else {
2340     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2341   }
2342   PetscFunctionReturn(0);
2343 }
2344 
2345 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2346 {
2347   PetscErrorCode ierr;
2348 
2349   PetscFunctionBegin;
2350   ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2351   PetscFunctionReturn(0);
2352 }
2353 
2354 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2355 {
2356   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2357 
2358   PetscFunctionBegin;
2359   *array = a->a;
2360   PetscFunctionReturn(0);
2361 }
2362 
2363 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2364 {
2365   PetscFunctionBegin;
2366   PetscFunctionReturn(0);
2367 }
2368 
2369 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2370 {
2371   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2372   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2373   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2374   PetscErrorCode ierr;
2375 
2376   PetscFunctionBegin;
2377   /* Set the number of nonzeros in the new matrix */
2378   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2379   PetscFunctionReturn(0);
2380 }
2381 
2382 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2383 {
2384   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2385   PetscErrorCode ierr;
2386   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2387   PetscBLASInt   one=1;
2388 
2389   PetscFunctionBegin;
2390   if (str == SAME_NONZERO_PATTERN) {
2391     PetscScalar  alpha = a;
2392     PetscBLASInt bnz;
2393     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2394     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2395     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2396   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2397     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2398   } else {
2399     Mat      B;
2400     PetscInt *nnz;
2401     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2402     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2403     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2404     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2405     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2406     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2407     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2408     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2409     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2410     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2411     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2412     ierr = PetscFree(nnz);CHKERRQ(ierr);
2413   }
2414   PetscFunctionReturn(0);
2415 }
2416 
2417 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2418 {
2419   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2420   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2421   MatScalar   *aa = a->a;
2422 
2423   PetscFunctionBegin;
2424   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2425   PetscFunctionReturn(0);
2426 }
2427 
2428 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2429 {
2430   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2431   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2432   MatScalar   *aa = a->a;
2433 
2434   PetscFunctionBegin;
2435   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2436   PetscFunctionReturn(0);
2437 }
2438 
2439 /*
2440     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2441 */
2442 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2443 {
2444   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2445   PetscErrorCode ierr;
2446   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2447   PetscInt       nz = a->i[m],row,*jj,mr,col;
2448 
2449   PetscFunctionBegin;
2450   *nn = n;
2451   if (!ia) PetscFunctionReturn(0);
2452   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2453   else {
2454     ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2455     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2456     ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2457     jj   = a->j;
2458     for (i=0; i<nz; i++) {
2459       collengths[jj[i]]++;
2460     }
2461     cia[0] = oshift;
2462     for (i=0; i<n; i++) {
2463       cia[i+1] = cia[i] + collengths[i];
2464     }
2465     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2466     jj   = a->j;
2467     for (row=0; row<m; row++) {
2468       mr = a->i[row+1] - a->i[row];
2469       for (i=0; i<mr; i++) {
2470         col = *jj++;
2471 
2472         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2473       }
2474     }
2475     ierr = PetscFree(collengths);CHKERRQ(ierr);
2476     *ia  = cia; *ja = cja;
2477   }
2478   PetscFunctionReturn(0);
2479 }
2480 
2481 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2482 {
2483   PetscErrorCode ierr;
2484 
2485   PetscFunctionBegin;
2486   if (!ia) PetscFunctionReturn(0);
2487   ierr = PetscFree(*ia);CHKERRQ(ierr);
2488   ierr = PetscFree(*ja);CHKERRQ(ierr);
2489   PetscFunctionReturn(0);
2490 }
2491 
2492 /*
2493  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2494  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2495  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2496  */
2497 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2498 {
2499   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2500   PetscErrorCode ierr;
2501   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2502   PetscInt       nz = a->i[m],row,*jj,mr,col;
2503   PetscInt       *cspidx;
2504 
2505   PetscFunctionBegin;
2506   *nn = n;
2507   if (!ia) PetscFunctionReturn(0);
2508 
2509   ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2510   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2511   ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2512   ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr);
2513   jj   = a->j;
2514   for (i=0; i<nz; i++) {
2515     collengths[jj[i]]++;
2516   }
2517   cia[0] = oshift;
2518   for (i=0; i<n; i++) {
2519     cia[i+1] = cia[i] + collengths[i];
2520   }
2521   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2522   jj   = a->j;
2523   for (row=0; row<m; row++) {
2524     mr = a->i[row+1] - a->i[row];
2525     for (i=0; i<mr; i++) {
2526       col = *jj++;
2527       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2528       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2529     }
2530   }
2531   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2532   *ia    = cia; *ja = cja;
2533   *spidx = cspidx;
2534   PetscFunctionReturn(0);
2535 }
2536 
2537 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2538 {
2539   PetscErrorCode ierr;
2540 
2541   PetscFunctionBegin;
2542   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2543   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2544   PetscFunctionReturn(0);
2545 }
2546 
2547 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2548 {
2549   PetscErrorCode ierr;
2550   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2551 
2552   PetscFunctionBegin;
2553   if (!Y->preallocated || !aij->nz) {
2554     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2555   }
2556   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2557   PetscFunctionReturn(0);
2558 }
2559 
2560 /* -------------------------------------------------------------------*/
2561 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2562                                        MatGetRow_SeqBAIJ,
2563                                        MatRestoreRow_SeqBAIJ,
2564                                        MatMult_SeqBAIJ_N,
2565                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2566                                        MatMultTranspose_SeqBAIJ,
2567                                        MatMultTransposeAdd_SeqBAIJ,
2568                                        0,
2569                                        0,
2570                                        0,
2571                                /* 10*/ 0,
2572                                        MatLUFactor_SeqBAIJ,
2573                                        0,
2574                                        0,
2575                                        MatTranspose_SeqBAIJ,
2576                                /* 15*/ MatGetInfo_SeqBAIJ,
2577                                        MatEqual_SeqBAIJ,
2578                                        MatGetDiagonal_SeqBAIJ,
2579                                        MatDiagonalScale_SeqBAIJ,
2580                                        MatNorm_SeqBAIJ,
2581                                /* 20*/ 0,
2582                                        MatAssemblyEnd_SeqBAIJ,
2583                                        MatSetOption_SeqBAIJ,
2584                                        MatZeroEntries_SeqBAIJ,
2585                                /* 24*/ MatZeroRows_SeqBAIJ,
2586                                        0,
2587                                        0,
2588                                        0,
2589                                        0,
2590                                /* 29*/ MatSetUp_SeqBAIJ,
2591                                        0,
2592                                        0,
2593                                        0,
2594                                        0,
2595                                /* 34*/ MatDuplicate_SeqBAIJ,
2596                                        0,
2597                                        0,
2598                                        MatILUFactor_SeqBAIJ,
2599                                        0,
2600                                /* 39*/ MatAXPY_SeqBAIJ,
2601                                        MatCreateSubMatrices_SeqBAIJ,
2602                                        MatIncreaseOverlap_SeqBAIJ,
2603                                        MatGetValues_SeqBAIJ,
2604                                        MatCopy_SeqBAIJ,
2605                                /* 44*/ 0,
2606                                        MatScale_SeqBAIJ,
2607                                        MatShift_SeqBAIJ,
2608                                        0,
2609                                        MatZeroRowsColumns_SeqBAIJ,
2610                                /* 49*/ 0,
2611                                        MatGetRowIJ_SeqBAIJ,
2612                                        MatRestoreRowIJ_SeqBAIJ,
2613                                        MatGetColumnIJ_SeqBAIJ,
2614                                        MatRestoreColumnIJ_SeqBAIJ,
2615                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2616                                        0,
2617                                        0,
2618                                        0,
2619                                        MatSetValuesBlocked_SeqBAIJ,
2620                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2621                                        MatDestroy_SeqBAIJ,
2622                                        MatView_SeqBAIJ,
2623                                        0,
2624                                        0,
2625                                /* 64*/ 0,
2626                                        0,
2627                                        0,
2628                                        0,
2629                                        0,
2630                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2631                                        0,
2632                                        MatConvert_Basic,
2633                                        0,
2634                                        0,
2635                                /* 74*/ 0,
2636                                        MatFDColoringApply_BAIJ,
2637                                        0,
2638                                        0,
2639                                        0,
2640                                /* 79*/ 0,
2641                                        0,
2642                                        0,
2643                                        0,
2644                                        MatLoad_SeqBAIJ,
2645                                /* 84*/ 0,
2646                                        0,
2647                                        0,
2648                                        0,
2649                                        0,
2650                                /* 89*/ 0,
2651                                        0,
2652                                        0,
2653                                        0,
2654                                        0,
2655                                /* 94*/ 0,
2656                                        0,
2657                                        0,
2658                                        0,
2659                                        0,
2660                                /* 99*/ 0,
2661                                        0,
2662                                        0,
2663                                        0,
2664                                        0,
2665                                /*104*/ 0,
2666                                        MatRealPart_SeqBAIJ,
2667                                        MatImaginaryPart_SeqBAIJ,
2668                                        0,
2669                                        0,
2670                                /*109*/ 0,
2671                                        0,
2672                                        0,
2673                                        0,
2674                                        MatMissingDiagonal_SeqBAIJ,
2675                                /*114*/ 0,
2676                                        0,
2677                                        0,
2678                                        0,
2679                                        0,
2680                                /*119*/ 0,
2681                                        0,
2682                                        MatMultHermitianTranspose_SeqBAIJ,
2683                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2684                                        0,
2685                                /*124*/ 0,
2686                                        0,
2687                                        MatInvertBlockDiagonal_SeqBAIJ,
2688                                        0,
2689                                        0,
2690                                /*129*/ 0,
2691                                        0,
2692                                        0,
2693                                        0,
2694                                        0,
2695                                /*134*/ 0,
2696                                        0,
2697                                        0,
2698                                        0,
2699                                        0,
2700                                /*139*/ MatSetBlockSizes_Default,
2701                                        0,
2702                                        0,
2703                                        MatFDColoringSetUp_SeqXAIJ,
2704                                        0,
2705                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ
2706 };
2707 
2708 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2709 {
2710   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2711   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2712   PetscErrorCode ierr;
2713 
2714   PetscFunctionBegin;
2715   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2716 
2717   /* allocate space for values if not already there */
2718   if (!aij->saved_values) {
2719     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2720     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2721   }
2722 
2723   /* copy values over */
2724   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2725   PetscFunctionReturn(0);
2726 }
2727 
2728 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2729 {
2730   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2731   PetscErrorCode ierr;
2732   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2733 
2734   PetscFunctionBegin;
2735   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2736   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2737 
2738   /* copy values over */
2739   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2740   PetscFunctionReturn(0);
2741 }
2742 
2743 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2744 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2745 
2746 static PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2747 {
2748   Mat_SeqBAIJ    *b;
2749   PetscErrorCode ierr;
2750   PetscInt       i,mbs,nbs,bs2;
2751   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2752 
2753   PetscFunctionBegin;
2754   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2755   if (nz == MAT_SKIP_ALLOCATION) {
2756     skipallocation = PETSC_TRUE;
2757     nz             = 0;
2758   }
2759 
2760   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2761   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2762   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2763   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2764 
2765   B->preallocated = PETSC_TRUE;
2766 
2767   mbs = B->rmap->n/bs;
2768   nbs = B->cmap->n/bs;
2769   bs2 = bs*bs;
2770 
2771   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2772 
2773   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2774   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2775   if (nnz) {
2776     for (i=0; i<mbs; i++) {
2777       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2778       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2779     }
2780   }
2781 
2782   b    = (Mat_SeqBAIJ*)B->data;
2783   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2784   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2785   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2786 
2787   if (!flg) {
2788     switch (bs) {
2789     case 1:
2790       B->ops->mult    = MatMult_SeqBAIJ_1;
2791       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2792       break;
2793     case 2:
2794       B->ops->mult    = MatMult_SeqBAIJ_2;
2795       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2796       break;
2797     case 3:
2798       B->ops->mult    = MatMult_SeqBAIJ_3;
2799       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2800       break;
2801     case 4:
2802       B->ops->mult    = MatMult_SeqBAIJ_4;
2803       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2804       break;
2805     case 5:
2806       B->ops->mult    = MatMult_SeqBAIJ_5;
2807       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2808       break;
2809     case 6:
2810       B->ops->mult    = MatMult_SeqBAIJ_6;
2811       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2812       break;
2813     case 7:
2814       B->ops->mult    = MatMult_SeqBAIJ_7;
2815       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2816       break;
2817     case 11:
2818       B->ops->mult    = MatMult_SeqBAIJ_11;
2819       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2820       break;
2821     case 15:
2822       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2823       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2824       break;
2825     default:
2826       B->ops->mult    = MatMult_SeqBAIJ_N;
2827       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2828       break;
2829     }
2830   }
2831   B->ops->sor = MatSOR_SeqBAIJ;
2832   b->mbs = mbs;
2833   b->nbs = nbs;
2834   if (!skipallocation) {
2835     if (!b->imax) {
2836       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2837       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2838 
2839       b->free_imax_ilen = PETSC_TRUE;
2840     }
2841     /* b->ilen will count nonzeros in each block row so far. */
2842     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2843     if (!nnz) {
2844       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2845       else if (nz < 0) nz = 1;
2846       for (i=0; i<mbs; i++) b->imax[i] = nz;
2847       nz = nz*mbs;
2848     } else {
2849       nz = 0;
2850       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2851     }
2852 
2853     /* allocate the matrix space */
2854     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2855     ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2856     ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2857     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2858     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2859 
2860     b->singlemalloc = PETSC_TRUE;
2861     b->i[0]         = 0;
2862     for (i=1; i<mbs+1; i++) {
2863       b->i[i] = b->i[i-1] + b->imax[i-1];
2864     }
2865     b->free_a  = PETSC_TRUE;
2866     b->free_ij = PETSC_TRUE;
2867   } else {
2868     b->free_a  = PETSC_FALSE;
2869     b->free_ij = PETSC_FALSE;
2870   }
2871 
2872   b->bs2              = bs2;
2873   b->mbs              = mbs;
2874   b->nz               = 0;
2875   b->maxnz            = nz;
2876   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2877   B->was_assembled    = PETSC_FALSE;
2878   B->assembled        = PETSC_FALSE;
2879   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2880   PetscFunctionReturn(0);
2881 }
2882 
2883 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2884 {
2885   PetscInt       i,m,nz,nz_max=0,*nnz;
2886   PetscScalar    *values=0;
2887   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2888   PetscErrorCode ierr;
2889 
2890   PetscFunctionBegin;
2891   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2892   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2893   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2894   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2895   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2896   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2897   m    = B->rmap->n/bs;
2898 
2899   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2900   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2901   for (i=0; i<m; i++) {
2902     nz = ii[i+1]- ii[i];
2903     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2904     nz_max = PetscMax(nz_max, nz);
2905     nnz[i] = nz;
2906   }
2907   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2908   ierr = PetscFree(nnz);CHKERRQ(ierr);
2909 
2910   values = (PetscScalar*)V;
2911   if (!values) {
2912     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2913   }
2914   for (i=0; i<m; i++) {
2915     PetscInt          ncols  = ii[i+1] - ii[i];
2916     const PetscInt    *icols = jj + ii[i];
2917     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2918     if (!roworiented) {
2919       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2920     } else {
2921       PetscInt j;
2922       for (j=0; j<ncols; j++) {
2923         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2924         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2925       }
2926     }
2927   }
2928   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2929   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2930   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2931   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2932   PetscFunctionReturn(0);
2933 }
2934 
2935 /*MC
2936    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2937    block sparse compressed row format.
2938 
2939    Options Database Keys:
2940 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2941 
2942   Level: beginner
2943 
2944 .seealso: MatCreateSeqBAIJ()
2945 M*/
2946 
2947 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
2948 
2949 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
2950 {
2951   PetscErrorCode ierr;
2952   PetscMPIInt    size;
2953   Mat_SeqBAIJ    *b;
2954 
2955   PetscFunctionBegin;
2956   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2957   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2958 
2959   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2960   B->data = (void*)b;
2961   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2962 
2963   b->row          = 0;
2964   b->col          = 0;
2965   b->icol         = 0;
2966   b->reallocs     = 0;
2967   b->saved_values = 0;
2968 
2969   b->roworiented        = PETSC_TRUE;
2970   b->nonew              = 0;
2971   b->diag               = 0;
2972   B->spptr              = 0;
2973   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
2974   b->keepnonzeropattern = PETSC_FALSE;
2975 
2976   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2977   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2978   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2979   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2980   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2981   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2982   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2983   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
2984   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
2985 #if defined(PETSC_HAVE_HYPRE)
2986   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_baij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
2987 #endif
2988   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
2989   PetscFunctionReturn(0);
2990 }
2991 
2992 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
2993 {
2994   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
2995   PetscErrorCode ierr;
2996   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2997 
2998   PetscFunctionBegin;
2999   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3000 
3001   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3002     c->imax           = a->imax;
3003     c->ilen           = a->ilen;
3004     c->free_imax_ilen = PETSC_FALSE;
3005   } else {
3006     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3007     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3008     for (i=0; i<mbs; i++) {
3009       c->imax[i] = a->imax[i];
3010       c->ilen[i] = a->ilen[i];
3011     }
3012     c->free_imax_ilen = PETSC_TRUE;
3013   }
3014 
3015   /* allocate the matrix space */
3016   if (mallocmatspace) {
3017     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3018       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3019       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3020 
3021       c->i            = a->i;
3022       c->j            = a->j;
3023       c->singlemalloc = PETSC_FALSE;
3024       c->free_a       = PETSC_TRUE;
3025       c->free_ij      = PETSC_FALSE;
3026       c->parent       = A;
3027       C->preallocated = PETSC_TRUE;
3028       C->assembled    = PETSC_TRUE;
3029 
3030       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3031       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3032       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3033     } else {
3034       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3035       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3036 
3037       c->singlemalloc = PETSC_TRUE;
3038       c->free_a       = PETSC_TRUE;
3039       c->free_ij      = PETSC_TRUE;
3040 
3041       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3042       if (mbs > 0) {
3043         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3044         if (cpvalues == MAT_COPY_VALUES) {
3045           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3046         } else {
3047           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3048         }
3049       }
3050       C->preallocated = PETSC_TRUE;
3051       C->assembled    = PETSC_TRUE;
3052     }
3053   }
3054 
3055   c->roworiented = a->roworiented;
3056   c->nonew       = a->nonew;
3057 
3058   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3059   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3060 
3061   c->bs2         = a->bs2;
3062   c->mbs         = a->mbs;
3063   c->nbs         = a->nbs;
3064 
3065   if (a->diag) {
3066     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3067       c->diag      = a->diag;
3068       c->free_diag = PETSC_FALSE;
3069     } else {
3070       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3071       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3072       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3073       c->free_diag = PETSC_TRUE;
3074     }
3075   } else c->diag = 0;
3076 
3077   c->nz         = a->nz;
3078   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3079   c->solve_work = NULL;
3080   c->mult_work  = NULL;
3081   c->sor_workt  = NULL;
3082   c->sor_work   = NULL;
3083 
3084   c->compressedrow.use   = a->compressedrow.use;
3085   c->compressedrow.nrows = a->compressedrow.nrows;
3086   if (a->compressedrow.use) {
3087     i    = a->compressedrow.nrows;
3088     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3089     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3090     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3091     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3092   } else {
3093     c->compressedrow.use    = PETSC_FALSE;
3094     c->compressedrow.i      = NULL;
3095     c->compressedrow.rindex = NULL;
3096   }
3097   C->nonzerostate = A->nonzerostate;
3098 
3099   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3100   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3101   PetscFunctionReturn(0);
3102 }
3103 
3104 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3105 {
3106   PetscErrorCode ierr;
3107 
3108   PetscFunctionBegin;
3109   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3110   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3111   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3112   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3113   PetscFunctionReturn(0);
3114 }
3115 
3116 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3117 {
3118   Mat_SeqBAIJ    *a;
3119   PetscErrorCode ierr;
3120   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3121   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3122   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3123   PetscInt       *masked,nmask,tmp,bs2,ishift;
3124   PetscMPIInt    size;
3125   int            fd;
3126   PetscScalar    *aa;
3127   MPI_Comm       comm;
3128 
3129   PetscFunctionBegin;
3130   /* force binary viewer to load .info file if it has not yet done so */
3131   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3132   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3133   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3134   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3135   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3136   if (bs < 0) bs = 1;
3137   bs2  = bs*bs;
3138 
3139   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3140   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3141   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3142   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3143   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3144   M = header[1]; N = header[2]; nz = header[3];
3145 
3146   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3147   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3148 
3149   /*
3150      This code adds extra rows to make sure the number of rows is
3151     divisible by the blocksize
3152   */
3153   mbs        = M/bs;
3154   extra_rows = bs - M + bs*(mbs);
3155   if (extra_rows == bs) extra_rows = 0;
3156   else mbs++;
3157   if (extra_rows) {
3158     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3159   }
3160 
3161   /* Set global sizes if not already set */
3162   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3163     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3164   } else { /* Check if the matrix global sizes are correct */
3165     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3166     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3167       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3168     }
3169     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3170   }
3171 
3172   /* read in row lengths */
3173   ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
3174   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3175   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3176 
3177   /* read in column indices */
3178   ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr);
3179   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3180   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3181 
3182   /* loop over row lengths determining block row lengths */
3183   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3184   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3185   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3186   rowcount = 0;
3187   nzcount  = 0;
3188   for (i=0; i<mbs; i++) {
3189     nmask = 0;
3190     for (j=0; j<bs; j++) {
3191       kmax = rowlengths[rowcount];
3192       for (k=0; k<kmax; k++) {
3193         tmp = jj[nzcount++]/bs;
3194         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3195       }
3196       rowcount++;
3197     }
3198     browlengths[i] += nmask;
3199     /* zero out the mask elements we set */
3200     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3201   }
3202 
3203   /* Do preallocation  */
3204   ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr);
3205   a    = (Mat_SeqBAIJ*)newmat->data;
3206 
3207   /* set matrix "i" values */
3208   a->i[0] = 0;
3209   for (i=1; i<= mbs; i++) {
3210     a->i[i]      = a->i[i-1] + browlengths[i-1];
3211     a->ilen[i-1] = browlengths[i-1];
3212   }
3213   a->nz = 0;
3214   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3215 
3216   /* read in nonzero values */
3217   ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr);
3218   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3219   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3220 
3221   /* set "a" and "j" values into matrix */
3222   nzcount = 0; jcount = 0;
3223   for (i=0; i<mbs; i++) {
3224     nzcountb = nzcount;
3225     nmask    = 0;
3226     for (j=0; j<bs; j++) {
3227       kmax = rowlengths[i*bs+j];
3228       for (k=0; k<kmax; k++) {
3229         tmp = jj[nzcount++]/bs;
3230         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3231       }
3232     }
3233     /* sort the masked values */
3234     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3235 
3236     /* set "j" values into matrix */
3237     maskcount = 1;
3238     for (j=0; j<nmask; j++) {
3239       a->j[jcount++]  = masked[j];
3240       mask[masked[j]] = maskcount++;
3241     }
3242     /* set "a" values into matrix */
3243     ishift = bs2*a->i[i];
3244     for (j=0; j<bs; j++) {
3245       kmax = rowlengths[i*bs+j];
3246       for (k=0; k<kmax; k++) {
3247         tmp       = jj[nzcountb]/bs;
3248         block     = mask[tmp] - 1;
3249         point     = jj[nzcountb] - bs*tmp;
3250         idx       = ishift + bs2*block + j + bs*point;
3251         a->a[idx] = (MatScalar)aa[nzcountb++];
3252       }
3253     }
3254     /* zero out the mask elements we set */
3255     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3256   }
3257   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3258 
3259   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3260   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3261   ierr = PetscFree(aa);CHKERRQ(ierr);
3262   ierr = PetscFree(jj);CHKERRQ(ierr);
3263   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3264 
3265   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3266   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3267   PetscFunctionReturn(0);
3268 }
3269 
3270 /*@C
3271    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3272    compressed row) format.  For good matrix assembly performance the
3273    user should preallocate the matrix storage by setting the parameter nz
3274    (or the array nnz).  By setting these parameters accurately, performance
3275    during matrix assembly can be increased by more than a factor of 50.
3276 
3277    Collective on MPI_Comm
3278 
3279    Input Parameters:
3280 +  comm - MPI communicator, set to PETSC_COMM_SELF
3281 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3282           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3283 .  m - number of rows
3284 .  n - number of columns
3285 .  nz - number of nonzero blocks  per block row (same for all rows)
3286 -  nnz - array containing the number of nonzero blocks in the various block rows
3287          (possibly different for each block row) or NULL
3288 
3289    Output Parameter:
3290 .  A - the matrix
3291 
3292    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3293    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3294    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3295 
3296    Options Database Keys:
3297 .   -mat_no_unroll - uses code that does not unroll the loops in the
3298                      block calculations (much slower)
3299 .    -mat_block_size - size of the blocks to use
3300 
3301    Level: intermediate
3302 
3303    Notes:
3304    The number of rows and columns must be divisible by blocksize.
3305 
3306    If the nnz parameter is given then the nz parameter is ignored
3307 
3308    A nonzero block is any block that as 1 or more nonzeros in it
3309 
3310    The block AIJ format is fully compatible with standard Fortran 77
3311    storage.  That is, the stored row and column indices can begin at
3312    either one (as in Fortran) or zero.  See the users' manual for details.
3313 
3314    Specify the preallocated storage with either nz or nnz (not both).
3315    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3316    allocation.  See Users-Manual: ch_mat for details.
3317    matrices.
3318 
3319 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3320 @*/
3321 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3322 {
3323   PetscErrorCode ierr;
3324 
3325   PetscFunctionBegin;
3326   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3327   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3328   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3329   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3330   PetscFunctionReturn(0);
3331 }
3332 
3333 /*@C
3334    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3335    per row in the matrix. For good matrix assembly performance the
3336    user should preallocate the matrix storage by setting the parameter nz
3337    (or the array nnz).  By setting these parameters accurately, performance
3338    during matrix assembly can be increased by more than a factor of 50.
3339 
3340    Collective on MPI_Comm
3341 
3342    Input Parameters:
3343 +  B - the matrix
3344 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3345           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3346 .  nz - number of block nonzeros per block row (same for all rows)
3347 -  nnz - array containing the number of block nonzeros in the various block rows
3348          (possibly different for each block row) or NULL
3349 
3350    Options Database Keys:
3351 .   -mat_no_unroll - uses code that does not unroll the loops in the
3352                      block calculations (much slower)
3353 .   -mat_block_size - size of the blocks to use
3354 
3355    Level: intermediate
3356 
3357    Notes:
3358    If the nnz parameter is given then the nz parameter is ignored
3359 
3360    You can call MatGetInfo() to get information on how effective the preallocation was;
3361    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3362    You can also run with the option -info and look for messages with the string
3363    malloc in them to see if additional memory allocation was needed.
3364 
3365    The block AIJ format is fully compatible with standard Fortran 77
3366    storage.  That is, the stored row and column indices can begin at
3367    either one (as in Fortran) or zero.  See the users' manual for details.
3368 
3369    Specify the preallocated storage with either nz or nnz (not both).
3370    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3371    allocation.  See Users-Manual: ch_mat for details.
3372 
3373 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3374 @*/
3375 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3376 {
3377   PetscErrorCode ierr;
3378 
3379   PetscFunctionBegin;
3380   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3381   PetscValidType(B,1);
3382   PetscValidLogicalCollectiveInt(B,bs,2);
3383   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3384   PetscFunctionReturn(0);
3385 }
3386 
3387 /*@C
3388    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3389    (the default sequential PETSc format).
3390 
3391    Collective on MPI_Comm
3392 
3393    Input Parameters:
3394 +  B - the matrix
3395 .  i - the indices into j for the start of each local row (starts with zero)
3396 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3397 -  v - optional values in the matrix
3398 
3399    Level: developer
3400 
3401    Notes:
3402    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3403    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3404    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3405    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3406    block column and the second index is over columns within a block.
3407 
3408 .keywords: matrix, aij, compressed row, sparse
3409 
3410 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3411 @*/
3412 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3413 {
3414   PetscErrorCode ierr;
3415 
3416   PetscFunctionBegin;
3417   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3418   PetscValidType(B,1);
3419   PetscValidLogicalCollectiveInt(B,bs,2);
3420   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3421   PetscFunctionReturn(0);
3422 }
3423 
3424 
3425 /*@
3426      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3427 
3428      Collective on MPI_Comm
3429 
3430    Input Parameters:
3431 +  comm - must be an MPI communicator of size 1
3432 .  bs - size of block
3433 .  m - number of rows
3434 .  n - number of columns
3435 .  i - row indices
3436 .  j - column indices
3437 -  a - matrix values
3438 
3439    Output Parameter:
3440 .  mat - the matrix
3441 
3442    Level: advanced
3443 
3444    Notes:
3445        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3446     once the matrix is destroyed
3447 
3448        You cannot set new nonzero locations into this matrix, that will generate an error.
3449 
3450        The i and j indices are 0 based
3451 
3452        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3453 
3454       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3455       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3456       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3457       with column-major ordering within blocks.
3458 
3459 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3460 
3461 @*/
3462 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3463 {
3464   PetscErrorCode ierr;
3465   PetscInt       ii;
3466   Mat_SeqBAIJ    *baij;
3467 
3468   PetscFunctionBegin;
3469   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3470   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3471 
3472   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3473   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3474   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3475   ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3476   baij = (Mat_SeqBAIJ*)(*mat)->data;
3477   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3478   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3479 
3480   baij->i = i;
3481   baij->j = j;
3482   baij->a = a;
3483 
3484   baij->singlemalloc = PETSC_FALSE;
3485   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3486   baij->free_a       = PETSC_FALSE;
3487   baij->free_ij      = PETSC_FALSE;
3488 
3489   for (ii=0; ii<m; ii++) {
3490     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3491 #if defined(PETSC_USE_DEBUG)
3492     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3493 #endif
3494   }
3495 #if defined(PETSC_USE_DEBUG)
3496   for (ii=0; ii<baij->i[m]; ii++) {
3497     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3498     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3499   }
3500 #endif
3501 
3502   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3503   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3504   PetscFunctionReturn(0);
3505 }
3506 
3507 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3508 {
3509   PetscErrorCode ierr;
3510   PetscMPIInt    size;
3511 
3512   PetscFunctionBegin;
3513   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3514   if (size == 1 && scall == MAT_REUSE_MATRIX) {
3515     ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
3516   } else {
3517     ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3518   }
3519   PetscFunctionReturn(0);
3520 }
3521