xref: /petsc/src/mat/impls/baij/seq/baij.c (revision cc310fdddfd1255ff354b307e63afef1e66c265d)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc-private/kernels/blockinvert.h>
9 #include <petsc-private/kernels/blockmatmult.h>
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) {
23     if (values) *values = a->idiag;
24     PetscFunctionReturn(0);
25   }
26   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
27   diag_offset = a->diag;
28   if (!a->idiag) {
29     ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr);
30     ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
31   }
32   diag  = a->idiag;
33   mdiag = a->idiag+bs2*mbs;
34   if (values) *values = a->idiag;
35   /* factor and invert each block */
36   switch (bs) {
37   case 1:
38     for (i=0; i<mbs; i++) {
39       odiag    = v + 1*diag_offset[i];
40       diag[0]  = odiag[0];
41       mdiag[0] = odiag[0];
42       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
43       diag    += 1;
44       mdiag   += 1;
45     }
46     break;
47   case 2:
48     for (i=0; i<mbs; i++) {
49       odiag    = v + 4*diag_offset[i];
50       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
53       diag    += 4;
54       mdiag   += 4;
55     }
56     break;
57   case 3:
58     for (i=0; i<mbs; i++) {
59       odiag    = v + 9*diag_offset[i];
60       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62       diag[8]  = odiag[8];
63       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65       mdiag[8] = odiag[8];
66       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
67       diag    += 9;
68       mdiag   += 9;
69     }
70     break;
71   case 4:
72     for (i=0; i<mbs; i++) {
73       odiag  = v + 16*diag_offset[i];
74       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
75       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
76       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
77       diag  += 16;
78       mdiag += 16;
79     }
80     break;
81   case 5:
82     for (i=0; i<mbs; i++) {
83       odiag  = v + 25*diag_offset[i];
84       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
85       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
86       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
87       diag  += 25;
88       mdiag += 25;
89     }
90     break;
91   case 6:
92     for (i=0; i<mbs; i++) {
93       odiag  = v + 36*diag_offset[i];
94       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
95       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
96       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
97       diag  += 36;
98       mdiag += 36;
99     }
100     break;
101   case 7:
102     for (i=0; i<mbs; i++) {
103       odiag  = v + 49*diag_offset[i];
104       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
105       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
106       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
107       diag  += 49;
108       mdiag += 49;
109     }
110     break;
111   default:
112     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
113     for (i=0; i<mbs; i++) {
114       odiag  = v + bs2*diag_offset[i];
115       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
116       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
117       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
118       diag  += bs2;
119       mdiag += bs2;
120     }
121     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
122   }
123   a->idiagvalid = PETSC_TRUE;
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "MatSOR_SeqBAIJ"
129 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130 {
131   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
132   PetscScalar       *x,*work,*w,*workt,*t;
133   const MatScalar   *v,*aa = a->a, *idiag;
134   const PetscScalar *b,*xb;
135   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
136   PetscErrorCode    ierr;
137   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
138   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
139 
140   PetscFunctionBegin;
141   its = its*lits;
142   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
143   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
144   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
145   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
146   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
147 
148   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
149 
150   if (!m) PetscFunctionReturn(0);
151   diag  = a->diag;
152   idiag = a->idiag;
153   k    = PetscMax(A->rmap->n,A->cmap->n);
154   if (!a->mult_work) {
155     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
156   }
157   if (!a->sor_workt) {
158     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
159   }
160   if (!a->sor_work) {
161     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
162   }
163   work = a->mult_work;
164   t    = a->sor_workt;
165   w    = a->sor_work;
166 
167   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
168   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
169 
170   if (flag & SOR_ZERO_INITIAL_GUESS) {
171     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
172       switch (bs) {
173       case 1:
174         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
175         t[0] = b[0];
176         i2     = 1;
177         idiag += 1;
178         for (i=1; i<m; i++) {
179           v  = aa + ai[i];
180           vi = aj + ai[i];
181           nz = diag[i] - ai[i];
182           s[0] = b[i2];
183           for (j=0; j<nz; j++) {
184             xw[0] = x[vi[j]];
185             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
186           }
187           t[i2] = s[0];
188           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
189           x[i2]  = xw[0];
190           idiag += 1;
191           i2    += 1;
192         }
193         break;
194       case 2:
195         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
196         t[0] = b[0]; t[1] = b[1];
197         i2     = 2;
198         idiag += 4;
199         for (i=1; i<m; i++) {
200           v  = aa + 4*ai[i];
201           vi = aj + ai[i];
202           nz = diag[i] - ai[i];
203           s[0] = b[i2]; s[1] = b[i2+1];
204           for (j=0; j<nz; j++) {
205             idx = 2*vi[j];
206             it  = 4*j;
207             xw[0] = x[idx]; xw[1] = x[1+idx];
208             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
209           }
210           t[i2] = s[0]; t[i2+1] = s[1];
211           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
212           x[i2]   = xw[0]; x[i2+1] = xw[1];
213           idiag  += 4;
214           i2     += 2;
215         }
216         break;
217       case 3:
218         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
219         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
220         i2     = 3;
221         idiag += 9;
222         for (i=1; i<m; i++) {
223           v  = aa + 9*ai[i];
224           vi = aj + ai[i];
225           nz = diag[i] - ai[i];
226           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
227           while (nz--) {
228             idx = 3*(*vi++);
229             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
230             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
231             v  += 9;
232           }
233           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
234           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
235           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
236           idiag  += 9;
237           i2     += 3;
238         }
239         break;
240       case 4:
241         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
242         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
243         i2     = 4;
244         idiag += 16;
245         for (i=1; i<m; i++) {
246           v  = aa + 16*ai[i];
247           vi = aj + ai[i];
248           nz = diag[i] - ai[i];
249           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
250           while (nz--) {
251             idx = 4*(*vi++);
252             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
253             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
254             v  += 16;
255           }
256           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
257           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
258           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
259           idiag  += 16;
260           i2     += 4;
261         }
262         break;
263       case 5:
264         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
265         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
266         i2     = 5;
267         idiag += 25;
268         for (i=1; i<m; i++) {
269           v  = aa + 25*ai[i];
270           vi = aj + ai[i];
271           nz = diag[i] - ai[i];
272           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
273           while (nz--) {
274             idx = 5*(*vi++);
275             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
276             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
277             v  += 25;
278           }
279           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
280           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
281           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
282           idiag  += 25;
283           i2     += 5;
284         }
285         break;
286       case 6:
287         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
288         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
289         i2     = 6;
290         idiag += 36;
291         for (i=1; i<m; i++) {
292           v  = aa + 36*ai[i];
293           vi = aj + ai[i];
294           nz = diag[i] - ai[i];
295           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
296           while (nz--) {
297             idx = 6*(*vi++);
298             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
299             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
300             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
301             v  += 36;
302           }
303           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
304           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
305           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
306           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
307           idiag  += 36;
308           i2     += 6;
309         }
310         break;
311       case 7:
312         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
313         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
314         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
315         i2     = 7;
316         idiag += 49;
317         for (i=1; i<m; i++) {
318           v  = aa + 49*ai[i];
319           vi = aj + ai[i];
320           nz = diag[i] - ai[i];
321           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
322           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
323           while (nz--) {
324             idx = 7*(*vi++);
325             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
326             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
327             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
328             v  += 49;
329           }
330           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
331           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
332           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
333           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
334           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
335           idiag  += 49;
336           i2     += 7;
337         }
338         break;
339       default:
340         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
341         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
342         i2     = bs;
343         idiag += bs2;
344         for (i=1; i<m; i++) {
345           v  = aa + bs2*ai[i];
346           vi = aj + ai[i];
347           nz = diag[i] - ai[i];
348 
349           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
350           /* copy all rows of x that are needed into contiguous space */
351           workt = work;
352           for (j=0; j<nz; j++) {
353             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
354             workt += bs;
355           }
356           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
357           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
358           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
359 
360           idiag += bs2;
361           i2    += bs;
362         }
363         break;
364       }
365       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
366       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
367       xb = t;
368     }
369     else xb = b;
370     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
371       idiag = a->idiag+bs2*(a->mbs-1);
372       i2 = bs * (m-1);
373       switch (bs) {
374       case 1:
375         s[0]  = xb[i2];
376         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
377         x[i2] = xw[0];
378         i2   -= 1;
379         for (i=m-2; i>=0; i--) {
380           v  = aa + (diag[i]+1);
381           vi = aj + diag[i] + 1;
382           nz = ai[i+1] - diag[i] - 1;
383           s[0] = xb[i2];
384           for (j=0; j<nz; j++) {
385             xw[0] = x[vi[j]];
386             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
387           }
388           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
389           x[i2]  = xw[0];
390           idiag -= 1;
391           i2    -= 1;
392         }
393         break;
394       case 2:
395         s[0]  = xb[i2]; s[1] = xb[i2+1];
396         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
397         x[i2] = xw[0]; x[i2+1] = xw[1];
398         i2    -= 2;
399         idiag -= 4;
400         for (i=m-2; i>=0; i--) {
401           v  = aa + 4*(diag[i] + 1);
402           vi = aj + diag[i] + 1;
403           nz = ai[i+1] - diag[i] - 1;
404           s[0] = xb[i2]; s[1] = xb[i2+1];
405           for (j=0; j<nz; j++) {
406             idx = 2*vi[j];
407             it  = 4*j;
408             xw[0] = x[idx]; xw[1] = x[1+idx];
409             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
410           }
411           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
412           x[i2]   = xw[0]; x[i2+1] = xw[1];
413           idiag  -= 4;
414           i2     -= 2;
415         }
416         break;
417       case 3:
418         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
419         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
420         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
421         i2    -= 3;
422         idiag -= 9;
423         for (i=m-2; i>=0; i--) {
424           v  = aa + 9*(diag[i]+1);
425           vi = aj + diag[i] + 1;
426           nz = ai[i+1] - diag[i] - 1;
427           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
428           while (nz--) {
429             idx = 3*(*vi++);
430             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
431             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
432             v  += 9;
433           }
434           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
435           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
436           idiag  -= 9;
437           i2     -= 3;
438         }
439         break;
440       case 4:
441         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
442         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
443         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
444         i2    -= 4;
445         idiag -= 16;
446         for (i=m-2; i>=0; i--) {
447           v  = aa + 16*(diag[i]+1);
448           vi = aj + diag[i] + 1;
449           nz = ai[i+1] - diag[i] - 1;
450           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
451           while (nz--) {
452             idx = 4*(*vi++);
453             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
454             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
455             v  += 16;
456           }
457           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
458           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
459           idiag  -= 16;
460           i2     -= 4;
461         }
462         break;
463       case 5:
464         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
465         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
466         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
467         i2    -= 5;
468         idiag -= 25;
469         for (i=m-2; i>=0; i--) {
470           v  = aa + 25*(diag[i]+1);
471           vi = aj + diag[i] + 1;
472           nz = ai[i+1] - diag[i] - 1;
473           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
474           while (nz--) {
475             idx = 5*(*vi++);
476             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
477             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
478             v  += 25;
479           }
480           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
481           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
482           idiag  -= 25;
483           i2     -= 5;
484         }
485         break;
486       case 6:
487         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
488         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
489         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
490         i2    -= 6;
491         idiag -= 36;
492         for (i=m-2; i>=0; i--) {
493           v  = aa + 36*(diag[i]+1);
494           vi = aj + diag[i] + 1;
495           nz = ai[i+1] - diag[i] - 1;
496           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
497           while (nz--) {
498             idx = 6*(*vi++);
499             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
500             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
501             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
502             v  += 36;
503           }
504           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
505           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
506           idiag  -= 36;
507           i2     -= 6;
508         }
509         break;
510       case 7:
511         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
512         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
513         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
514         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
515         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
516         i2    -= 7;
517         idiag -= 49;
518         for (i=m-2; i>=0; i--) {
519           v  = aa + 49*(diag[i]+1);
520           vi = aj + diag[i] + 1;
521           nz = ai[i+1] - diag[i] - 1;
522           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
523           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
524           while (nz--) {
525             idx = 7*(*vi++);
526             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
527             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
528             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
529             v  += 49;
530           }
531           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
532           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
533           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
534           idiag  -= 49;
535           i2     -= 7;
536         }
537         break;
538       default:
539         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
540         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
541         i2    -= bs;
542         idiag -= bs2;
543         for (i=m-2; i>=0; i--) {
544           v  = aa + bs2*(diag[i]+1);
545           vi = aj + diag[i] + 1;
546           nz = ai[i+1] - diag[i] - 1;
547 
548           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
549           /* copy all rows of x that are needed into contiguous space */
550           workt = work;
551           for (j=0; j<nz; j++) {
552             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
553             workt += bs;
554           }
555           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
556           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
557 
558           idiag -= bs2;
559           i2    -= bs;
560         }
561         break;
562       }
563       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
564     }
565     its--;
566   }
567   while (its--) {
568     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
569       idiag = a->idiag;
570       i2 = 0;
571       switch (bs) {
572       case 1:
573         for (i=0; i<m; i++) {
574           v  = aa + ai[i];
575           vi = aj + ai[i];
576           nz = ai[i+1] - ai[i];
577           s[0] = b[i2];
578           for (j=0; j<nz; j++) {
579             xw[0] = x[vi[j]];
580             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
581           }
582           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
583           x[i2] += xw[0];
584           idiag += 1;
585           i2    += 1;
586         }
587         break;
588       case 2:
589         for (i=0; i<m; i++) {
590           v  = aa + 4*ai[i];
591           vi = aj + ai[i];
592           nz = ai[i+1] - ai[i];
593           s[0] = b[i2]; s[1] = b[i2+1];
594           for (j=0; j<nz; j++) {
595             idx = 2*vi[j];
596             it  = 4*j;
597             xw[0] = x[idx]; xw[1] = x[1+idx];
598             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
599           }
600           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
601           x[i2]  += xw[0]; x[i2+1] += xw[1];
602           idiag  += 4;
603           i2     += 2;
604         }
605         break;
606       case 3:
607         for (i=0; i<m; i++) {
608           v  = aa + 9*ai[i];
609           vi = aj + ai[i];
610           nz = ai[i+1] - ai[i];
611           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
612           while (nz--) {
613             idx = 3*(*vi++);
614             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
615             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
616             v  += 9;
617           }
618           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
619           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
620           idiag  += 9;
621           i2     += 3;
622         }
623         break;
624       case 4:
625         for (i=0; i<m; i++) {
626           v  = aa + 16*ai[i];
627           vi = aj + ai[i];
628           nz = ai[i+1] - ai[i];
629           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
630           while (nz--) {
631             idx = 4*(*vi++);
632             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
633             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
634             v  += 16;
635           }
636           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
637           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
638           idiag  += 16;
639           i2     += 4;
640         }
641         break;
642       case 5:
643         for (i=0; i<m; i++) {
644           v  = aa + 25*ai[i];
645           vi = aj + ai[i];
646           nz = ai[i+1] - ai[i];
647           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
648           while (nz--) {
649             idx = 5*(*vi++);
650             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
651             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
652             v  += 25;
653           }
654           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
655           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
656           idiag  += 25;
657           i2     += 5;
658         }
659         break;
660       case 6:
661         for (i=0; i<m; i++) {
662           v  = aa + 36*ai[i];
663           vi = aj + ai[i];
664           nz = ai[i+1] - ai[i];
665           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
666           while (nz--) {
667             idx = 6*(*vi++);
668             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
669             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
670             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
671             v  += 36;
672           }
673           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
674           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
675           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
676           idiag  += 36;
677           i2     += 6;
678         }
679         break;
680       case 7:
681         for (i=0; i<m; i++) {
682           v  = aa + 49*ai[i];
683           vi = aj + ai[i];
684           nz = ai[i+1] - ai[i];
685           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
686           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
687           while (nz--) {
688             idx = 7*(*vi++);
689             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
690             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
691             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
692             v  += 49;
693           }
694           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
695           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
696           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
697           idiag  += 49;
698           i2     += 7;
699         }
700         break;
701       default:
702         for (i=0; i<m; i++) {
703           v  = aa + bs2*ai[i];
704           vi = aj + ai[i];
705           nz = ai[i+1] - ai[i];
706 
707           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
708           /* copy all rows of x that are needed into contiguous space */
709           workt = work;
710           for (j=0; j<nz; j++) {
711             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
712             workt += bs;
713           }
714           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
715           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
716 
717           idiag += bs2;
718           i2    += bs;
719         }
720         break;
721       }
722       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
723     }
724     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
725       idiag = a->idiag+bs2*(a->mbs-1);
726       i2 = bs * (m-1);
727       switch (bs) {
728       case 1:
729         for (i=m-1; i>=0; i--) {
730           v  = aa + ai[i];
731           vi = aj + ai[i];
732           nz = ai[i+1] - ai[i];
733           s[0] = b[i2];
734           for (j=0; j<nz; j++) {
735             xw[0] = x[vi[j]];
736             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
737           }
738           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
739           x[i2] += xw[0];
740           idiag -= 1;
741           i2    -= 1;
742         }
743         break;
744       case 2:
745         for (i=m-1; i>=0; i--) {
746           v  = aa + 4*ai[i];
747           vi = aj + ai[i];
748           nz = ai[i+1] - ai[i];
749           s[0] = b[i2]; s[1] = b[i2+1];
750           for (j=0; j<nz; j++) {
751             idx = 2*vi[j];
752             it  = 4*j;
753             xw[0] = x[idx]; xw[1] = x[1+idx];
754             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
755           }
756           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
757           x[i2]  += xw[0]; x[i2+1] += xw[1];
758           idiag  -= 4;
759           i2     -= 2;
760         }
761         break;
762       case 3:
763         for (i=m-1; i>=0; i--) {
764           v  = aa + 9*ai[i];
765           vi = aj + ai[i];
766           nz = ai[i+1] - ai[i];
767           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
768           while (nz--) {
769             idx = 3*(*vi++);
770             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
771             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
772             v  += 9;
773           }
774           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
775           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
776           idiag  -= 9;
777           i2     -= 3;
778         }
779         break;
780       case 4:
781         for (i=m-1; i>=0; i--) {
782           v  = aa + 16*ai[i];
783           vi = aj + ai[i];
784           nz = ai[i+1] - ai[i];
785           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
786           while (nz--) {
787             idx = 4*(*vi++);
788             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
789             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
790             v  += 16;
791           }
792           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
793           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
794           idiag  -= 16;
795           i2     -= 4;
796         }
797         break;
798       case 5:
799         for (i=m-1; i>=0; i--) {
800           v  = aa + 25*ai[i];
801           vi = aj + ai[i];
802           nz = ai[i+1] - ai[i];
803           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
804           while (nz--) {
805             idx = 5*(*vi++);
806             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
807             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
808             v  += 25;
809           }
810           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
811           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
812           idiag  -= 25;
813           i2     -= 5;
814         }
815         break;
816       case 6:
817         for (i=m-1; i>=0; i--) {
818           v  = aa + 36*ai[i];
819           vi = aj + ai[i];
820           nz = ai[i+1] - ai[i];
821           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
822           while (nz--) {
823             idx = 6*(*vi++);
824             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
825             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
826             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
827             v  += 36;
828           }
829           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
830           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
831           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
832           idiag  -= 36;
833           i2     -= 6;
834         }
835         break;
836       case 7:
837         for (i=m-1; i>=0; i--) {
838           v  = aa + 49*ai[i];
839           vi = aj + ai[i];
840           nz = ai[i+1] - ai[i];
841           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
842           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
843           while (nz--) {
844             idx = 7*(*vi++);
845             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
846             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
847             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
848             v  += 49;
849           }
850           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
851           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
852           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
853           idiag  -= 49;
854           i2     -= 7;
855         }
856         break;
857       default:
858         for (i=m-1; i>=0; i--) {
859           v  = aa + bs2*ai[i];
860           vi = aj + ai[i];
861           nz = ai[i+1] - ai[i];
862 
863           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
864           /* copy all rows of x that are needed into contiguous space */
865           workt = work;
866           for (j=0; j<nz; j++) {
867             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
868             workt += bs;
869           }
870           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
871           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
872 
873           idiag -= bs2;
874           i2    -= bs;
875         }
876         break;
877       }
878       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
879     }
880   }
881   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
882   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
883   PetscFunctionReturn(0);
884 }
885 
886 
887 /*
888     Special version for direct calls from Fortran (Used in PETSc-fun3d)
889 */
890 #if defined(PETSC_HAVE_FORTRAN_CAPS)
891 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
892 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
893 #define matsetvaluesblocked4_ matsetvaluesblocked4
894 #endif
895 
896 #undef __FUNCT__
897 #define __FUNCT__ "matsetvaluesblocked4_"
898 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
899 {
900   Mat               A  = *AA;
901   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
902   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
903   PetscInt          *ai    =a->i,*ailen=a->ilen;
904   PetscInt          *aj    =a->j,stepval,lastcol = -1;
905   const PetscScalar *value = v;
906   MatScalar         *ap,*aa = a->a,*bap;
907 
908   PetscFunctionBegin;
909   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
910   stepval = (n-1)*4;
911   for (k=0; k<m; k++) { /* loop over added rows */
912     row  = im[k];
913     rp   = aj + ai[row];
914     ap   = aa + 16*ai[row];
915     nrow = ailen[row];
916     low  = 0;
917     high = nrow;
918     for (l=0; l<n; l++) { /* loop over added columns */
919       col = in[l];
920       if (col <= lastcol)  low = 0;
921       else                high = nrow;
922       lastcol = col;
923       value   = v + k*(stepval+4 + l)*4;
924       while (high-low > 7) {
925         t = (low+high)/2;
926         if (rp[t] > col) high = t;
927         else             low  = t;
928       }
929       for (i=low; i<high; i++) {
930         if (rp[i] > col) break;
931         if (rp[i] == col) {
932           bap = ap +  16*i;
933           for (ii=0; ii<4; ii++,value+=stepval) {
934             for (jj=ii; jj<16; jj+=4) {
935               bap[jj] += *value++;
936             }
937           }
938           goto noinsert2;
939         }
940       }
941       N = nrow++ - 1;
942       high++; /* added new column index thus must search to one higher than before */
943       /* shift up all the later entries in this row */
944       for (ii=N; ii>=i; ii--) {
945         rp[ii+1] = rp[ii];
946         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
947       }
948       if (N >= i) {
949         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
950       }
951       rp[i] = col;
952       bap   = ap +  16*i;
953       for (ii=0; ii<4; ii++,value+=stepval) {
954         for (jj=ii; jj<16; jj+=4) {
955           bap[jj] = *value++;
956         }
957       }
958       noinsert2:;
959       low = i;
960     }
961     ailen[row] = nrow;
962   }
963   PetscFunctionReturnVoid();
964 }
965 
966 #if defined(PETSC_HAVE_FORTRAN_CAPS)
967 #define matsetvalues4_ MATSETVALUES4
968 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
969 #define matsetvalues4_ matsetvalues4
970 #endif
971 
972 #undef __FUNCT__
973 #define __FUNCT__ "MatSetValues4_"
974 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
975 {
976   Mat         A  = *AA;
977   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
978   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
979   PetscInt    *ai=a->i,*ailen=a->ilen;
980   PetscInt    *aj=a->j,brow,bcol;
981   PetscInt    ridx,cidx,lastcol = -1;
982   MatScalar   *ap,value,*aa=a->a,*bap;
983 
984   PetscFunctionBegin;
985   for (k=0; k<m; k++) { /* loop over added rows */
986     row  = im[k]; brow = row/4;
987     rp   = aj + ai[brow];
988     ap   = aa + 16*ai[brow];
989     nrow = ailen[brow];
990     low  = 0;
991     high = nrow;
992     for (l=0; l<n; l++) { /* loop over added columns */
993       col   = in[l]; bcol = col/4;
994       ridx  = row % 4; cidx = col % 4;
995       value = v[l + k*n];
996       if (col <= lastcol)  low = 0;
997       else                high = nrow;
998       lastcol = col;
999       while (high-low > 7) {
1000         t = (low+high)/2;
1001         if (rp[t] > bcol) high = t;
1002         else              low  = t;
1003       }
1004       for (i=low; i<high; i++) {
1005         if (rp[i] > bcol) break;
1006         if (rp[i] == bcol) {
1007           bap   = ap +  16*i + 4*cidx + ridx;
1008           *bap += value;
1009           goto noinsert1;
1010         }
1011       }
1012       N = nrow++ - 1;
1013       high++; /* added new column thus must search to one higher than before */
1014       /* shift up all the later entries in this row */
1015       for (ii=N; ii>=i; ii--) {
1016         rp[ii+1] = rp[ii];
1017         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1018       }
1019       if (N>=i) {
1020         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1021       }
1022       rp[i]                    = bcol;
1023       ap[16*i + 4*cidx + ridx] = value;
1024 noinsert1:;
1025       low = i;
1026     }
1027     ailen[brow] = nrow;
1028   }
1029   PetscFunctionReturnVoid();
1030 }
1031 
1032 /*
1033      Checks for missing diagonals
1034 */
1035 #undef __FUNCT__
1036 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1037 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1038 {
1039   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1040   PetscErrorCode ierr;
1041   PetscInt       *diag,*ii = a->i,i;
1042 
1043   PetscFunctionBegin;
1044   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1045   *missing = PETSC_FALSE;
1046   if (A->rmap->n > 0 && !ii) {
1047     *missing = PETSC_TRUE;
1048     if (d) *d = 0;
1049     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1050   } else {
1051     diag = a->diag;
1052     for (i=0; i<a->mbs; i++) {
1053       if (diag[i] >= ii[i+1]) {
1054         *missing = PETSC_TRUE;
1055         if (d) *d = i;
1056         PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);
1057         break;
1058       }
1059     }
1060   }
1061   PetscFunctionReturn(0);
1062 }
1063 
1064 #undef __FUNCT__
1065 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1066 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1067 {
1068   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1069   PetscErrorCode ierr;
1070   PetscInt       i,j,m = a->mbs;
1071 
1072   PetscFunctionBegin;
1073   if (!a->diag) {
1074     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1075     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1076     a->free_diag = PETSC_TRUE;
1077   }
1078   for (i=0; i<m; i++) {
1079     a->diag[i] = a->i[i+1];
1080     for (j=a->i[i]; j<a->i[i+1]; j++) {
1081       if (a->j[j] == i) {
1082         a->diag[i] = j;
1083         break;
1084       }
1085     }
1086   }
1087   PetscFunctionReturn(0);
1088 }
1089 
1090 
1091 #undef __FUNCT__
1092 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1093 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1094 {
1095   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1096   PetscErrorCode ierr;
1097   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1098   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1099 
1100   PetscFunctionBegin;
1101   *nn = n;
1102   if (!ia) PetscFunctionReturn(0);
1103   if (symmetric) {
1104     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1105     nz   = tia[n];
1106   } else {
1107     tia = a->i; tja = a->j;
1108   }
1109 
1110   if (!blockcompressed && bs > 1) {
1111     (*nn) *= bs;
1112     /* malloc & create the natural set of indices */
1113     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1114     if (n) {
1115       (*ia)[0] = 0;
1116       for (j=1; j<bs; j++) {
1117         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1118       }
1119     }
1120 
1121     for (i=1; i<n; i++) {
1122       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1123       for (j=1; j<bs; j++) {
1124         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1125       }
1126     }
1127     if (n) {
1128       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1129     }
1130 
1131     if (inja) {
1132       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1133       cnt = 0;
1134       for (i=0; i<n; i++) {
1135         for (j=0; j<bs; j++) {
1136           for (k=tia[i]; k<tia[i+1]; k++) {
1137             for (l=0; l<bs; l++) {
1138               (*ja)[cnt++] = bs*tja[k] + l;
1139             }
1140           }
1141         }
1142       }
1143     }
1144 
1145     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1146       ierr = PetscFree(tia);CHKERRQ(ierr);
1147       ierr = PetscFree(tja);CHKERRQ(ierr);
1148     }
1149   } else if (oshift == 1) {
1150     if (symmetric) {
1151       nz = tia[A->rmap->n/bs];
1152       /*  add 1 to i and j indices */
1153       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1154       *ia = tia;
1155       if (ja) {
1156         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1157         *ja = tja;
1158       }
1159     } else {
1160       nz = a->i[A->rmap->n/bs];
1161       /* malloc space and  add 1 to i and j indices */
1162       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1163       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1164       if (ja) {
1165         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1166         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1167       }
1168     }
1169   } else {
1170     *ia = tia;
1171     if (ja) *ja = tja;
1172   }
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 #undef __FUNCT__
1177 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1178 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1179 {
1180   PetscErrorCode ierr;
1181 
1182   PetscFunctionBegin;
1183   if (!ia) PetscFunctionReturn(0);
1184   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1185     ierr = PetscFree(*ia);CHKERRQ(ierr);
1186     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1187   }
1188   PetscFunctionReturn(0);
1189 }
1190 
1191 #undef __FUNCT__
1192 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1193 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1194 {
1195   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1196   PetscErrorCode ierr;
1197 
1198   PetscFunctionBegin;
1199 #if defined(PETSC_USE_LOG)
1200   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1201 #endif
1202   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1203   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1204   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1205   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1206   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1207   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1208   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1209   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1210   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1211   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1212   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1213   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1214   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1215 
1216   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1217   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1218   ierr = PetscFree(A->data);CHKERRQ(ierr);
1219 
1220   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1222   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1223   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1224   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1225   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1226   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1227   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1228   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1229   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1230   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 #undef __FUNCT__
1235 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1236 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1237 {
1238   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1239   PetscErrorCode ierr;
1240 
1241   PetscFunctionBegin;
1242   switch (op) {
1243   case MAT_ROW_ORIENTED:
1244     a->roworiented = flg;
1245     break;
1246   case MAT_KEEP_NONZERO_PATTERN:
1247     a->keepnonzeropattern = flg;
1248     break;
1249   case MAT_NEW_NONZERO_LOCATIONS:
1250     a->nonew = (flg ? 0 : 1);
1251     break;
1252   case MAT_NEW_NONZERO_LOCATION_ERR:
1253     a->nonew = (flg ? -1 : 0);
1254     break;
1255   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1256     a->nonew = (flg ? -2 : 0);
1257     break;
1258   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1259     a->nounused = (flg ? -1 : 0);
1260     break;
1261   case MAT_NEW_DIAGONALS:
1262   case MAT_IGNORE_OFF_PROC_ENTRIES:
1263   case MAT_USE_HASH_TABLE:
1264     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1265     break;
1266   case MAT_SPD:
1267   case MAT_SYMMETRIC:
1268   case MAT_STRUCTURALLY_SYMMETRIC:
1269   case MAT_HERMITIAN:
1270   case MAT_SYMMETRY_ETERNAL:
1271     /* These options are handled directly by MatSetOption() */
1272     break;
1273   default:
1274     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1275   }
1276   PetscFunctionReturn(0);
1277 }
1278 
1279 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1280 #undef __FUNCT__
1281 #define __FUNCT__ "MatGetRow_SeqBAIJ_private"
1282 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1283 {
1284   PetscErrorCode ierr;
1285   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1286   MatScalar      *aa_i;
1287   PetscScalar    *v_i;
1288 
1289   PetscFunctionBegin;
1290   bs  = A->rmap->bs;
1291   bs2 = bs*bs;
1292   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1293 
1294   bn  = row/bs;   /* Block number */
1295   bp  = row % bs; /* Block Position */
1296   M   = ai[bn+1] - ai[bn];
1297   *nz = bs*M;
1298 
1299   if (v) {
1300     *v = 0;
1301     if (*nz) {
1302       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1303       for (i=0; i<M; i++) { /* for each block in the block row */
1304         v_i  = *v + i*bs;
1305         aa_i = aa + bs2*(ai[bn] + i);
1306         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1307       }
1308     }
1309   }
1310 
1311   if (idx) {
1312     *idx = 0;
1313     if (*nz) {
1314       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1315       for (i=0; i<M; i++) { /* for each block in the block row */
1316         idx_i = *idx + i*bs;
1317         itmp  = bs*aj[ai[bn] + i];
1318         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1319       }
1320     }
1321   }
1322   PetscFunctionReturn(0);
1323 }
1324 
1325 #undef __FUNCT__
1326 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1327 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1328 {
1329   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1330   PetscErrorCode ierr;
1331 
1332   PetscFunctionBegin;
1333   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1334   PetscFunctionReturn(0);
1335 }
1336 
1337 #undef __FUNCT__
1338 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1339 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1340 {
1341   PetscErrorCode ierr;
1342 
1343   PetscFunctionBegin;
1344   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1345   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1346   PetscFunctionReturn(0);
1347 }
1348 
1349 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1350 
1351 #undef __FUNCT__
1352 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1353 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1354 {
1355   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1356   Mat            C;
1357   PetscErrorCode ierr;
1358   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1359   PetscInt       *rows,*cols,bs2=a->bs2;
1360   MatScalar      *array;
1361 
1362   PetscFunctionBegin;
1363   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1364   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1365     ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr);
1366 
1367     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1368     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1369     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1370     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1371     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1372     ierr = PetscFree(col);CHKERRQ(ierr);
1373   } else {
1374     C = *B;
1375   }
1376 
1377   array = a->a;
1378   ierr  = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr);
1379   for (i=0; i<mbs; i++) {
1380     cols[0] = i*bs;
1381     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1382     len = ai[i+1] - ai[i];
1383     for (j=0; j<len; j++) {
1384       rows[0] = (*aj++)*bs;
1385       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1386       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1387       array += bs2;
1388     }
1389   }
1390   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1391 
1392   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1393   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1394 
1395   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1396     *B = C;
1397   } else {
1398     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1399   }
1400   PetscFunctionReturn(0);
1401 }
1402 
1403 #undef __FUNCT__
1404 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1405 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1406 {
1407   PetscErrorCode ierr;
1408   Mat            Btrans;
1409 
1410   PetscFunctionBegin;
1411   *f   = PETSC_FALSE;
1412   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1413   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1414   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1415   PetscFunctionReturn(0);
1416 }
1417 
1418 #undef __FUNCT__
1419 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1420 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1421 {
1422   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1423   PetscErrorCode ierr;
1424   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1425   int            fd;
1426   PetscScalar    *aa;
1427   FILE           *file;
1428 
1429   PetscFunctionBegin;
1430   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1431   ierr        = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr);
1432   col_lens[0] = MAT_FILE_CLASSID;
1433 
1434   col_lens[1] = A->rmap->N;
1435   col_lens[2] = A->cmap->n;
1436   col_lens[3] = a->nz*bs2;
1437 
1438   /* store lengths of each row and write (including header) to file */
1439   count = 0;
1440   for (i=0; i<a->mbs; i++) {
1441     for (j=0; j<bs; j++) {
1442       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1443     }
1444   }
1445   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1446   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1447 
1448   /* store column indices (zero start index) */
1449   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1450   count = 0;
1451   for (i=0; i<a->mbs; i++) {
1452     for (j=0; j<bs; j++) {
1453       for (k=a->i[i]; k<a->i[i+1]; k++) {
1454         for (l=0; l<bs; l++) {
1455           jj[count++] = bs*a->j[k] + l;
1456         }
1457       }
1458     }
1459   }
1460   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1461   ierr = PetscFree(jj);CHKERRQ(ierr);
1462 
1463   /* store nonzero values */
1464   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1465   count = 0;
1466   for (i=0; i<a->mbs; i++) {
1467     for (j=0; j<bs; j++) {
1468       for (k=a->i[i]; k<a->i[i+1]; k++) {
1469         for (l=0; l<bs; l++) {
1470           aa[count++] = a->a[bs2*k + l*bs + j];
1471         }
1472       }
1473     }
1474   }
1475   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1476   ierr = PetscFree(aa);CHKERRQ(ierr);
1477 
1478   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1479   if (file) {
1480     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1481   }
1482   PetscFunctionReturn(0);
1483 }
1484 
1485 #undef __FUNCT__
1486 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1487 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1488 {
1489   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1490   PetscErrorCode    ierr;
1491   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1492   PetscViewerFormat format;
1493 
1494   PetscFunctionBegin;
1495   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1496   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1497     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1498   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1499     const char *matname;
1500     Mat        aij;
1501     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1502     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1503     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1504     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1505     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1506   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1507       PetscFunctionReturn(0);
1508   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1509     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1510     for (i=0; i<a->mbs; i++) {
1511       for (j=0; j<bs; j++) {
1512         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1513         for (k=a->i[i]; k<a->i[i+1]; k++) {
1514           for (l=0; l<bs; l++) {
1515 #if defined(PETSC_USE_COMPLEX)
1516             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1517               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1518                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1519             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1520               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1521                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1522             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1523               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1524             }
1525 #else
1526             if (a->a[bs2*k + l*bs + j] != 0.0) {
1527               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1528             }
1529 #endif
1530           }
1531         }
1532         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1533       }
1534     }
1535     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1536   } else {
1537     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1538     for (i=0; i<a->mbs; i++) {
1539       for (j=0; j<bs; j++) {
1540         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1541         for (k=a->i[i]; k<a->i[i+1]; k++) {
1542           for (l=0; l<bs; l++) {
1543 #if defined(PETSC_USE_COMPLEX)
1544             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1545               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1546                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1547             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1548               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1549                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1550             } else {
1551               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1552             }
1553 #else
1554             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1555 #endif
1556           }
1557         }
1558         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1559       }
1560     }
1561     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1562   }
1563   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1564   PetscFunctionReturn(0);
1565 }
1566 
1567 #include <petscdraw.h>
1568 #undef __FUNCT__
1569 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1570 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1571 {
1572   Mat               A = (Mat) Aa;
1573   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1574   PetscErrorCode    ierr;
1575   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1576   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1577   MatScalar         *aa;
1578   PetscViewer       viewer;
1579   PetscViewerFormat format;
1580 
1581   PetscFunctionBegin;
1582   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1583   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1584 
1585   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1586 
1587   /* loop over matrix elements drawing boxes */
1588 
1589   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1590     color = PETSC_DRAW_BLUE;
1591     for (i=0,row=0; i<mbs; i++,row+=bs) {
1592       for (j=a->i[i]; j<a->i[i+1]; j++) {
1593         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1594         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1595         aa  = a->a + j*bs2;
1596         for (k=0; k<bs; k++) {
1597           for (l=0; l<bs; l++) {
1598             if (PetscRealPart(*aa++) >=  0.) continue;
1599             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1600           }
1601         }
1602       }
1603     }
1604     color = PETSC_DRAW_CYAN;
1605     for (i=0,row=0; i<mbs; i++,row+=bs) {
1606       for (j=a->i[i]; j<a->i[i+1]; j++) {
1607         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1608         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1609         aa  = a->a + j*bs2;
1610         for (k=0; k<bs; k++) {
1611           for (l=0; l<bs; l++) {
1612             if (PetscRealPart(*aa++) != 0.) continue;
1613             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1614           }
1615         }
1616       }
1617     }
1618     color = PETSC_DRAW_RED;
1619     for (i=0,row=0; i<mbs; i++,row+=bs) {
1620       for (j=a->i[i]; j<a->i[i+1]; j++) {
1621         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1622         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1623         aa  = a->a + j*bs2;
1624         for (k=0; k<bs; k++) {
1625           for (l=0; l<bs; l++) {
1626             if (PetscRealPart(*aa++) <= 0.) continue;
1627             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1628           }
1629         }
1630       }
1631     }
1632   } else {
1633     /* use contour shading to indicate magnitude of values */
1634     /* first determine max of all nonzero values */
1635     PetscDraw popup;
1636     PetscReal scale,maxv = 0.0;
1637 
1638     for (i=0; i<a->nz*a->bs2; i++) {
1639       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1640     }
1641     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1642     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1643     if (popup) {
1644       ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1645     }
1646     for (i=0,row=0; i<mbs; i++,row+=bs) {
1647       for (j=a->i[i]; j<a->i[i+1]; j++) {
1648         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1649         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1650         aa  = a->a + j*bs2;
1651         for (k=0; k<bs; k++) {
1652           for (l=0; l<bs; l++) {
1653             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1654             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1655           }
1656         }
1657       }
1658     }
1659   }
1660   PetscFunctionReturn(0);
1661 }
1662 
1663 #undef __FUNCT__
1664 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1665 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1666 {
1667   PetscErrorCode ierr;
1668   PetscReal      xl,yl,xr,yr,w,h;
1669   PetscDraw      draw;
1670   PetscBool      isnull;
1671 
1672   PetscFunctionBegin;
1673   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1674   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1675 
1676   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1677   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1678   xr  += w;    yr += h;  xl = -w;     yl = -h;
1679   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1680   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1681   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1682   PetscFunctionReturn(0);
1683 }
1684 
1685 #undef __FUNCT__
1686 #define __FUNCT__ "MatView_SeqBAIJ"
1687 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1688 {
1689   PetscErrorCode ierr;
1690   PetscBool      iascii,isbinary,isdraw;
1691 
1692   PetscFunctionBegin;
1693   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1694   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1695   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1696   if (iascii) {
1697     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1698   } else if (isbinary) {
1699     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1700   } else if (isdraw) {
1701     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1702   } else {
1703     Mat B;
1704     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1705     ierr = MatView(B,viewer);CHKERRQ(ierr);
1706     ierr = MatDestroy(&B);CHKERRQ(ierr);
1707   }
1708   PetscFunctionReturn(0);
1709 }
1710 
1711 
1712 #undef __FUNCT__
1713 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1714 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1715 {
1716   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1717   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1718   PetscInt    *ai = a->i,*ailen = a->ilen;
1719   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1720   MatScalar   *ap,*aa = a->a;
1721 
1722   PetscFunctionBegin;
1723   for (k=0; k<m; k++) { /* loop over rows */
1724     row = im[k]; brow = row/bs;
1725     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1726     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1727     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1728     nrow = ailen[brow];
1729     for (l=0; l<n; l++) { /* loop over columns */
1730       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1731       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1732       col  = in[l];
1733       bcol = col/bs;
1734       cidx = col%bs;
1735       ridx = row%bs;
1736       high = nrow;
1737       low  = 0; /* assume unsorted */
1738       while (high-low > 5) {
1739         t = (low+high)/2;
1740         if (rp[t] > bcol) high = t;
1741         else             low  = t;
1742       }
1743       for (i=low; i<high; i++) {
1744         if (rp[i] > bcol) break;
1745         if (rp[i] == bcol) {
1746           *v++ = ap[bs2*i+bs*cidx+ridx];
1747           goto finished;
1748         }
1749       }
1750       *v++ = 0.0;
1751 finished:;
1752     }
1753   }
1754   PetscFunctionReturn(0);
1755 }
1756 
1757 #undef __FUNCT__
1758 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1759 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1760 {
1761   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1762   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1763   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1764   PetscErrorCode    ierr;
1765   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1766   PetscBool         roworiented=a->roworiented;
1767   const PetscScalar *value     = v;
1768   MatScalar         *ap,*aa = a->a,*bap;
1769 
1770   PetscFunctionBegin;
1771   if (roworiented) {
1772     stepval = (n-1)*bs;
1773   } else {
1774     stepval = (m-1)*bs;
1775   }
1776   for (k=0; k<m; k++) { /* loop over added rows */
1777     row = im[k];
1778     if (row < 0) continue;
1779 #if defined(PETSC_USE_DEBUG)
1780     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1781 #endif
1782     rp   = aj + ai[row];
1783     ap   = aa + bs2*ai[row];
1784     rmax = imax[row];
1785     nrow = ailen[row];
1786     low  = 0;
1787     high = nrow;
1788     for (l=0; l<n; l++) { /* loop over added columns */
1789       if (in[l] < 0) continue;
1790 #if defined(PETSC_USE_DEBUG)
1791       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1792 #endif
1793       col = in[l];
1794       if (roworiented) {
1795         value = v + (k*(stepval+bs) + l)*bs;
1796       } else {
1797         value = v + (l*(stepval+bs) + k)*bs;
1798       }
1799       if (col <= lastcol) low = 0;
1800       else high = nrow;
1801       lastcol = col;
1802       while (high-low > 7) {
1803         t = (low+high)/2;
1804         if (rp[t] > col) high = t;
1805         else             low  = t;
1806       }
1807       for (i=low; i<high; i++) {
1808         if (rp[i] > col) break;
1809         if (rp[i] == col) {
1810           bap = ap +  bs2*i;
1811           if (roworiented) {
1812             if (is == ADD_VALUES) {
1813               for (ii=0; ii<bs; ii++,value+=stepval) {
1814                 for (jj=ii; jj<bs2; jj+=bs) {
1815                   bap[jj] += *value++;
1816                 }
1817               }
1818             } else {
1819               for (ii=0; ii<bs; ii++,value+=stepval) {
1820                 for (jj=ii; jj<bs2; jj+=bs) {
1821                   bap[jj] = *value++;
1822                 }
1823               }
1824             }
1825           } else {
1826             if (is == ADD_VALUES) {
1827               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1828                 for (jj=0; jj<bs; jj++) {
1829                   bap[jj] += value[jj];
1830                 }
1831                 bap += bs;
1832               }
1833             } else {
1834               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1835                 for (jj=0; jj<bs; jj++) {
1836                   bap[jj]  = value[jj];
1837                 }
1838                 bap += bs;
1839               }
1840             }
1841           }
1842           goto noinsert2;
1843         }
1844       }
1845       if (nonew == 1) goto noinsert2;
1846       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1847       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1848       N = nrow++ - 1; high++;
1849       /* shift up all the later entries in this row */
1850       for (ii=N; ii>=i; ii--) {
1851         rp[ii+1] = rp[ii];
1852         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1853       }
1854       if (N >= i) {
1855         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1856       }
1857       rp[i] = col;
1858       bap   = ap +  bs2*i;
1859       if (roworiented) {
1860         for (ii=0; ii<bs; ii++,value+=stepval) {
1861           for (jj=ii; jj<bs2; jj+=bs) {
1862             bap[jj] = *value++;
1863           }
1864         }
1865       } else {
1866         for (ii=0; ii<bs; ii++,value+=stepval) {
1867           for (jj=0; jj<bs; jj++) {
1868             *bap++ = *value++;
1869           }
1870         }
1871       }
1872 noinsert2:;
1873       low = i;
1874     }
1875     ailen[row] = nrow;
1876   }
1877   PetscFunctionReturn(0);
1878 }
1879 
1880 #undef __FUNCT__
1881 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1882 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1883 {
1884   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1885   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1886   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1887   PetscErrorCode ierr;
1888   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1889   MatScalar      *aa  = a->a,*ap;
1890   PetscReal      ratio=0.6;
1891 
1892   PetscFunctionBegin;
1893   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1894 
1895   if (m) rmax = ailen[0];
1896   for (i=1; i<mbs; i++) {
1897     /* move each row back by the amount of empty slots (fshift) before it*/
1898     fshift += imax[i-1] - ailen[i-1];
1899     rmax    = PetscMax(rmax,ailen[i]);
1900     if (fshift) {
1901       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1902       N  = ailen[i];
1903       for (j=0; j<N; j++) {
1904         ip[j-fshift] = ip[j];
1905 
1906         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1907       }
1908     }
1909     ai[i] = ai[i-1] + ailen[i-1];
1910   }
1911   if (mbs) {
1912     fshift += imax[mbs-1] - ailen[mbs-1];
1913     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1914   }
1915 
1916   /* reset ilen and imax for each row */
1917   a->nonzerorowcnt = 0;
1918   for (i=0; i<mbs; i++) {
1919     ailen[i] = imax[i] = ai[i+1] - ai[i];
1920     a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1921   }
1922   a->nz = ai[mbs];
1923 
1924   /* diagonals may have moved, so kill the diagonal pointers */
1925   a->idiagvalid = PETSC_FALSE;
1926   if (fshift && a->diag) {
1927     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1928     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1929     a->diag = 0;
1930   }
1931   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1932   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1933   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1934   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1935 
1936   A->info.mallocs    += a->reallocs;
1937   a->reallocs         = 0;
1938   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1939   a->rmax             = rmax;
1940 
1941   ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1942   PetscFunctionReturn(0);
1943 }
1944 
1945 /*
1946    This function returns an array of flags which indicate the locations of contiguous
1947    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1948    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1949    Assume: sizes should be long enough to hold all the values.
1950 */
1951 #undef __FUNCT__
1952 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1953 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1954 {
1955   PetscInt  i,j,k,row;
1956   PetscBool flg;
1957 
1958   PetscFunctionBegin;
1959   for (i=0,j=0; i<n; j++) {
1960     row = idx[i];
1961     if (row%bs!=0) { /* Not the begining of a block */
1962       sizes[j] = 1;
1963       i++;
1964     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1965       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1966       i++;
1967     } else { /* Begining of the block, so check if the complete block exists */
1968       flg = PETSC_TRUE;
1969       for (k=1; k<bs; k++) {
1970         if (row+k != idx[i+k]) { /* break in the block */
1971           flg = PETSC_FALSE;
1972           break;
1973         }
1974       }
1975       if (flg) { /* No break in the bs */
1976         sizes[j] = bs;
1977         i       += bs;
1978       } else {
1979         sizes[j] = 1;
1980         i++;
1981       }
1982     }
1983   }
1984   *bs_max = j;
1985   PetscFunctionReturn(0);
1986 }
1987 
1988 #undef __FUNCT__
1989 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1990 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
1991 {
1992   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
1993   PetscErrorCode    ierr;
1994   PetscInt          i,j,k,count,*rows;
1995   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
1996   PetscScalar       zero = 0.0;
1997   MatScalar         *aa;
1998   const PetscScalar *xx;
1999   PetscScalar       *bb;
2000 
2001   PetscFunctionBegin;
2002   /* fix right hand side if needed */
2003   if (x && b) {
2004     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2005     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2006     for (i=0; i<is_n; i++) {
2007       bb[is_idx[i]] = diag*xx[is_idx[i]];
2008     }
2009     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2010     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2011   }
2012 
2013   /* Make a copy of the IS and  sort it */
2014   /* allocate memory for rows,sizes */
2015   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2016 
2017   /* copy IS values to rows, and sort them */
2018   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2019   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2020 
2021   if (baij->keepnonzeropattern) {
2022     for (i=0; i<is_n; i++) sizes[i] = 1;
2023     bs_max          = is_n;
2024   } else {
2025     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2026     A->nonzerostate++;
2027   }
2028 
2029   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2030     row = rows[j];
2031     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2032     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2033     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2034     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2035       if (diag != (PetscScalar)0.0) {
2036         if (baij->ilen[row/bs] > 0) {
2037           baij->ilen[row/bs]       = 1;
2038           baij->j[baij->i[row/bs]] = row/bs;
2039 
2040           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2041         }
2042         /* Now insert all the diagonal values for this bs */
2043         for (k=0; k<bs; k++) {
2044           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2045         }
2046       } else { /* (diag == 0.0) */
2047         baij->ilen[row/bs] = 0;
2048       } /* end (diag == 0.0) */
2049     } else { /* (sizes[i] != bs) */
2050 #if defined(PETSC_USE_DEBUG)
2051       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2052 #endif
2053       for (k=0; k<count; k++) {
2054         aa[0] =  zero;
2055         aa   += bs;
2056       }
2057       if (diag != (PetscScalar)0.0) {
2058         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2059       }
2060     }
2061   }
2062 
2063   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2064   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2065   PetscFunctionReturn(0);
2066 }
2067 
2068 #undef __FUNCT__
2069 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2070 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2071 {
2072   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2073   PetscErrorCode    ierr;
2074   PetscInt          i,j,k,count;
2075   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2076   PetscScalar       zero = 0.0;
2077   MatScalar         *aa;
2078   const PetscScalar *xx;
2079   PetscScalar       *bb;
2080   PetscBool         *zeroed,vecs = PETSC_FALSE;
2081 
2082   PetscFunctionBegin;
2083   /* fix right hand side if needed */
2084   if (x && b) {
2085     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2086     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2087     vecs = PETSC_TRUE;
2088   }
2089 
2090   /* zero the columns */
2091   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2092   for (i=0; i<is_n; i++) {
2093     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2094     zeroed[is_idx[i]] = PETSC_TRUE;
2095   }
2096   for (i=0; i<A->rmap->N; i++) {
2097     if (!zeroed[i]) {
2098       row = i/bs;
2099       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2100         for (k=0; k<bs; k++) {
2101           col = bs*baij->j[j] + k;
2102           if (zeroed[col]) {
2103             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2104             if (vecs) bb[i] -= aa[0]*xx[col];
2105             aa[0] = 0.0;
2106           }
2107         }
2108       }
2109     } else if (vecs) bb[i] = diag*xx[i];
2110   }
2111   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2112   if (vecs) {
2113     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2114     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2115   }
2116 
2117   /* zero the rows */
2118   for (i=0; i<is_n; i++) {
2119     row   = is_idx[i];
2120     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2121     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2122     for (k=0; k<count; k++) {
2123       aa[0] =  zero;
2124       aa   += bs;
2125     }
2126     if (diag != (PetscScalar)0.0) {
2127       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2128     }
2129   }
2130   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2131   PetscFunctionReturn(0);
2132 }
2133 
2134 #undef __FUNCT__
2135 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2136 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2137 {
2138   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2139   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2140   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2141   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2142   PetscErrorCode ierr;
2143   PetscInt       ridx,cidx,bs2=a->bs2;
2144   PetscBool      roworiented=a->roworiented;
2145   MatScalar      *ap,value,*aa=a->a,*bap;
2146 
2147   PetscFunctionBegin;
2148   for (k=0; k<m; k++) { /* loop over added rows */
2149     row  = im[k];
2150     brow = row/bs;
2151     if (row < 0) continue;
2152 #if defined(PETSC_USE_DEBUG)
2153     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2154 #endif
2155     rp   = aj + ai[brow];
2156     ap   = aa + bs2*ai[brow];
2157     rmax = imax[brow];
2158     nrow = ailen[brow];
2159     low  = 0;
2160     high = nrow;
2161     for (l=0; l<n; l++) { /* loop over added columns */
2162       if (in[l] < 0) continue;
2163 #if defined(PETSC_USE_DEBUG)
2164       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2165 #endif
2166       col  = in[l]; bcol = col/bs;
2167       ridx = row % bs; cidx = col % bs;
2168       if (roworiented) {
2169         value = v[l + k*n];
2170       } else {
2171         value = v[k + l*m];
2172       }
2173       if (col <= lastcol) low = 0; else high = nrow;
2174       lastcol = col;
2175       while (high-low > 7) {
2176         t = (low+high)/2;
2177         if (rp[t] > bcol) high = t;
2178         else              low  = t;
2179       }
2180       for (i=low; i<high; i++) {
2181         if (rp[i] > bcol) break;
2182         if (rp[i] == bcol) {
2183           bap = ap +  bs2*i + bs*cidx + ridx;
2184           if (is == ADD_VALUES) *bap += value;
2185           else                  *bap  = value;
2186           goto noinsert1;
2187         }
2188       }
2189       if (nonew == 1) goto noinsert1;
2190       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2191       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2192       N = nrow++ - 1; high++;
2193       /* shift up all the later entries in this row */
2194       for (ii=N; ii>=i; ii--) {
2195         rp[ii+1] = rp[ii];
2196         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2197       }
2198       if (N>=i) {
2199         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2200       }
2201       rp[i]                      = bcol;
2202       ap[bs2*i + bs*cidx + ridx] = value;
2203       a->nz++;
2204       A->nonzerostate++;
2205 noinsert1:;
2206       low = i;
2207     }
2208     ailen[brow] = nrow;
2209   }
2210   PetscFunctionReturn(0);
2211 }
2212 
2213 #undef __FUNCT__
2214 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2215 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2216 {
2217   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2218   Mat            outA;
2219   PetscErrorCode ierr;
2220   PetscBool      row_identity,col_identity;
2221 
2222   PetscFunctionBegin;
2223   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2224   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2225   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2226   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2227 
2228   outA            = inA;
2229   inA->factortype = MAT_FACTOR_LU;
2230 
2231   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2232 
2233   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2234   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2235   a->row = row;
2236   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2237   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2238   a->col = col;
2239 
2240   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2241   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2242   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2243   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2244 
2245   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2246   if (!a->solve_work) {
2247     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2248     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2249   }
2250   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2251   PetscFunctionReturn(0);
2252 }
2253 
2254 #undef __FUNCT__
2255 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2256 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2257 {
2258   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2259   PetscInt    i,nz,mbs;
2260 
2261   PetscFunctionBegin;
2262   nz  = baij->maxnz;
2263   mbs = baij->mbs;
2264   for (i=0; i<nz; i++) {
2265     baij->j[i] = indices[i];
2266   }
2267   baij->nz = nz;
2268   for (i=0; i<mbs; i++) {
2269     baij->ilen[i] = baij->imax[i];
2270   }
2271   PetscFunctionReturn(0);
2272 }
2273 
2274 #undef __FUNCT__
2275 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2276 /*@
2277     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2278        in the matrix.
2279 
2280   Input Parameters:
2281 +  mat - the SeqBAIJ matrix
2282 -  indices - the column indices
2283 
2284   Level: advanced
2285 
2286   Notes:
2287     This can be called if you have precomputed the nonzero structure of the
2288   matrix and want to provide it to the matrix object to improve the performance
2289   of the MatSetValues() operation.
2290 
2291     You MUST have set the correct numbers of nonzeros per row in the call to
2292   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2293 
2294     MUST be called before any calls to MatSetValues();
2295 
2296 @*/
2297 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2298 {
2299   PetscErrorCode ierr;
2300 
2301   PetscFunctionBegin;
2302   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2303   PetscValidPointer(indices,2);
2304   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2305   PetscFunctionReturn(0);
2306 }
2307 
2308 #undef __FUNCT__
2309 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2310 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2311 {
2312   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2313   PetscErrorCode ierr;
2314   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2315   PetscReal      atmp;
2316   PetscScalar    *x,zero = 0.0;
2317   MatScalar      *aa;
2318   PetscInt       ncols,brow,krow,kcol;
2319 
2320   PetscFunctionBegin;
2321   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2322   bs  = A->rmap->bs;
2323   aa  = a->a;
2324   ai  = a->i;
2325   aj  = a->j;
2326   mbs = a->mbs;
2327 
2328   ierr = VecSet(v,zero);CHKERRQ(ierr);
2329   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2330   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2331   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2332   for (i=0; i<mbs; i++) {
2333     ncols = ai[1] - ai[0]; ai++;
2334     brow  = bs*i;
2335     for (j=0; j<ncols; j++) {
2336       for (kcol=0; kcol<bs; kcol++) {
2337         for (krow=0; krow<bs; krow++) {
2338           atmp = PetscAbsScalar(*aa);aa++;
2339           row  = brow + krow;   /* row index */
2340           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2341         }
2342       }
2343       aj++;
2344     }
2345   }
2346   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2347   PetscFunctionReturn(0);
2348 }
2349 
2350 #undef __FUNCT__
2351 #define __FUNCT__ "MatCopy_SeqBAIJ"
2352 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2353 {
2354   PetscErrorCode ierr;
2355 
2356   PetscFunctionBegin;
2357   /* If the two matrices have the same copy implementation, use fast copy. */
2358   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2359     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2360     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2361     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2362 
2363     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2364     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2365     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2366   } else {
2367     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2368   }
2369   PetscFunctionReturn(0);
2370 }
2371 
2372 #undef __FUNCT__
2373 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2374 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2375 {
2376   PetscErrorCode ierr;
2377 
2378   PetscFunctionBegin;
2379   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2380   PetscFunctionReturn(0);
2381 }
2382 
2383 #undef __FUNCT__
2384 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2385 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2386 {
2387   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2388 
2389   PetscFunctionBegin;
2390   *array = a->a;
2391   PetscFunctionReturn(0);
2392 }
2393 
2394 #undef __FUNCT__
2395 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2396 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2397 {
2398   PetscFunctionBegin;
2399   PetscFunctionReturn(0);
2400 }
2401 
2402 #undef __FUNCT__
2403 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ"
2404 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2405 {
2406   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2407   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2408   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2409   PetscErrorCode ierr;
2410 
2411   PetscFunctionBegin;
2412   /* Set the number of nonzeros in the new matrix */
2413   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2414   PetscFunctionReturn(0);
2415 }
2416 
2417 #undef __FUNCT__
2418 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2419 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2420 {
2421   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2422   PetscErrorCode ierr;
2423   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2424   PetscBLASInt   one=1;
2425 
2426   PetscFunctionBegin;
2427   if (str == SAME_NONZERO_PATTERN) {
2428     PetscScalar  alpha = a;
2429     PetscBLASInt bnz;
2430     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2431     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2432     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2433   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2434     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2435   } else {
2436     Mat      B;
2437     PetscInt *nnz;
2438     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2439     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2440     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2441     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2442     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2443     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2444     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2445     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2446     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2447     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2448     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2449     ierr = PetscFree(nnz);CHKERRQ(ierr);
2450   }
2451   PetscFunctionReturn(0);
2452 }
2453 
2454 #undef __FUNCT__
2455 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2456 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2457 {
2458   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2459   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2460   MatScalar   *aa = a->a;
2461 
2462   PetscFunctionBegin;
2463   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2464   PetscFunctionReturn(0);
2465 }
2466 
2467 #undef __FUNCT__
2468 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2469 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2470 {
2471   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2472   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2473   MatScalar   *aa = a->a;
2474 
2475   PetscFunctionBegin;
2476   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2477   PetscFunctionReturn(0);
2478 }
2479 
2480 #undef __FUNCT__
2481 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2482 /*
2483     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2484 */
2485 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2486 {
2487   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2488   PetscErrorCode ierr;
2489   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2490   PetscInt       nz = a->i[m],row,*jj,mr,col;
2491 
2492   PetscFunctionBegin;
2493   *nn = n;
2494   if (!ia) PetscFunctionReturn(0);
2495   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2496   else {
2497     ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2498     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2499     ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2500     jj   = a->j;
2501     for (i=0; i<nz; i++) {
2502       collengths[jj[i]]++;
2503     }
2504     cia[0] = oshift;
2505     for (i=0; i<n; i++) {
2506       cia[i+1] = cia[i] + collengths[i];
2507     }
2508     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2509     jj   = a->j;
2510     for (row=0; row<m; row++) {
2511       mr = a->i[row+1] - a->i[row];
2512       for (i=0; i<mr; i++) {
2513         col = *jj++;
2514 
2515         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2516       }
2517     }
2518     ierr = PetscFree(collengths);CHKERRQ(ierr);
2519     *ia  = cia; *ja = cja;
2520   }
2521   PetscFunctionReturn(0);
2522 }
2523 
2524 #undef __FUNCT__
2525 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2526 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2527 {
2528   PetscErrorCode ierr;
2529 
2530   PetscFunctionBegin;
2531   if (!ia) PetscFunctionReturn(0);
2532   ierr = PetscFree(*ia);CHKERRQ(ierr);
2533   ierr = PetscFree(*ja);CHKERRQ(ierr);
2534   PetscFunctionReturn(0);
2535 }
2536 
2537 /*
2538  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2539  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2540  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2541  */
2542 #undef __FUNCT__
2543 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color"
2544 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2545 {
2546   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2547   PetscErrorCode ierr;
2548   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2549   PetscInt       nz = a->i[m],row,*jj,mr,col;
2550   PetscInt       *cspidx;
2551 
2552   PetscFunctionBegin;
2553   *nn = n;
2554   if (!ia) PetscFunctionReturn(0);
2555 
2556   ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2557   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2558   ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2559   ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr);
2560   jj   = a->j;
2561   for (i=0; i<nz; i++) {
2562     collengths[jj[i]]++;
2563   }
2564   cia[0] = oshift;
2565   for (i=0; i<n; i++) {
2566     cia[i+1] = cia[i] + collengths[i];
2567   }
2568   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2569   jj   = a->j;
2570   for (row=0; row<m; row++) {
2571     mr = a->i[row+1] - a->i[row];
2572     for (i=0; i<mr; i++) {
2573       col = *jj++;
2574       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2575       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2576     }
2577   }
2578   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2579   *ia    = cia; *ja = cja;
2580   *spidx = cspidx;
2581   PetscFunctionReturn(0);
2582 }
2583 
2584 #undef __FUNCT__
2585 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color"
2586 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2587 {
2588   PetscErrorCode ierr;
2589 
2590   PetscFunctionBegin;
2591   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2592   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2593   PetscFunctionReturn(0);
2594 }
2595 
2596 /* -------------------------------------------------------------------*/
2597 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2598                                        MatGetRow_SeqBAIJ,
2599                                        MatRestoreRow_SeqBAIJ,
2600                                        MatMult_SeqBAIJ_N,
2601                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2602                                        MatMultTranspose_SeqBAIJ,
2603                                        MatMultTransposeAdd_SeqBAIJ,
2604                                        0,
2605                                        0,
2606                                        0,
2607                                /* 10*/ 0,
2608                                        MatLUFactor_SeqBAIJ,
2609                                        0,
2610                                        0,
2611                                        MatTranspose_SeqBAIJ,
2612                                /* 15*/ MatGetInfo_SeqBAIJ,
2613                                        MatEqual_SeqBAIJ,
2614                                        MatGetDiagonal_SeqBAIJ,
2615                                        MatDiagonalScale_SeqBAIJ,
2616                                        MatNorm_SeqBAIJ,
2617                                /* 20*/ 0,
2618                                        MatAssemblyEnd_SeqBAIJ,
2619                                        MatSetOption_SeqBAIJ,
2620                                        MatZeroEntries_SeqBAIJ,
2621                                /* 24*/ MatZeroRows_SeqBAIJ,
2622                                        0,
2623                                        0,
2624                                        0,
2625                                        0,
2626                                /* 29*/ MatSetUp_SeqBAIJ,
2627                                        0,
2628                                        0,
2629                                        0,
2630                                        0,
2631                                /* 34*/ MatDuplicate_SeqBAIJ,
2632                                        0,
2633                                        0,
2634                                        MatILUFactor_SeqBAIJ,
2635                                        0,
2636                                /* 39*/ MatAXPY_SeqBAIJ,
2637                                        MatGetSubMatrices_SeqBAIJ,
2638                                        MatIncreaseOverlap_SeqBAIJ,
2639                                        MatGetValues_SeqBAIJ,
2640                                        MatCopy_SeqBAIJ,
2641                                /* 44*/ 0,
2642                                        MatScale_SeqBAIJ,
2643                                        0,
2644                                        0,
2645                                        MatZeroRowsColumns_SeqBAIJ,
2646                                /* 49*/ 0,
2647                                        MatGetRowIJ_SeqBAIJ,
2648                                        MatRestoreRowIJ_SeqBAIJ,
2649                                        MatGetColumnIJ_SeqBAIJ,
2650                                        MatRestoreColumnIJ_SeqBAIJ,
2651                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2652                                        0,
2653                                        0,
2654                                        0,
2655                                        MatSetValuesBlocked_SeqBAIJ,
2656                                /* 59*/ MatGetSubMatrix_SeqBAIJ,
2657                                        MatDestroy_SeqBAIJ,
2658                                        MatView_SeqBAIJ,
2659                                        0,
2660                                        0,
2661                                /* 64*/ 0,
2662                                        0,
2663                                        0,
2664                                        0,
2665                                        0,
2666                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2667                                        0,
2668                                        MatConvert_Basic,
2669                                        0,
2670                                        0,
2671                                /* 74*/ 0,
2672                                        MatFDColoringApply_BAIJ,
2673                                        0,
2674                                        0,
2675                                        0,
2676                                /* 79*/ 0,
2677                                        0,
2678                                        0,
2679                                        0,
2680                                        MatLoad_SeqBAIJ,
2681                                /* 84*/ 0,
2682                                        0,
2683                                        0,
2684                                        0,
2685                                        0,
2686                                /* 89*/ 0,
2687                                        0,
2688                                        0,
2689                                        0,
2690                                        0,
2691                                /* 94*/ 0,
2692                                        0,
2693                                        0,
2694                                        0,
2695                                        0,
2696                                /* 99*/ 0,
2697                                        0,
2698                                        0,
2699                                        0,
2700                                        0,
2701                                /*104*/ 0,
2702                                        MatRealPart_SeqBAIJ,
2703                                        MatImaginaryPart_SeqBAIJ,
2704                                        0,
2705                                        0,
2706                                /*109*/ 0,
2707                                        0,
2708                                        0,
2709                                        0,
2710                                        MatMissingDiagonal_SeqBAIJ,
2711                                /*114*/ 0,
2712                                        0,
2713                                        0,
2714                                        0,
2715                                        0,
2716                                /*119*/ 0,
2717                                        0,
2718                                        MatMultHermitianTranspose_SeqBAIJ,
2719                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2720                                        0,
2721                                /*124*/ 0,
2722                                        0,
2723                                        MatInvertBlockDiagonal_SeqBAIJ,
2724                                        0,
2725                                        0,
2726                                /*129*/ 0,
2727                                        0,
2728                                        0,
2729                                        0,
2730                                        0,
2731                                /*134*/ 0,
2732                                        0,
2733                                        0,
2734                                        0,
2735                                        0,
2736                                /*139*/ 0,
2737                                        0,
2738                                        0,
2739                                        MatFDColoringSetUp_SeqXAIJ,
2740                                        0,
2741                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ
2742 };
2743 
2744 #undef __FUNCT__
2745 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2746 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2747 {
2748   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2749   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2750   PetscErrorCode ierr;
2751 
2752   PetscFunctionBegin;
2753   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2754 
2755   /* allocate space for values if not already there */
2756   if (!aij->saved_values) {
2757     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2758     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2759   }
2760 
2761   /* copy values over */
2762   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2763   PetscFunctionReturn(0);
2764 }
2765 
2766 #undef __FUNCT__
2767 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2768 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2769 {
2770   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2771   PetscErrorCode ierr;
2772   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2773 
2774   PetscFunctionBegin;
2775   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2776   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2777 
2778   /* copy values over */
2779   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2780   PetscFunctionReturn(0);
2781 }
2782 
2783 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2784 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2785 
2786 #undef __FUNCT__
2787 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2788 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2789 {
2790   Mat_SeqBAIJ    *b;
2791   PetscErrorCode ierr;
2792   PetscInt       i,mbs,nbs,bs2;
2793   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2794 
2795   PetscFunctionBegin;
2796   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2797   if (nz == MAT_SKIP_ALLOCATION) {
2798     skipallocation = PETSC_TRUE;
2799     nz             = 0;
2800   }
2801 
2802   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2803   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2804   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2805   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2806 
2807   B->preallocated = PETSC_TRUE;
2808 
2809   mbs = B->rmap->n/bs;
2810   nbs = B->cmap->n/bs;
2811   bs2 = bs*bs;
2812 
2813   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2814 
2815   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2816   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2817   if (nnz) {
2818     for (i=0; i<mbs; i++) {
2819       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2820       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2821     }
2822   }
2823 
2824   b    = (Mat_SeqBAIJ*)B->data;
2825   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2826   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2827   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2828 
2829   if (!flg) {
2830     switch (bs) {
2831     case 1:
2832       B->ops->mult    = MatMult_SeqBAIJ_1;
2833       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2834       break;
2835     case 2:
2836       B->ops->mult    = MatMult_SeqBAIJ_2;
2837       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2838       break;
2839     case 3:
2840       B->ops->mult    = MatMult_SeqBAIJ_3;
2841       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2842       break;
2843     case 4:
2844       B->ops->mult    = MatMult_SeqBAIJ_4;
2845       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2846       break;
2847     case 5:
2848       B->ops->mult    = MatMult_SeqBAIJ_5;
2849       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2850       break;
2851     case 6:
2852       B->ops->mult    = MatMult_SeqBAIJ_6;
2853       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2854       break;
2855     case 7:
2856       B->ops->mult    = MatMult_SeqBAIJ_7;
2857       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2858       break;
2859     case 15:
2860       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2861       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2862       break;
2863     default:
2864       B->ops->mult    = MatMult_SeqBAIJ_N;
2865       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2866       break;
2867     }
2868   }
2869   B->ops->sor = MatSOR_SeqBAIJ;
2870   b->mbs = mbs;
2871   b->nbs = nbs;
2872   if (!skipallocation) {
2873     if (!b->imax) {
2874       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2875       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2876 
2877       b->free_imax_ilen = PETSC_TRUE;
2878     }
2879     /* b->ilen will count nonzeros in each block row so far. */
2880     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2881     if (!nnz) {
2882       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2883       else if (nz < 0) nz = 1;
2884       for (i=0; i<mbs; i++) b->imax[i] = nz;
2885       nz = nz*mbs;
2886     } else {
2887       nz = 0;
2888       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2889     }
2890 
2891     /* allocate the matrix space */
2892     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2893     ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2894     ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2895     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2896     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2897 
2898     b->singlemalloc = PETSC_TRUE;
2899     b->i[0]         = 0;
2900     for (i=1; i<mbs+1; i++) {
2901       b->i[i] = b->i[i-1] + b->imax[i-1];
2902     }
2903     b->free_a  = PETSC_TRUE;
2904     b->free_ij = PETSC_TRUE;
2905 #if defined(PETSC_THREADCOMM_ACTIVE)
2906     ierr = MatZeroEntries_SeqBAIJ(B);CHKERRQ(ierr);
2907 #endif
2908   } else {
2909     b->free_a  = PETSC_FALSE;
2910     b->free_ij = PETSC_FALSE;
2911   }
2912 
2913   b->bs2              = bs2;
2914   b->mbs              = mbs;
2915   b->nz               = 0;
2916   b->maxnz            = nz;
2917   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2918   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2919   PetscFunctionReturn(0);
2920 }
2921 
2922 #undef __FUNCT__
2923 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2924 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2925 {
2926   PetscInt       i,m,nz,nz_max=0,*nnz;
2927   PetscScalar    *values=0;
2928   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2929   PetscErrorCode ierr;
2930 
2931   PetscFunctionBegin;
2932   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2933   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2934   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2935   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2936   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2937   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2938   m    = B->rmap->n/bs;
2939 
2940   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2941   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2942   for (i=0; i<m; i++) {
2943     nz = ii[i+1]- ii[i];
2944     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2945     nz_max = PetscMax(nz_max, nz);
2946     nnz[i] = nz;
2947   }
2948   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2949   ierr = PetscFree(nnz);CHKERRQ(ierr);
2950 
2951   values = (PetscScalar*)V;
2952   if (!values) {
2953     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2954   }
2955   for (i=0; i<m; i++) {
2956     PetscInt          ncols  = ii[i+1] - ii[i];
2957     const PetscInt    *icols = jj + ii[i];
2958     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2959     if (!roworiented) {
2960       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2961     } else {
2962       PetscInt j;
2963       for (j=0; j<ncols; j++) {
2964         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2965         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2966       }
2967     }
2968   }
2969   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2970   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2971   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2972   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2973   PetscFunctionReturn(0);
2974 }
2975 
2976 /*MC
2977    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2978    block sparse compressed row format.
2979 
2980    Options Database Keys:
2981 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2982 
2983   Level: beginner
2984 
2985 .seealso: MatCreateSeqBAIJ()
2986 M*/
2987 
2988 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
2989 
2990 #undef __FUNCT__
2991 #define __FUNCT__ "MatCreate_SeqBAIJ"
2992 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
2993 {
2994   PetscErrorCode ierr;
2995   PetscMPIInt    size;
2996   Mat_SeqBAIJ    *b;
2997 
2998   PetscFunctionBegin;
2999   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3000   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3001 
3002   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3003   B->data = (void*)b;
3004   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3005 
3006   b->row          = 0;
3007   b->col          = 0;
3008   b->icol         = 0;
3009   b->reallocs     = 0;
3010   b->saved_values = 0;
3011 
3012   b->roworiented        = PETSC_TRUE;
3013   b->nonew              = 0;
3014   b->diag               = 0;
3015   B->spptr              = 0;
3016   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3017   b->keepnonzeropattern = PETSC_FALSE;
3018 
3019   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3020   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3021   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3022   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3023   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3024   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3025   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3026   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3027   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3028   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3029   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3030   PetscFunctionReturn(0);
3031 }
3032 
3033 #undef __FUNCT__
3034 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3035 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3036 {
3037   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3038   PetscErrorCode ierr;
3039   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3040 
3041   PetscFunctionBegin;
3042   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3043 
3044   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3045     c->imax           = a->imax;
3046     c->ilen           = a->ilen;
3047     c->free_imax_ilen = PETSC_FALSE;
3048   } else {
3049     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3050     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3051     for (i=0; i<mbs; i++) {
3052       c->imax[i] = a->imax[i];
3053       c->ilen[i] = a->ilen[i];
3054     }
3055     c->free_imax_ilen = PETSC_TRUE;
3056   }
3057 
3058   /* allocate the matrix space */
3059   if (mallocmatspace) {
3060     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3061       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3062       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3063 
3064       c->i            = a->i;
3065       c->j            = a->j;
3066       c->singlemalloc = PETSC_FALSE;
3067       c->free_a       = PETSC_TRUE;
3068       c->free_ij      = PETSC_FALSE;
3069       c->parent       = A;
3070       C->preallocated = PETSC_TRUE;
3071       C->assembled    = PETSC_TRUE;
3072 
3073       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3074       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3075       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3076     } else {
3077       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3078       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3079 
3080       c->singlemalloc = PETSC_TRUE;
3081       c->free_a       = PETSC_TRUE;
3082       c->free_ij      = PETSC_TRUE;
3083 
3084       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3085       if (mbs > 0) {
3086         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3087         if (cpvalues == MAT_COPY_VALUES) {
3088           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3089         } else {
3090           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3091         }
3092       }
3093       C->preallocated = PETSC_TRUE;
3094       C->assembled    = PETSC_TRUE;
3095     }
3096   }
3097 
3098   c->roworiented = a->roworiented;
3099   c->nonew       = a->nonew;
3100 
3101   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3102   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3103 
3104   c->bs2         = a->bs2;
3105   c->mbs         = a->mbs;
3106   c->nbs         = a->nbs;
3107 
3108   if (a->diag) {
3109     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3110       c->diag      = a->diag;
3111       c->free_diag = PETSC_FALSE;
3112     } else {
3113       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3114       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3115       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3116       c->free_diag = PETSC_TRUE;
3117     }
3118   } else c->diag = 0;
3119 
3120   c->nz         = a->nz;
3121   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3122   c->solve_work = NULL;
3123   c->mult_work  = NULL;
3124   c->sor_workt  = NULL;
3125   c->sor_work   = NULL;
3126 
3127   c->compressedrow.use   = a->compressedrow.use;
3128   c->compressedrow.nrows = a->compressedrow.nrows;
3129   if (a->compressedrow.use) {
3130     i    = a->compressedrow.nrows;
3131     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3132     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3133     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3134     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3135   } else {
3136     c->compressedrow.use    = PETSC_FALSE;
3137     c->compressedrow.i      = NULL;
3138     c->compressedrow.rindex = NULL;
3139   }
3140   C->nonzerostate = A->nonzerostate;
3141 
3142   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3143   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3144   PetscFunctionReturn(0);
3145 }
3146 
3147 #undef __FUNCT__
3148 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3149 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3150 {
3151   PetscErrorCode ierr;
3152 
3153   PetscFunctionBegin;
3154   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3155   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3156   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3157   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3158   PetscFunctionReturn(0);
3159 }
3160 
3161 #undef __FUNCT__
3162 #define __FUNCT__ "MatLoad_SeqBAIJ"
3163 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3164 {
3165   Mat_SeqBAIJ    *a;
3166   PetscErrorCode ierr;
3167   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3168   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3169   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3170   PetscInt       *masked,nmask,tmp,bs2,ishift;
3171   PetscMPIInt    size;
3172   int            fd;
3173   PetscScalar    *aa;
3174   MPI_Comm       comm;
3175 
3176   PetscFunctionBegin;
3177   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3178   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3179   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3180   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3181   if (bs < 0) bs = 1;
3182   bs2  = bs*bs;
3183 
3184   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3185   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3186   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3187   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3188   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3189   M = header[1]; N = header[2]; nz = header[3];
3190 
3191   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3192   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3193 
3194   /*
3195      This code adds extra rows to make sure the number of rows is
3196     divisible by the blocksize
3197   */
3198   mbs        = M/bs;
3199   extra_rows = bs - M + bs*(mbs);
3200   if (extra_rows == bs) extra_rows = 0;
3201   else mbs++;
3202   if (extra_rows) {
3203     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3204   }
3205 
3206   /* Set global sizes if not already set */
3207   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3208     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3209   } else { /* Check if the matrix global sizes are correct */
3210     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3211     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3212       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3213     }
3214     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3215   }
3216 
3217   /* read in row lengths */
3218   ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
3219   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3220   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3221 
3222   /* read in column indices */
3223   ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr);
3224   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3225   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3226 
3227   /* loop over row lengths determining block row lengths */
3228   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3229   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3230   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3231   rowcount = 0;
3232   nzcount  = 0;
3233   for (i=0; i<mbs; i++) {
3234     nmask = 0;
3235     for (j=0; j<bs; j++) {
3236       kmax = rowlengths[rowcount];
3237       for (k=0; k<kmax; k++) {
3238         tmp = jj[nzcount++]/bs;
3239         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3240       }
3241       rowcount++;
3242     }
3243     browlengths[i] += nmask;
3244     /* zero out the mask elements we set */
3245     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3246   }
3247 
3248   /* Do preallocation  */
3249   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3250   a    = (Mat_SeqBAIJ*)newmat->data;
3251 
3252   /* set matrix "i" values */
3253   a->i[0] = 0;
3254   for (i=1; i<= mbs; i++) {
3255     a->i[i]      = a->i[i-1] + browlengths[i-1];
3256     a->ilen[i-1] = browlengths[i-1];
3257   }
3258   a->nz = 0;
3259   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3260 
3261   /* read in nonzero values */
3262   ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr);
3263   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3264   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3265 
3266   /* set "a" and "j" values into matrix */
3267   nzcount = 0; jcount = 0;
3268   for (i=0; i<mbs; i++) {
3269     nzcountb = nzcount;
3270     nmask    = 0;
3271     for (j=0; j<bs; j++) {
3272       kmax = rowlengths[i*bs+j];
3273       for (k=0; k<kmax; k++) {
3274         tmp = jj[nzcount++]/bs;
3275         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3276       }
3277     }
3278     /* sort the masked values */
3279     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3280 
3281     /* set "j" values into matrix */
3282     maskcount = 1;
3283     for (j=0; j<nmask; j++) {
3284       a->j[jcount++]  = masked[j];
3285       mask[masked[j]] = maskcount++;
3286     }
3287     /* set "a" values into matrix */
3288     ishift = bs2*a->i[i];
3289     for (j=0; j<bs; j++) {
3290       kmax = rowlengths[i*bs+j];
3291       for (k=0; k<kmax; k++) {
3292         tmp       = jj[nzcountb]/bs;
3293         block     = mask[tmp] - 1;
3294         point     = jj[nzcountb] - bs*tmp;
3295         idx       = ishift + bs2*block + j + bs*point;
3296         a->a[idx] = (MatScalar)aa[nzcountb++];
3297       }
3298     }
3299     /* zero out the mask elements we set */
3300     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3301   }
3302   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3303 
3304   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3305   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3306   ierr = PetscFree(aa);CHKERRQ(ierr);
3307   ierr = PetscFree(jj);CHKERRQ(ierr);
3308   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3309 
3310   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3311   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3312   PetscFunctionReturn(0);
3313 }
3314 
3315 #undef __FUNCT__
3316 #define __FUNCT__ "MatCreateSeqBAIJ"
3317 /*@C
3318    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3319    compressed row) format.  For good matrix assembly performance the
3320    user should preallocate the matrix storage by setting the parameter nz
3321    (or the array nnz).  By setting these parameters accurately, performance
3322    during matrix assembly can be increased by more than a factor of 50.
3323 
3324    Collective on MPI_Comm
3325 
3326    Input Parameters:
3327 +  comm - MPI communicator, set to PETSC_COMM_SELF
3328 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3329           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3330 .  m - number of rows
3331 .  n - number of columns
3332 .  nz - number of nonzero blocks  per block row (same for all rows)
3333 -  nnz - array containing the number of nonzero blocks in the various block rows
3334          (possibly different for each block row) or NULL
3335 
3336    Output Parameter:
3337 .  A - the matrix
3338 
3339    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3340    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3341    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3342 
3343    Options Database Keys:
3344 .   -mat_no_unroll - uses code that does not unroll the loops in the
3345                      block calculations (much slower)
3346 .    -mat_block_size - size of the blocks to use
3347 
3348    Level: intermediate
3349 
3350    Notes:
3351    The number of rows and columns must be divisible by blocksize.
3352 
3353    If the nnz parameter is given then the nz parameter is ignored
3354 
3355    A nonzero block is any block that as 1 or more nonzeros in it
3356 
3357    The block AIJ format is fully compatible with standard Fortran 77
3358    storage.  That is, the stored row and column indices can begin at
3359    either one (as in Fortran) or zero.  See the users' manual for details.
3360 
3361    Specify the preallocated storage with either nz or nnz (not both).
3362    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3363    allocation.  See Users-Manual: ch_mat for details.
3364    matrices.
3365 
3366 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3367 @*/
3368 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3369 {
3370   PetscErrorCode ierr;
3371 
3372   PetscFunctionBegin;
3373   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3374   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3375   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3376   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3377   PetscFunctionReturn(0);
3378 }
3379 
3380 #undef __FUNCT__
3381 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3382 /*@C
3383    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3384    per row in the matrix. For good matrix assembly performance the
3385    user should preallocate the matrix storage by setting the parameter nz
3386    (or the array nnz).  By setting these parameters accurately, performance
3387    during matrix assembly can be increased by more than a factor of 50.
3388 
3389    Collective on MPI_Comm
3390 
3391    Input Parameters:
3392 +  B - the matrix
3393 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3394           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3395 .  nz - number of block nonzeros per block row (same for all rows)
3396 -  nnz - array containing the number of block nonzeros in the various block rows
3397          (possibly different for each block row) or NULL
3398 
3399    Options Database Keys:
3400 .   -mat_no_unroll - uses code that does not unroll the loops in the
3401                      block calculations (much slower)
3402 .    -mat_block_size - size of the blocks to use
3403 
3404    Level: intermediate
3405 
3406    Notes:
3407    If the nnz parameter is given then the nz parameter is ignored
3408 
3409    You can call MatGetInfo() to get information on how effective the preallocation was;
3410    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3411    You can also run with the option -info and look for messages with the string
3412    malloc in them to see if additional memory allocation was needed.
3413 
3414    The block AIJ format is fully compatible with standard Fortran 77
3415    storage.  That is, the stored row and column indices can begin at
3416    either one (as in Fortran) or zero.  See the users' manual for details.
3417 
3418    Specify the preallocated storage with either nz or nnz (not both).
3419    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3420    allocation.  See Users-Manual: ch_mat for details.
3421 
3422 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3423 @*/
3424 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3425 {
3426   PetscErrorCode ierr;
3427 
3428   PetscFunctionBegin;
3429   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3430   PetscValidType(B,1);
3431   PetscValidLogicalCollectiveInt(B,bs,2);
3432   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3433   PetscFunctionReturn(0);
3434 }
3435 
3436 #undef __FUNCT__
3437 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3438 /*@C
3439    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3440    (the default sequential PETSc format).
3441 
3442    Collective on MPI_Comm
3443 
3444    Input Parameters:
3445 +  B - the matrix
3446 .  i - the indices into j for the start of each local row (starts with zero)
3447 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3448 -  v - optional values in the matrix
3449 
3450    Level: developer
3451 
3452    Notes:
3453    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3454    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3455    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3456    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3457    block column and the second index is over columns within a block.
3458 
3459 .keywords: matrix, aij, compressed row, sparse
3460 
3461 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3462 @*/
3463 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3464 {
3465   PetscErrorCode ierr;
3466 
3467   PetscFunctionBegin;
3468   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3469   PetscValidType(B,1);
3470   PetscValidLogicalCollectiveInt(B,bs,2);
3471   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3472   PetscFunctionReturn(0);
3473 }
3474 
3475 
3476 #undef __FUNCT__
3477 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3478 /*@
3479      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3480 
3481      Collective on MPI_Comm
3482 
3483    Input Parameters:
3484 +  comm - must be an MPI communicator of size 1
3485 .  bs - size of block
3486 .  m - number of rows
3487 .  n - number of columns
3488 .  i - row indices
3489 .  j - column indices
3490 -  a - matrix values
3491 
3492    Output Parameter:
3493 .  mat - the matrix
3494 
3495    Level: advanced
3496 
3497    Notes:
3498        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3499     once the matrix is destroyed
3500 
3501        You cannot set new nonzero locations into this matrix, that will generate an error.
3502 
3503        The i and j indices are 0 based
3504 
3505        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3506 
3507       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3508       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3509       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3510       with column-major ordering within blocks.
3511 
3512 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3513 
3514 @*/
3515 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3516 {
3517   PetscErrorCode ierr;
3518   PetscInt       ii;
3519   Mat_SeqBAIJ    *baij;
3520 
3521   PetscFunctionBegin;
3522   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3523   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3524 
3525   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3526   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3527   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3528   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3529   baij = (Mat_SeqBAIJ*)(*mat)->data;
3530   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3531   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3532 
3533   baij->i = i;
3534   baij->j = j;
3535   baij->a = a;
3536 
3537   baij->singlemalloc = PETSC_FALSE;
3538   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3539   baij->free_a       = PETSC_FALSE;
3540   baij->free_ij      = PETSC_FALSE;
3541 
3542   for (ii=0; ii<m; ii++) {
3543     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3544 #if defined(PETSC_USE_DEBUG)
3545     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3546 #endif
3547   }
3548 #if defined(PETSC_USE_DEBUG)
3549   for (ii=0; ii<baij->i[m]; ii++) {
3550     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3551     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3552   }
3553 #endif
3554 
3555   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3556   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3557   PetscFunctionReturn(0);
3558 }
3559 
3560 #undef __FUNCT__
3561 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_SeqBAIJ"
3562 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3563 {
3564   PetscErrorCode ierr;
3565 
3566   PetscFunctionBegin;
3567   ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3568   PetscFunctionReturn(0);
3569 }
3570