xref: /petsc/src/mat/impls/baij/seq/baij.c (revision bebe2cf65d55febe21a5af8db2bd2e168caaa2e7)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) {
23     if (values) *values = a->idiag;
24     PetscFunctionReturn(0);
25   }
26   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
27   diag_offset = a->diag;
28   if (!a->idiag) {
29     ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr);
30     ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
31   }
32   diag  = a->idiag;
33   mdiag = a->idiag+bs2*mbs;
34   if (values) *values = a->idiag;
35   /* factor and invert each block */
36   switch (bs) {
37   case 1:
38     for (i=0; i<mbs; i++) {
39       odiag    = v + 1*diag_offset[i];
40       diag[0]  = odiag[0];
41       mdiag[0] = odiag[0];
42       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
43       diag    += 1;
44       mdiag   += 1;
45     }
46     break;
47   case 2:
48     for (i=0; i<mbs; i++) {
49       odiag    = v + 4*diag_offset[i];
50       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
53       diag    += 4;
54       mdiag   += 4;
55     }
56     break;
57   case 3:
58     for (i=0; i<mbs; i++) {
59       odiag    = v + 9*diag_offset[i];
60       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62       diag[8]  = odiag[8];
63       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65       mdiag[8] = odiag[8];
66       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
67       diag    += 9;
68       mdiag   += 9;
69     }
70     break;
71   case 4:
72     for (i=0; i<mbs; i++) {
73       odiag  = v + 16*diag_offset[i];
74       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
75       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
76       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
77       diag  += 16;
78       mdiag += 16;
79     }
80     break;
81   case 5:
82     for (i=0; i<mbs; i++) {
83       odiag  = v + 25*diag_offset[i];
84       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
85       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
86       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
87       diag  += 25;
88       mdiag += 25;
89     }
90     break;
91   case 6:
92     for (i=0; i<mbs; i++) {
93       odiag  = v + 36*diag_offset[i];
94       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
95       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
96       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
97       diag  += 36;
98       mdiag += 36;
99     }
100     break;
101   case 7:
102     for (i=0; i<mbs; i++) {
103       odiag  = v + 49*diag_offset[i];
104       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
105       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
106       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
107       diag  += 49;
108       mdiag += 49;
109     }
110     break;
111   default:
112     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
113     for (i=0; i<mbs; i++) {
114       odiag  = v + bs2*diag_offset[i];
115       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
116       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
117       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
118       diag  += bs2;
119       mdiag += bs2;
120     }
121     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
122   }
123   a->idiagvalid = PETSC_TRUE;
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "MatSOR_SeqBAIJ"
129 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130 {
131   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
132   PetscScalar       *x,*work,*w,*workt,*t;
133   const MatScalar   *v,*aa = a->a, *idiag;
134   const PetscScalar *b,*xb;
135   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
136   PetscErrorCode    ierr;
137   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
138   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
139 
140   PetscFunctionBegin;
141   if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */
142   its = its*lits;
143   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
144   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
145   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
146   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
147   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
148 
149   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
150 
151   if (!m) PetscFunctionReturn(0);
152   diag  = a->diag;
153   idiag = a->idiag;
154   k    = PetscMax(A->rmap->n,A->cmap->n);
155   if (!a->mult_work) {
156     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
157   }
158   if (!a->sor_workt) {
159     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
160   }
161   if (!a->sor_work) {
162     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
163   }
164   work = a->mult_work;
165   t    = a->sor_workt;
166   w    = a->sor_work;
167 
168   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
169   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
170 
171   if (flag & SOR_ZERO_INITIAL_GUESS) {
172     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
173       switch (bs) {
174       case 1:
175         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
176         t[0] = b[0];
177         i2     = 1;
178         idiag += 1;
179         for (i=1; i<m; i++) {
180           v  = aa + ai[i];
181           vi = aj + ai[i];
182           nz = diag[i] - ai[i];
183           s[0] = b[i2];
184           for (j=0; j<nz; j++) {
185             xw[0] = x[vi[j]];
186             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
187           }
188           t[i2] = s[0];
189           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
190           x[i2]  = xw[0];
191           idiag += 1;
192           i2    += 1;
193         }
194         break;
195       case 2:
196         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
197         t[0] = b[0]; t[1] = b[1];
198         i2     = 2;
199         idiag += 4;
200         for (i=1; i<m; i++) {
201           v  = aa + 4*ai[i];
202           vi = aj + ai[i];
203           nz = diag[i] - ai[i];
204           s[0] = b[i2]; s[1] = b[i2+1];
205           for (j=0; j<nz; j++) {
206             idx = 2*vi[j];
207             it  = 4*j;
208             xw[0] = x[idx]; xw[1] = x[1+idx];
209             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
210           }
211           t[i2] = s[0]; t[i2+1] = s[1];
212           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
213           x[i2]   = xw[0]; x[i2+1] = xw[1];
214           idiag  += 4;
215           i2     += 2;
216         }
217         break;
218       case 3:
219         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
220         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
221         i2     = 3;
222         idiag += 9;
223         for (i=1; i<m; i++) {
224           v  = aa + 9*ai[i];
225           vi = aj + ai[i];
226           nz = diag[i] - ai[i];
227           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
228           while (nz--) {
229             idx = 3*(*vi++);
230             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
231             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
232             v  += 9;
233           }
234           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
235           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
236           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
237           idiag  += 9;
238           i2     += 3;
239         }
240         break;
241       case 4:
242         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
243         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
244         i2     = 4;
245         idiag += 16;
246         for (i=1; i<m; i++) {
247           v  = aa + 16*ai[i];
248           vi = aj + ai[i];
249           nz = diag[i] - ai[i];
250           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
251           while (nz--) {
252             idx = 4*(*vi++);
253             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
254             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
255             v  += 16;
256           }
257           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
258           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
259           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
260           idiag  += 16;
261           i2     += 4;
262         }
263         break;
264       case 5:
265         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
266         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
267         i2     = 5;
268         idiag += 25;
269         for (i=1; i<m; i++) {
270           v  = aa + 25*ai[i];
271           vi = aj + ai[i];
272           nz = diag[i] - ai[i];
273           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
274           while (nz--) {
275             idx = 5*(*vi++);
276             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
277             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
278             v  += 25;
279           }
280           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
281           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
282           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
283           idiag  += 25;
284           i2     += 5;
285         }
286         break;
287       case 6:
288         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
289         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
290         i2     = 6;
291         idiag += 36;
292         for (i=1; i<m; i++) {
293           v  = aa + 36*ai[i];
294           vi = aj + ai[i];
295           nz = diag[i] - ai[i];
296           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
297           while (nz--) {
298             idx = 6*(*vi++);
299             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
300             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
301             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
302             v  += 36;
303           }
304           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
305           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
306           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
307           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
308           idiag  += 36;
309           i2     += 6;
310         }
311         break;
312       case 7:
313         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
314         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
315         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
316         i2     = 7;
317         idiag += 49;
318         for (i=1; i<m; i++) {
319           v  = aa + 49*ai[i];
320           vi = aj + ai[i];
321           nz = diag[i] - ai[i];
322           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
323           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
324           while (nz--) {
325             idx = 7*(*vi++);
326             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
327             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
328             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
329             v  += 49;
330           }
331           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
332           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
333           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
334           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
335           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
336           idiag  += 49;
337           i2     += 7;
338         }
339         break;
340       default:
341         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
342         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
343         i2     = bs;
344         idiag += bs2;
345         for (i=1; i<m; i++) {
346           v  = aa + bs2*ai[i];
347           vi = aj + ai[i];
348           nz = diag[i] - ai[i];
349 
350           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
351           /* copy all rows of x that are needed into contiguous space */
352           workt = work;
353           for (j=0; j<nz; j++) {
354             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
355             workt += bs;
356           }
357           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
358           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
359           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
360 
361           idiag += bs2;
362           i2    += bs;
363         }
364         break;
365       }
366       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
367       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
368       xb = t;
369     }
370     else xb = b;
371     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
372       idiag = a->idiag+bs2*(a->mbs-1);
373       i2 = bs * (m-1);
374       switch (bs) {
375       case 1:
376         s[0]  = xb[i2];
377         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
378         x[i2] = xw[0];
379         i2   -= 1;
380         for (i=m-2; i>=0; i--) {
381           v  = aa + (diag[i]+1);
382           vi = aj + diag[i] + 1;
383           nz = ai[i+1] - diag[i] - 1;
384           s[0] = xb[i2];
385           for (j=0; j<nz; j++) {
386             xw[0] = x[vi[j]];
387             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
388           }
389           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
390           x[i2]  = xw[0];
391           idiag -= 1;
392           i2    -= 1;
393         }
394         break;
395       case 2:
396         s[0]  = xb[i2]; s[1] = xb[i2+1];
397         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
398         x[i2] = xw[0]; x[i2+1] = xw[1];
399         i2    -= 2;
400         idiag -= 4;
401         for (i=m-2; i>=0; i--) {
402           v  = aa + 4*(diag[i] + 1);
403           vi = aj + diag[i] + 1;
404           nz = ai[i+1] - diag[i] - 1;
405           s[0] = xb[i2]; s[1] = xb[i2+1];
406           for (j=0; j<nz; j++) {
407             idx = 2*vi[j];
408             it  = 4*j;
409             xw[0] = x[idx]; xw[1] = x[1+idx];
410             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
411           }
412           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
413           x[i2]   = xw[0]; x[i2+1] = xw[1];
414           idiag  -= 4;
415           i2     -= 2;
416         }
417         break;
418       case 3:
419         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
420         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
421         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
422         i2    -= 3;
423         idiag -= 9;
424         for (i=m-2; i>=0; i--) {
425           v  = aa + 9*(diag[i]+1);
426           vi = aj + diag[i] + 1;
427           nz = ai[i+1] - diag[i] - 1;
428           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
429           while (nz--) {
430             idx = 3*(*vi++);
431             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
432             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
433             v  += 9;
434           }
435           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
436           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
437           idiag  -= 9;
438           i2     -= 3;
439         }
440         break;
441       case 4:
442         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
443         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
444         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
445         i2    -= 4;
446         idiag -= 16;
447         for (i=m-2; i>=0; i--) {
448           v  = aa + 16*(diag[i]+1);
449           vi = aj + diag[i] + 1;
450           nz = ai[i+1] - diag[i] - 1;
451           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
452           while (nz--) {
453             idx = 4*(*vi++);
454             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
455             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
456             v  += 16;
457           }
458           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
459           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
460           idiag  -= 16;
461           i2     -= 4;
462         }
463         break;
464       case 5:
465         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
466         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
467         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
468         i2    -= 5;
469         idiag -= 25;
470         for (i=m-2; i>=0; i--) {
471           v  = aa + 25*(diag[i]+1);
472           vi = aj + diag[i] + 1;
473           nz = ai[i+1] - diag[i] - 1;
474           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
475           while (nz--) {
476             idx = 5*(*vi++);
477             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
478             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
479             v  += 25;
480           }
481           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
482           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
483           idiag  -= 25;
484           i2     -= 5;
485         }
486         break;
487       case 6:
488         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
489         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
490         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
491         i2    -= 6;
492         idiag -= 36;
493         for (i=m-2; i>=0; i--) {
494           v  = aa + 36*(diag[i]+1);
495           vi = aj + diag[i] + 1;
496           nz = ai[i+1] - diag[i] - 1;
497           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
498           while (nz--) {
499             idx = 6*(*vi++);
500             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
501             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
502             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
503             v  += 36;
504           }
505           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
506           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
507           idiag  -= 36;
508           i2     -= 6;
509         }
510         break;
511       case 7:
512         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
513         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
514         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
515         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
516         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
517         i2    -= 7;
518         idiag -= 49;
519         for (i=m-2; i>=0; i--) {
520           v  = aa + 49*(diag[i]+1);
521           vi = aj + diag[i] + 1;
522           nz = ai[i+1] - diag[i] - 1;
523           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
524           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
525           while (nz--) {
526             idx = 7*(*vi++);
527             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
528             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
529             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
530             v  += 49;
531           }
532           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
533           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
534           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
535           idiag  -= 49;
536           i2     -= 7;
537         }
538         break;
539       default:
540         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
541         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
542         i2    -= bs;
543         idiag -= bs2;
544         for (i=m-2; i>=0; i--) {
545           v  = aa + bs2*(diag[i]+1);
546           vi = aj + diag[i] + 1;
547           nz = ai[i+1] - diag[i] - 1;
548 
549           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
550           /* copy all rows of x that are needed into contiguous space */
551           workt = work;
552           for (j=0; j<nz; j++) {
553             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
554             workt += bs;
555           }
556           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
557           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
558 
559           idiag -= bs2;
560           i2    -= bs;
561         }
562         break;
563       }
564       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
565     }
566     its--;
567   }
568   while (its--) {
569     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
570       idiag = a->idiag;
571       i2 = 0;
572       switch (bs) {
573       case 1:
574         for (i=0; i<m; i++) {
575           v  = aa + ai[i];
576           vi = aj + ai[i];
577           nz = ai[i+1] - ai[i];
578           s[0] = b[i2];
579           for (j=0; j<nz; j++) {
580             xw[0] = x[vi[j]];
581             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
582           }
583           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
584           x[i2] += xw[0];
585           idiag += 1;
586           i2    += 1;
587         }
588         break;
589       case 2:
590         for (i=0; i<m; i++) {
591           v  = aa + 4*ai[i];
592           vi = aj + ai[i];
593           nz = ai[i+1] - ai[i];
594           s[0] = b[i2]; s[1] = b[i2+1];
595           for (j=0; j<nz; j++) {
596             idx = 2*vi[j];
597             it  = 4*j;
598             xw[0] = x[idx]; xw[1] = x[1+idx];
599             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
600           }
601           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
602           x[i2]  += xw[0]; x[i2+1] += xw[1];
603           idiag  += 4;
604           i2     += 2;
605         }
606         break;
607       case 3:
608         for (i=0; i<m; i++) {
609           v  = aa + 9*ai[i];
610           vi = aj + ai[i];
611           nz = ai[i+1] - ai[i];
612           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
613           while (nz--) {
614             idx = 3*(*vi++);
615             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
616             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
617             v  += 9;
618           }
619           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
620           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
621           idiag  += 9;
622           i2     += 3;
623         }
624         break;
625       case 4:
626         for (i=0; i<m; i++) {
627           v  = aa + 16*ai[i];
628           vi = aj + ai[i];
629           nz = ai[i+1] - ai[i];
630           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
631           while (nz--) {
632             idx = 4*(*vi++);
633             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
634             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
635             v  += 16;
636           }
637           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
638           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
639           idiag  += 16;
640           i2     += 4;
641         }
642         break;
643       case 5:
644         for (i=0; i<m; i++) {
645           v  = aa + 25*ai[i];
646           vi = aj + ai[i];
647           nz = ai[i+1] - ai[i];
648           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
649           while (nz--) {
650             idx = 5*(*vi++);
651             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
652             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
653             v  += 25;
654           }
655           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
656           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
657           idiag  += 25;
658           i2     += 5;
659         }
660         break;
661       case 6:
662         for (i=0; i<m; i++) {
663           v  = aa + 36*ai[i];
664           vi = aj + ai[i];
665           nz = ai[i+1] - ai[i];
666           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
667           while (nz--) {
668             idx = 6*(*vi++);
669             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
670             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
671             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
672             v  += 36;
673           }
674           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
675           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
676           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
677           idiag  += 36;
678           i2     += 6;
679         }
680         break;
681       case 7:
682         for (i=0; i<m; i++) {
683           v  = aa + 49*ai[i];
684           vi = aj + ai[i];
685           nz = ai[i+1] - ai[i];
686           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
687           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
688           while (nz--) {
689             idx = 7*(*vi++);
690             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
691             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
692             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
693             v  += 49;
694           }
695           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
696           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
697           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
698           idiag  += 49;
699           i2     += 7;
700         }
701         break;
702       default:
703         for (i=0; i<m; i++) {
704           v  = aa + bs2*ai[i];
705           vi = aj + ai[i];
706           nz = ai[i+1] - ai[i];
707 
708           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
709           /* copy all rows of x that are needed into contiguous space */
710           workt = work;
711           for (j=0; j<nz; j++) {
712             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
713             workt += bs;
714           }
715           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
716           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
717 
718           idiag += bs2;
719           i2    += bs;
720         }
721         break;
722       }
723       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
724     }
725     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
726       idiag = a->idiag+bs2*(a->mbs-1);
727       i2 = bs * (m-1);
728       switch (bs) {
729       case 1:
730         for (i=m-1; i>=0; i--) {
731           v  = aa + ai[i];
732           vi = aj + ai[i];
733           nz = ai[i+1] - ai[i];
734           s[0] = b[i2];
735           for (j=0; j<nz; j++) {
736             xw[0] = x[vi[j]];
737             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
738           }
739           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
740           x[i2] += xw[0];
741           idiag -= 1;
742           i2    -= 1;
743         }
744         break;
745       case 2:
746         for (i=m-1; i>=0; i--) {
747           v  = aa + 4*ai[i];
748           vi = aj + ai[i];
749           nz = ai[i+1] - ai[i];
750           s[0] = b[i2]; s[1] = b[i2+1];
751           for (j=0; j<nz; j++) {
752             idx = 2*vi[j];
753             it  = 4*j;
754             xw[0] = x[idx]; xw[1] = x[1+idx];
755             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
756           }
757           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
758           x[i2]  += xw[0]; x[i2+1] += xw[1];
759           idiag  -= 4;
760           i2     -= 2;
761         }
762         break;
763       case 3:
764         for (i=m-1; i>=0; i--) {
765           v  = aa + 9*ai[i];
766           vi = aj + ai[i];
767           nz = ai[i+1] - ai[i];
768           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
769           while (nz--) {
770             idx = 3*(*vi++);
771             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
772             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
773             v  += 9;
774           }
775           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
776           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
777           idiag  -= 9;
778           i2     -= 3;
779         }
780         break;
781       case 4:
782         for (i=m-1; i>=0; i--) {
783           v  = aa + 16*ai[i];
784           vi = aj + ai[i];
785           nz = ai[i+1] - ai[i];
786           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
787           while (nz--) {
788             idx = 4*(*vi++);
789             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
790             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
791             v  += 16;
792           }
793           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
794           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
795           idiag  -= 16;
796           i2     -= 4;
797         }
798         break;
799       case 5:
800         for (i=m-1; i>=0; i--) {
801           v  = aa + 25*ai[i];
802           vi = aj + ai[i];
803           nz = ai[i+1] - ai[i];
804           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
805           while (nz--) {
806             idx = 5*(*vi++);
807             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
808             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
809             v  += 25;
810           }
811           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
812           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
813           idiag  -= 25;
814           i2     -= 5;
815         }
816         break;
817       case 6:
818         for (i=m-1; i>=0; i--) {
819           v  = aa + 36*ai[i];
820           vi = aj + ai[i];
821           nz = ai[i+1] - ai[i];
822           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
823           while (nz--) {
824             idx = 6*(*vi++);
825             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
826             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
827             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
828             v  += 36;
829           }
830           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
831           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
832           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
833           idiag  -= 36;
834           i2     -= 6;
835         }
836         break;
837       case 7:
838         for (i=m-1; i>=0; i--) {
839           v  = aa + 49*ai[i];
840           vi = aj + ai[i];
841           nz = ai[i+1] - ai[i];
842           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
843           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
844           while (nz--) {
845             idx = 7*(*vi++);
846             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
847             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
848             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
849             v  += 49;
850           }
851           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
852           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
853           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
854           idiag  -= 49;
855           i2     -= 7;
856         }
857         break;
858       default:
859         for (i=m-1; i>=0; i--) {
860           v  = aa + bs2*ai[i];
861           vi = aj + ai[i];
862           nz = ai[i+1] - ai[i];
863 
864           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
865           /* copy all rows of x that are needed into contiguous space */
866           workt = work;
867           for (j=0; j<nz; j++) {
868             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
869             workt += bs;
870           }
871           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
872           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
873 
874           idiag -= bs2;
875           i2    -= bs;
876         }
877         break;
878       }
879       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
880     }
881   }
882   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
883   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
884   PetscFunctionReturn(0);
885 }
886 
887 
888 /*
889     Special version for direct calls from Fortran (Used in PETSc-fun3d)
890 */
891 #if defined(PETSC_HAVE_FORTRAN_CAPS)
892 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
893 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
894 #define matsetvaluesblocked4_ matsetvaluesblocked4
895 #endif
896 
897 #undef __FUNCT__
898 #define __FUNCT__ "matsetvaluesblocked4_"
899 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
900 {
901   Mat               A  = *AA;
902   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
903   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
904   PetscInt          *ai    =a->i,*ailen=a->ilen;
905   PetscInt          *aj    =a->j,stepval,lastcol = -1;
906   const PetscScalar *value = v;
907   MatScalar         *ap,*aa = a->a,*bap;
908 
909   PetscFunctionBegin;
910   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
911   stepval = (n-1)*4;
912   for (k=0; k<m; k++) { /* loop over added rows */
913     row  = im[k];
914     rp   = aj + ai[row];
915     ap   = aa + 16*ai[row];
916     nrow = ailen[row];
917     low  = 0;
918     high = nrow;
919     for (l=0; l<n; l++) { /* loop over added columns */
920       col = in[l];
921       if (col <= lastcol)  low = 0;
922       else                high = nrow;
923       lastcol = col;
924       value   = v + k*(stepval+4 + l)*4;
925       while (high-low > 7) {
926         t = (low+high)/2;
927         if (rp[t] > col) high = t;
928         else             low  = t;
929       }
930       for (i=low; i<high; i++) {
931         if (rp[i] > col) break;
932         if (rp[i] == col) {
933           bap = ap +  16*i;
934           for (ii=0; ii<4; ii++,value+=stepval) {
935             for (jj=ii; jj<16; jj+=4) {
936               bap[jj] += *value++;
937             }
938           }
939           goto noinsert2;
940         }
941       }
942       N = nrow++ - 1;
943       high++; /* added new column index thus must search to one higher than before */
944       /* shift up all the later entries in this row */
945       for (ii=N; ii>=i; ii--) {
946         rp[ii+1] = rp[ii];
947         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
948       }
949       if (N >= i) {
950         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
951       }
952       rp[i] = col;
953       bap   = ap +  16*i;
954       for (ii=0; ii<4; ii++,value+=stepval) {
955         for (jj=ii; jj<16; jj+=4) {
956           bap[jj] = *value++;
957         }
958       }
959       noinsert2:;
960       low = i;
961     }
962     ailen[row] = nrow;
963   }
964   PetscFunctionReturnVoid();
965 }
966 
967 #if defined(PETSC_HAVE_FORTRAN_CAPS)
968 #define matsetvalues4_ MATSETVALUES4
969 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
970 #define matsetvalues4_ matsetvalues4
971 #endif
972 
973 #undef __FUNCT__
974 #define __FUNCT__ "MatSetValues4_"
975 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
976 {
977   Mat         A  = *AA;
978   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
979   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
980   PetscInt    *ai=a->i,*ailen=a->ilen;
981   PetscInt    *aj=a->j,brow,bcol;
982   PetscInt    ridx,cidx,lastcol = -1;
983   MatScalar   *ap,value,*aa=a->a,*bap;
984 
985   PetscFunctionBegin;
986   for (k=0; k<m; k++) { /* loop over added rows */
987     row  = im[k]; brow = row/4;
988     rp   = aj + ai[brow];
989     ap   = aa + 16*ai[brow];
990     nrow = ailen[brow];
991     low  = 0;
992     high = nrow;
993     for (l=0; l<n; l++) { /* loop over added columns */
994       col   = in[l]; bcol = col/4;
995       ridx  = row % 4; cidx = col % 4;
996       value = v[l + k*n];
997       if (col <= lastcol)  low = 0;
998       else                high = nrow;
999       lastcol = col;
1000       while (high-low > 7) {
1001         t = (low+high)/2;
1002         if (rp[t] > bcol) high = t;
1003         else              low  = t;
1004       }
1005       for (i=low; i<high; i++) {
1006         if (rp[i] > bcol) break;
1007         if (rp[i] == bcol) {
1008           bap   = ap +  16*i + 4*cidx + ridx;
1009           *bap += value;
1010           goto noinsert1;
1011         }
1012       }
1013       N = nrow++ - 1;
1014       high++; /* added new column thus must search to one higher than before */
1015       /* shift up all the later entries in this row */
1016       for (ii=N; ii>=i; ii--) {
1017         rp[ii+1] = rp[ii];
1018         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1019       }
1020       if (N>=i) {
1021         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1022       }
1023       rp[i]                    = bcol;
1024       ap[16*i + 4*cidx + ridx] = value;
1025 noinsert1:;
1026       low = i;
1027     }
1028     ailen[brow] = nrow;
1029   }
1030   PetscFunctionReturnVoid();
1031 }
1032 
1033 /*
1034      Checks for missing diagonals
1035 */
1036 #undef __FUNCT__
1037 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1038 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1039 {
1040   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1041   PetscErrorCode ierr;
1042   PetscInt       *diag,*ii = a->i,i;
1043 
1044   PetscFunctionBegin;
1045   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1046   *missing = PETSC_FALSE;
1047   if (A->rmap->n > 0 && !ii) {
1048     *missing = PETSC_TRUE;
1049     if (d) *d = 0;
1050     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1051   } else {
1052     diag = a->diag;
1053     for (i=0; i<a->mbs; i++) {
1054       if (diag[i] >= ii[i+1]) {
1055         *missing = PETSC_TRUE;
1056         if (d) *d = i;
1057         PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);
1058         break;
1059       }
1060     }
1061   }
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 #undef __FUNCT__
1066 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1067 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1068 {
1069   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1070   PetscErrorCode ierr;
1071   PetscInt       i,j,m = a->mbs;
1072 
1073   PetscFunctionBegin;
1074   if (!a->diag) {
1075     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1076     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1077     a->free_diag = PETSC_TRUE;
1078   }
1079   for (i=0; i<m; i++) {
1080     a->diag[i] = a->i[i+1];
1081     for (j=a->i[i]; j<a->i[i+1]; j++) {
1082       if (a->j[j] == i) {
1083         a->diag[i] = j;
1084         break;
1085       }
1086     }
1087   }
1088   PetscFunctionReturn(0);
1089 }
1090 
1091 
1092 #undef __FUNCT__
1093 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1094 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1095 {
1096   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1097   PetscErrorCode ierr;
1098   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1099   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1100 
1101   PetscFunctionBegin;
1102   *nn = n;
1103   if (!ia) PetscFunctionReturn(0);
1104   if (symmetric) {
1105     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1106     nz   = tia[n];
1107   } else {
1108     tia = a->i; tja = a->j;
1109   }
1110 
1111   if (!blockcompressed && bs > 1) {
1112     (*nn) *= bs;
1113     /* malloc & create the natural set of indices */
1114     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1115     if (n) {
1116       (*ia)[0] = 0;
1117       for (j=1; j<bs; j++) {
1118         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1119       }
1120     }
1121 
1122     for (i=1; i<n; i++) {
1123       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1124       for (j=1; j<bs; j++) {
1125         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1126       }
1127     }
1128     if (n) {
1129       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1130     }
1131 
1132     if (inja) {
1133       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1134       cnt = 0;
1135       for (i=0; i<n; i++) {
1136         for (j=0; j<bs; j++) {
1137           for (k=tia[i]; k<tia[i+1]; k++) {
1138             for (l=0; l<bs; l++) {
1139               (*ja)[cnt++] = bs*tja[k] + l;
1140             }
1141           }
1142         }
1143       }
1144     }
1145 
1146     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1147       ierr = PetscFree(tia);CHKERRQ(ierr);
1148       ierr = PetscFree(tja);CHKERRQ(ierr);
1149     }
1150   } else if (oshift == 1) {
1151     if (symmetric) {
1152       nz = tia[A->rmap->n/bs];
1153       /*  add 1 to i and j indices */
1154       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1155       *ia = tia;
1156       if (ja) {
1157         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1158         *ja = tja;
1159       }
1160     } else {
1161       nz = a->i[A->rmap->n/bs];
1162       /* malloc space and  add 1 to i and j indices */
1163       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1164       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1165       if (ja) {
1166         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1167         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1168       }
1169     }
1170   } else {
1171     *ia = tia;
1172     if (ja) *ja = tja;
1173   }
1174   PetscFunctionReturn(0);
1175 }
1176 
1177 #undef __FUNCT__
1178 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1179 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1180 {
1181   PetscErrorCode ierr;
1182 
1183   PetscFunctionBegin;
1184   if (!ia) PetscFunctionReturn(0);
1185   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1186     ierr = PetscFree(*ia);CHKERRQ(ierr);
1187     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1188   }
1189   PetscFunctionReturn(0);
1190 }
1191 
1192 #undef __FUNCT__
1193 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1194 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1195 {
1196   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1197   PetscErrorCode ierr;
1198 
1199   PetscFunctionBegin;
1200 #if defined(PETSC_USE_LOG)
1201   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1202 #endif
1203   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1204   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1205   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1206   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1207   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1208   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1209   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1210   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1211   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1212   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1213   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1214   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1215   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1216 
1217   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1218   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1219   ierr = PetscFree(A->data);CHKERRQ(ierr);
1220 
1221   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1222   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1223   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1224   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1225   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1226   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1227   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1228   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1229   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1230   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1231   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 #undef __FUNCT__
1236 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1237 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1238 {
1239   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1240   PetscErrorCode ierr;
1241 
1242   PetscFunctionBegin;
1243   switch (op) {
1244   case MAT_ROW_ORIENTED:
1245     a->roworiented = flg;
1246     break;
1247   case MAT_KEEP_NONZERO_PATTERN:
1248     a->keepnonzeropattern = flg;
1249     break;
1250   case MAT_NEW_NONZERO_LOCATIONS:
1251     a->nonew = (flg ? 0 : 1);
1252     break;
1253   case MAT_NEW_NONZERO_LOCATION_ERR:
1254     a->nonew = (flg ? -1 : 0);
1255     break;
1256   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1257     a->nonew = (flg ? -2 : 0);
1258     break;
1259   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1260     a->nounused = (flg ? -1 : 0);
1261     break;
1262   case MAT_NEW_DIAGONALS:
1263   case MAT_IGNORE_OFF_PROC_ENTRIES:
1264   case MAT_USE_HASH_TABLE:
1265     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1266     break;
1267   case MAT_SPD:
1268   case MAT_SYMMETRIC:
1269   case MAT_STRUCTURALLY_SYMMETRIC:
1270   case MAT_HERMITIAN:
1271   case MAT_SYMMETRY_ETERNAL:
1272     /* These options are handled directly by MatSetOption() */
1273     break;
1274   default:
1275     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1276   }
1277   PetscFunctionReturn(0);
1278 }
1279 
1280 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1281 #undef __FUNCT__
1282 #define __FUNCT__ "MatGetRow_SeqBAIJ_private"
1283 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1284 {
1285   PetscErrorCode ierr;
1286   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1287   MatScalar      *aa_i;
1288   PetscScalar    *v_i;
1289 
1290   PetscFunctionBegin;
1291   bs  = A->rmap->bs;
1292   bs2 = bs*bs;
1293   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1294 
1295   bn  = row/bs;   /* Block number */
1296   bp  = row % bs; /* Block Position */
1297   M   = ai[bn+1] - ai[bn];
1298   *nz = bs*M;
1299 
1300   if (v) {
1301     *v = 0;
1302     if (*nz) {
1303       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1304       for (i=0; i<M; i++) { /* for each block in the block row */
1305         v_i  = *v + i*bs;
1306         aa_i = aa + bs2*(ai[bn] + i);
1307         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1308       }
1309     }
1310   }
1311 
1312   if (idx) {
1313     *idx = 0;
1314     if (*nz) {
1315       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1316       for (i=0; i<M; i++) { /* for each block in the block row */
1317         idx_i = *idx + i*bs;
1318         itmp  = bs*aj[ai[bn] + i];
1319         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1320       }
1321     }
1322   }
1323   PetscFunctionReturn(0);
1324 }
1325 
1326 #undef __FUNCT__
1327 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1328 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1329 {
1330   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1331   PetscErrorCode ierr;
1332 
1333   PetscFunctionBegin;
1334   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1335   PetscFunctionReturn(0);
1336 }
1337 
1338 #undef __FUNCT__
1339 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1340 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1341 {
1342   PetscErrorCode ierr;
1343 
1344   PetscFunctionBegin;
1345   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1346   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1347   PetscFunctionReturn(0);
1348 }
1349 
1350 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1351 
1352 #undef __FUNCT__
1353 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1354 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1355 {
1356   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1357   Mat            C;
1358   PetscErrorCode ierr;
1359   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1360   PetscInt       *rows,*cols,bs2=a->bs2;
1361   MatScalar      *array;
1362 
1363   PetscFunctionBegin;
1364   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1365   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1366     ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr);
1367 
1368     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1369     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1370     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1371     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1372     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1373     ierr = PetscFree(col);CHKERRQ(ierr);
1374   } else {
1375     C = *B;
1376   }
1377 
1378   array = a->a;
1379   ierr  = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr);
1380   for (i=0; i<mbs; i++) {
1381     cols[0] = i*bs;
1382     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1383     len = ai[i+1] - ai[i];
1384     for (j=0; j<len; j++) {
1385       rows[0] = (*aj++)*bs;
1386       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1387       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1388       array += bs2;
1389     }
1390   }
1391   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1392 
1393   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1394   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1395 
1396   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1397     *B = C;
1398   } else {
1399     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1400   }
1401   PetscFunctionReturn(0);
1402 }
1403 
1404 #undef __FUNCT__
1405 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1406 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1407 {
1408   PetscErrorCode ierr;
1409   Mat            Btrans;
1410 
1411   PetscFunctionBegin;
1412   *f   = PETSC_FALSE;
1413   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1414   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1415   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1416   PetscFunctionReturn(0);
1417 }
1418 
1419 #undef __FUNCT__
1420 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1421 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1422 {
1423   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1424   PetscErrorCode ierr;
1425   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1426   int            fd;
1427   PetscScalar    *aa;
1428   FILE           *file;
1429 
1430   PetscFunctionBegin;
1431   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1432   ierr        = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr);
1433   col_lens[0] = MAT_FILE_CLASSID;
1434 
1435   col_lens[1] = A->rmap->N;
1436   col_lens[2] = A->cmap->n;
1437   col_lens[3] = a->nz*bs2;
1438 
1439   /* store lengths of each row and write (including header) to file */
1440   count = 0;
1441   for (i=0; i<a->mbs; i++) {
1442     for (j=0; j<bs; j++) {
1443       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1444     }
1445   }
1446   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1447   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1448 
1449   /* store column indices (zero start index) */
1450   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1451   count = 0;
1452   for (i=0; i<a->mbs; i++) {
1453     for (j=0; j<bs; j++) {
1454       for (k=a->i[i]; k<a->i[i+1]; k++) {
1455         for (l=0; l<bs; l++) {
1456           jj[count++] = bs*a->j[k] + l;
1457         }
1458       }
1459     }
1460   }
1461   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1462   ierr = PetscFree(jj);CHKERRQ(ierr);
1463 
1464   /* store nonzero values */
1465   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1466   count = 0;
1467   for (i=0; i<a->mbs; i++) {
1468     for (j=0; j<bs; j++) {
1469       for (k=a->i[i]; k<a->i[i+1]; k++) {
1470         for (l=0; l<bs; l++) {
1471           aa[count++] = a->a[bs2*k + l*bs + j];
1472         }
1473       }
1474     }
1475   }
1476   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1477   ierr = PetscFree(aa);CHKERRQ(ierr);
1478 
1479   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1480   if (file) {
1481     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1482   }
1483   PetscFunctionReturn(0);
1484 }
1485 
1486 #undef __FUNCT__
1487 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1488 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1489 {
1490   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1491   PetscErrorCode    ierr;
1492   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1493   PetscViewerFormat format;
1494 
1495   PetscFunctionBegin;
1496   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1497   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1498     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1499   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1500     const char *matname;
1501     Mat        aij;
1502     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1503     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1504     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1505     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1506     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1507   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1508       PetscFunctionReturn(0);
1509   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1510     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1511     for (i=0; i<a->mbs; i++) {
1512       for (j=0; j<bs; j++) {
1513         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1514         for (k=a->i[i]; k<a->i[i+1]; k++) {
1515           for (l=0; l<bs; l++) {
1516 #if defined(PETSC_USE_COMPLEX)
1517             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1518               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1519                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1520             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1521               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1522                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1523             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1524               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1525             }
1526 #else
1527             if (a->a[bs2*k + l*bs + j] != 0.0) {
1528               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1529             }
1530 #endif
1531           }
1532         }
1533         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1534       }
1535     }
1536     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1537   } else {
1538     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1539     for (i=0; i<a->mbs; i++) {
1540       for (j=0; j<bs; j++) {
1541         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1542         for (k=a->i[i]; k<a->i[i+1]; k++) {
1543           for (l=0; l<bs; l++) {
1544 #if defined(PETSC_USE_COMPLEX)
1545             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1546               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1547                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1548             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1549               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1550                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1551             } else {
1552               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1553             }
1554 #else
1555             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1556 #endif
1557           }
1558         }
1559         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1560       }
1561     }
1562     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1563   }
1564   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1565   PetscFunctionReturn(0);
1566 }
1567 
1568 #include <petscdraw.h>
1569 #undef __FUNCT__
1570 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1571 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1572 {
1573   Mat               A = (Mat) Aa;
1574   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1575   PetscErrorCode    ierr;
1576   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1577   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1578   MatScalar         *aa;
1579   PetscViewer       viewer;
1580   PetscViewerFormat format;
1581 
1582   PetscFunctionBegin;
1583   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1584   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1585 
1586   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1587 
1588   /* loop over matrix elements drawing boxes */
1589 
1590   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1591     color = PETSC_DRAW_BLUE;
1592     for (i=0,row=0; i<mbs; i++,row+=bs) {
1593       for (j=a->i[i]; j<a->i[i+1]; j++) {
1594         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1595         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1596         aa  = a->a + j*bs2;
1597         for (k=0; k<bs; k++) {
1598           for (l=0; l<bs; l++) {
1599             if (PetscRealPart(*aa++) >=  0.) continue;
1600             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1601           }
1602         }
1603       }
1604     }
1605     color = PETSC_DRAW_CYAN;
1606     for (i=0,row=0; i<mbs; i++,row+=bs) {
1607       for (j=a->i[i]; j<a->i[i+1]; j++) {
1608         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1609         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1610         aa  = a->a + j*bs2;
1611         for (k=0; k<bs; k++) {
1612           for (l=0; l<bs; l++) {
1613             if (PetscRealPart(*aa++) != 0.) continue;
1614             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1615           }
1616         }
1617       }
1618     }
1619     color = PETSC_DRAW_RED;
1620     for (i=0,row=0; i<mbs; i++,row+=bs) {
1621       for (j=a->i[i]; j<a->i[i+1]; j++) {
1622         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1623         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1624         aa  = a->a + j*bs2;
1625         for (k=0; k<bs; k++) {
1626           for (l=0; l<bs; l++) {
1627             if (PetscRealPart(*aa++) <= 0.) continue;
1628             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1629           }
1630         }
1631       }
1632     }
1633   } else {
1634     /* use contour shading to indicate magnitude of values */
1635     /* first determine max of all nonzero values */
1636     PetscDraw popup;
1637     PetscReal scale,maxv = 0.0;
1638 
1639     for (i=0; i<a->nz*a->bs2; i++) {
1640       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1641     }
1642     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1643     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1644     if (popup) {
1645       ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1646     }
1647     for (i=0,row=0; i<mbs; i++,row+=bs) {
1648       for (j=a->i[i]; j<a->i[i+1]; j++) {
1649         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1650         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1651         aa  = a->a + j*bs2;
1652         for (k=0; k<bs; k++) {
1653           for (l=0; l<bs; l++) {
1654             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1655             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1656           }
1657         }
1658       }
1659     }
1660   }
1661   PetscFunctionReturn(0);
1662 }
1663 
1664 #undef __FUNCT__
1665 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1666 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1667 {
1668   PetscErrorCode ierr;
1669   PetscReal      xl,yl,xr,yr,w,h;
1670   PetscDraw      draw;
1671   PetscBool      isnull;
1672 
1673   PetscFunctionBegin;
1674   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1675   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1676 
1677   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1678   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1679   xr  += w;    yr += h;  xl = -w;     yl = -h;
1680   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1681   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1682   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1683   PetscFunctionReturn(0);
1684 }
1685 
1686 #undef __FUNCT__
1687 #define __FUNCT__ "MatView_SeqBAIJ"
1688 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1689 {
1690   PetscErrorCode ierr;
1691   PetscBool      iascii,isbinary,isdraw;
1692 
1693   PetscFunctionBegin;
1694   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1695   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1696   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1697   if (iascii) {
1698     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1699   } else if (isbinary) {
1700     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1701   } else if (isdraw) {
1702     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1703   } else {
1704     Mat B;
1705     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1706     ierr = MatView(B,viewer);CHKERRQ(ierr);
1707     ierr = MatDestroy(&B);CHKERRQ(ierr);
1708   }
1709   PetscFunctionReturn(0);
1710 }
1711 
1712 
1713 #undef __FUNCT__
1714 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1715 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1716 {
1717   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1718   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1719   PetscInt    *ai = a->i,*ailen = a->ilen;
1720   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1721   MatScalar   *ap,*aa = a->a;
1722 
1723   PetscFunctionBegin;
1724   for (k=0; k<m; k++) { /* loop over rows */
1725     row = im[k]; brow = row/bs;
1726     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1727     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1728     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1729     nrow = ailen[brow];
1730     for (l=0; l<n; l++) { /* loop over columns */
1731       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1732       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1733       col  = in[l];
1734       bcol = col/bs;
1735       cidx = col%bs;
1736       ridx = row%bs;
1737       high = nrow;
1738       low  = 0; /* assume unsorted */
1739       while (high-low > 5) {
1740         t = (low+high)/2;
1741         if (rp[t] > bcol) high = t;
1742         else             low  = t;
1743       }
1744       for (i=low; i<high; i++) {
1745         if (rp[i] > bcol) break;
1746         if (rp[i] == bcol) {
1747           *v++ = ap[bs2*i+bs*cidx+ridx];
1748           goto finished;
1749         }
1750       }
1751       *v++ = 0.0;
1752 finished:;
1753     }
1754   }
1755   PetscFunctionReturn(0);
1756 }
1757 
1758 #undef __FUNCT__
1759 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1760 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1761 {
1762   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1763   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1764   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1765   PetscErrorCode    ierr;
1766   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1767   PetscBool         roworiented=a->roworiented;
1768   const PetscScalar *value     = v;
1769   MatScalar         *ap,*aa = a->a,*bap;
1770 
1771   PetscFunctionBegin;
1772   if (roworiented) {
1773     stepval = (n-1)*bs;
1774   } else {
1775     stepval = (m-1)*bs;
1776   }
1777   for (k=0; k<m; k++) { /* loop over added rows */
1778     row = im[k];
1779     if (row < 0) continue;
1780 #if defined(PETSC_USE_DEBUG)
1781     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1782 #endif
1783     rp   = aj + ai[row];
1784     ap   = aa + bs2*ai[row];
1785     rmax = imax[row];
1786     nrow = ailen[row];
1787     low  = 0;
1788     high = nrow;
1789     for (l=0; l<n; l++) { /* loop over added columns */
1790       if (in[l] < 0) continue;
1791 #if defined(PETSC_USE_DEBUG)
1792       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1793 #endif
1794       col = in[l];
1795       if (roworiented) {
1796         value = v + (k*(stepval+bs) + l)*bs;
1797       } else {
1798         value = v + (l*(stepval+bs) + k)*bs;
1799       }
1800       if (col <= lastcol) low = 0;
1801       else high = nrow;
1802       lastcol = col;
1803       while (high-low > 7) {
1804         t = (low+high)/2;
1805         if (rp[t] > col) high = t;
1806         else             low  = t;
1807       }
1808       for (i=low; i<high; i++) {
1809         if (rp[i] > col) break;
1810         if (rp[i] == col) {
1811           bap = ap +  bs2*i;
1812           if (roworiented) {
1813             if (is == ADD_VALUES) {
1814               for (ii=0; ii<bs; ii++,value+=stepval) {
1815                 for (jj=ii; jj<bs2; jj+=bs) {
1816                   bap[jj] += *value++;
1817                 }
1818               }
1819             } else {
1820               for (ii=0; ii<bs; ii++,value+=stepval) {
1821                 for (jj=ii; jj<bs2; jj+=bs) {
1822                   bap[jj] = *value++;
1823                 }
1824               }
1825             }
1826           } else {
1827             if (is == ADD_VALUES) {
1828               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1829                 for (jj=0; jj<bs; jj++) {
1830                   bap[jj] += value[jj];
1831                 }
1832                 bap += bs;
1833               }
1834             } else {
1835               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1836                 for (jj=0; jj<bs; jj++) {
1837                   bap[jj]  = value[jj];
1838                 }
1839                 bap += bs;
1840               }
1841             }
1842           }
1843           goto noinsert2;
1844         }
1845       }
1846       if (nonew == 1) goto noinsert2;
1847       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1848       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1849       N = nrow++ - 1; high++;
1850       /* shift up all the later entries in this row */
1851       for (ii=N; ii>=i; ii--) {
1852         rp[ii+1] = rp[ii];
1853         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1854       }
1855       if (N >= i) {
1856         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1857       }
1858       rp[i] = col;
1859       bap   = ap +  bs2*i;
1860       if (roworiented) {
1861         for (ii=0; ii<bs; ii++,value+=stepval) {
1862           for (jj=ii; jj<bs2; jj+=bs) {
1863             bap[jj] = *value++;
1864           }
1865         }
1866       } else {
1867         for (ii=0; ii<bs; ii++,value+=stepval) {
1868           for (jj=0; jj<bs; jj++) {
1869             *bap++ = *value++;
1870           }
1871         }
1872       }
1873 noinsert2:;
1874       low = i;
1875     }
1876     ailen[row] = nrow;
1877   }
1878   PetscFunctionReturn(0);
1879 }
1880 
1881 #undef __FUNCT__
1882 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1883 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1884 {
1885   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1886   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1887   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1888   PetscErrorCode ierr;
1889   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1890   MatScalar      *aa  = a->a,*ap;
1891   PetscReal      ratio=0.6;
1892 
1893   PetscFunctionBegin;
1894   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1895 
1896   if (m) rmax = ailen[0];
1897   for (i=1; i<mbs; i++) {
1898     /* move each row back by the amount of empty slots (fshift) before it*/
1899     fshift += imax[i-1] - ailen[i-1];
1900     rmax    = PetscMax(rmax,ailen[i]);
1901     if (fshift) {
1902       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1903       N  = ailen[i];
1904       for (j=0; j<N; j++) {
1905         ip[j-fshift] = ip[j];
1906 
1907         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1908       }
1909     }
1910     ai[i] = ai[i-1] + ailen[i-1];
1911   }
1912   if (mbs) {
1913     fshift += imax[mbs-1] - ailen[mbs-1];
1914     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1915   }
1916 
1917   /* reset ilen and imax for each row */
1918   a->nonzerorowcnt = 0;
1919   for (i=0; i<mbs; i++) {
1920     ailen[i] = imax[i] = ai[i+1] - ai[i];
1921     a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1922   }
1923   a->nz = ai[mbs];
1924 
1925   /* diagonals may have moved, so kill the diagonal pointers */
1926   a->idiagvalid = PETSC_FALSE;
1927   if (fshift && a->diag) {
1928     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1929     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1930     a->diag = 0;
1931   }
1932   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1933   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1934   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1935   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1936 
1937   A->info.mallocs    += a->reallocs;
1938   a->reallocs         = 0;
1939   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1940   a->rmax             = rmax;
1941 
1942   ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1943   PetscFunctionReturn(0);
1944 }
1945 
1946 /*
1947    This function returns an array of flags which indicate the locations of contiguous
1948    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1949    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1950    Assume: sizes should be long enough to hold all the values.
1951 */
1952 #undef __FUNCT__
1953 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1954 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1955 {
1956   PetscInt  i,j,k,row;
1957   PetscBool flg;
1958 
1959   PetscFunctionBegin;
1960   for (i=0,j=0; i<n; j++) {
1961     row = idx[i];
1962     if (row%bs!=0) { /* Not the begining of a block */
1963       sizes[j] = 1;
1964       i++;
1965     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1966       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1967       i++;
1968     } else { /* Begining of the block, so check if the complete block exists */
1969       flg = PETSC_TRUE;
1970       for (k=1; k<bs; k++) {
1971         if (row+k != idx[i+k]) { /* break in the block */
1972           flg = PETSC_FALSE;
1973           break;
1974         }
1975       }
1976       if (flg) { /* No break in the bs */
1977         sizes[j] = bs;
1978         i       += bs;
1979       } else {
1980         sizes[j] = 1;
1981         i++;
1982       }
1983     }
1984   }
1985   *bs_max = j;
1986   PetscFunctionReturn(0);
1987 }
1988 
1989 #undef __FUNCT__
1990 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1991 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
1992 {
1993   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
1994   PetscErrorCode    ierr;
1995   PetscInt          i,j,k,count,*rows;
1996   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
1997   PetscScalar       zero = 0.0;
1998   MatScalar         *aa;
1999   const PetscScalar *xx;
2000   PetscScalar       *bb;
2001 
2002   PetscFunctionBegin;
2003   /* fix right hand side if needed */
2004   if (x && b) {
2005     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2006     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2007     for (i=0; i<is_n; i++) {
2008       bb[is_idx[i]] = diag*xx[is_idx[i]];
2009     }
2010     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2011     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2012   }
2013 
2014   /* Make a copy of the IS and  sort it */
2015   /* allocate memory for rows,sizes */
2016   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2017 
2018   /* copy IS values to rows, and sort them */
2019   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2020   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2021 
2022   if (baij->keepnonzeropattern) {
2023     for (i=0; i<is_n; i++) sizes[i] = 1;
2024     bs_max          = is_n;
2025   } else {
2026     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2027     A->nonzerostate++;
2028   }
2029 
2030   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2031     row = rows[j];
2032     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2033     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2034     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2035     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2036       if (diag != (PetscScalar)0.0) {
2037         if (baij->ilen[row/bs] > 0) {
2038           baij->ilen[row/bs]       = 1;
2039           baij->j[baij->i[row/bs]] = row/bs;
2040 
2041           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2042         }
2043         /* Now insert all the diagonal values for this bs */
2044         for (k=0; k<bs; k++) {
2045           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2046         }
2047       } else { /* (diag == 0.0) */
2048         baij->ilen[row/bs] = 0;
2049       } /* end (diag == 0.0) */
2050     } else { /* (sizes[i] != bs) */
2051 #if defined(PETSC_USE_DEBUG)
2052       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2053 #endif
2054       for (k=0; k<count; k++) {
2055         aa[0] =  zero;
2056         aa   += bs;
2057       }
2058       if (diag != (PetscScalar)0.0) {
2059         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2060       }
2061     }
2062   }
2063 
2064   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2065   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2066   PetscFunctionReturn(0);
2067 }
2068 
2069 #undef __FUNCT__
2070 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2071 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2072 {
2073   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2074   PetscErrorCode    ierr;
2075   PetscInt          i,j,k,count;
2076   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2077   PetscScalar       zero = 0.0;
2078   MatScalar         *aa;
2079   const PetscScalar *xx;
2080   PetscScalar       *bb;
2081   PetscBool         *zeroed,vecs = PETSC_FALSE;
2082 
2083   PetscFunctionBegin;
2084   /* fix right hand side if needed */
2085   if (x && b) {
2086     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2087     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2088     vecs = PETSC_TRUE;
2089   }
2090 
2091   /* zero the columns */
2092   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2093   for (i=0; i<is_n; i++) {
2094     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2095     zeroed[is_idx[i]] = PETSC_TRUE;
2096   }
2097   for (i=0; i<A->rmap->N; i++) {
2098     if (!zeroed[i]) {
2099       row = i/bs;
2100       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2101         for (k=0; k<bs; k++) {
2102           col = bs*baij->j[j] + k;
2103           if (zeroed[col]) {
2104             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2105             if (vecs) bb[i] -= aa[0]*xx[col];
2106             aa[0] = 0.0;
2107           }
2108         }
2109       }
2110     } else if (vecs) bb[i] = diag*xx[i];
2111   }
2112   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2113   if (vecs) {
2114     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2115     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2116   }
2117 
2118   /* zero the rows */
2119   for (i=0; i<is_n; i++) {
2120     row   = is_idx[i];
2121     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2122     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2123     for (k=0; k<count; k++) {
2124       aa[0] =  zero;
2125       aa   += bs;
2126     }
2127     if (diag != (PetscScalar)0.0) {
2128       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2129     }
2130   }
2131   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2132   PetscFunctionReturn(0);
2133 }
2134 
2135 #undef __FUNCT__
2136 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2137 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2138 {
2139   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2140   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2141   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2142   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2143   PetscErrorCode ierr;
2144   PetscInt       ridx,cidx,bs2=a->bs2;
2145   PetscBool      roworiented=a->roworiented;
2146   MatScalar      *ap,value,*aa=a->a,*bap;
2147 
2148   PetscFunctionBegin;
2149   for (k=0; k<m; k++) { /* loop over added rows */
2150     row  = im[k];
2151     brow = row/bs;
2152     if (row < 0) continue;
2153 #if defined(PETSC_USE_DEBUG)
2154     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2155 #endif
2156     rp   = aj + ai[brow];
2157     ap   = aa + bs2*ai[brow];
2158     rmax = imax[brow];
2159     nrow = ailen[brow];
2160     low  = 0;
2161     high = nrow;
2162     for (l=0; l<n; l++) { /* loop over added columns */
2163       if (in[l] < 0) continue;
2164 #if defined(PETSC_USE_DEBUG)
2165       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2166 #endif
2167       col  = in[l]; bcol = col/bs;
2168       ridx = row % bs; cidx = col % bs;
2169       if (roworiented) {
2170         value = v[l + k*n];
2171       } else {
2172         value = v[k + l*m];
2173       }
2174       if (col <= lastcol) low = 0; else high = nrow;
2175       lastcol = col;
2176       while (high-low > 7) {
2177         t = (low+high)/2;
2178         if (rp[t] > bcol) high = t;
2179         else              low  = t;
2180       }
2181       for (i=low; i<high; i++) {
2182         if (rp[i] > bcol) break;
2183         if (rp[i] == bcol) {
2184           bap = ap +  bs2*i + bs*cidx + ridx;
2185           if (is == ADD_VALUES) *bap += value;
2186           else                  *bap  = value;
2187           goto noinsert1;
2188         }
2189       }
2190       if (nonew == 1) goto noinsert1;
2191       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2192       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2193       N = nrow++ - 1; high++;
2194       /* shift up all the later entries in this row */
2195       for (ii=N; ii>=i; ii--) {
2196         rp[ii+1] = rp[ii];
2197         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2198       }
2199       if (N>=i) {
2200         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2201       }
2202       rp[i]                      = bcol;
2203       ap[bs2*i + bs*cidx + ridx] = value;
2204       a->nz++;
2205       A->nonzerostate++;
2206 noinsert1:;
2207       low = i;
2208     }
2209     ailen[brow] = nrow;
2210   }
2211   PetscFunctionReturn(0);
2212 }
2213 
2214 #undef __FUNCT__
2215 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2216 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2217 {
2218   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2219   Mat            outA;
2220   PetscErrorCode ierr;
2221   PetscBool      row_identity,col_identity;
2222 
2223   PetscFunctionBegin;
2224   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2225   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2226   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2227   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2228 
2229   outA            = inA;
2230   inA->factortype = MAT_FACTOR_LU;
2231 
2232   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2233 
2234   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2235   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2236   a->row = row;
2237   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2238   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2239   a->col = col;
2240 
2241   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2242   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2243   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2244   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2245 
2246   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2247   if (!a->solve_work) {
2248     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2249     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2250   }
2251   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2252   PetscFunctionReturn(0);
2253 }
2254 
2255 #undef __FUNCT__
2256 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2257 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2258 {
2259   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2260   PetscInt    i,nz,mbs;
2261 
2262   PetscFunctionBegin;
2263   nz  = baij->maxnz;
2264   mbs = baij->mbs;
2265   for (i=0; i<nz; i++) {
2266     baij->j[i] = indices[i];
2267   }
2268   baij->nz = nz;
2269   for (i=0; i<mbs; i++) {
2270     baij->ilen[i] = baij->imax[i];
2271   }
2272   PetscFunctionReturn(0);
2273 }
2274 
2275 #undef __FUNCT__
2276 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2277 /*@
2278     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2279        in the matrix.
2280 
2281   Input Parameters:
2282 +  mat - the SeqBAIJ matrix
2283 -  indices - the column indices
2284 
2285   Level: advanced
2286 
2287   Notes:
2288     This can be called if you have precomputed the nonzero structure of the
2289   matrix and want to provide it to the matrix object to improve the performance
2290   of the MatSetValues() operation.
2291 
2292     You MUST have set the correct numbers of nonzeros per row in the call to
2293   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2294 
2295     MUST be called before any calls to MatSetValues();
2296 
2297 @*/
2298 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2299 {
2300   PetscErrorCode ierr;
2301 
2302   PetscFunctionBegin;
2303   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2304   PetscValidPointer(indices,2);
2305   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2306   PetscFunctionReturn(0);
2307 }
2308 
2309 #undef __FUNCT__
2310 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2311 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2312 {
2313   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2314   PetscErrorCode ierr;
2315   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2316   PetscReal      atmp;
2317   PetscScalar    *x,zero = 0.0;
2318   MatScalar      *aa;
2319   PetscInt       ncols,brow,krow,kcol;
2320 
2321   PetscFunctionBegin;
2322   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2323   bs  = A->rmap->bs;
2324   aa  = a->a;
2325   ai  = a->i;
2326   aj  = a->j;
2327   mbs = a->mbs;
2328 
2329   ierr = VecSet(v,zero);CHKERRQ(ierr);
2330   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2331   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2332   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2333   for (i=0; i<mbs; i++) {
2334     ncols = ai[1] - ai[0]; ai++;
2335     brow  = bs*i;
2336     for (j=0; j<ncols; j++) {
2337       for (kcol=0; kcol<bs; kcol++) {
2338         for (krow=0; krow<bs; krow++) {
2339           atmp = PetscAbsScalar(*aa);aa++;
2340           row  = brow + krow;   /* row index */
2341           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2342         }
2343       }
2344       aj++;
2345     }
2346   }
2347   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2348   PetscFunctionReturn(0);
2349 }
2350 
2351 #undef __FUNCT__
2352 #define __FUNCT__ "MatCopy_SeqBAIJ"
2353 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2354 {
2355   PetscErrorCode ierr;
2356 
2357   PetscFunctionBegin;
2358   /* If the two matrices have the same copy implementation, use fast copy. */
2359   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2360     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2361     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2362     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2363 
2364     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2365     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2366     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2367   } else {
2368     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2369   }
2370   PetscFunctionReturn(0);
2371 }
2372 
2373 #undef __FUNCT__
2374 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2375 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2376 {
2377   PetscErrorCode ierr;
2378 
2379   PetscFunctionBegin;
2380   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2381   PetscFunctionReturn(0);
2382 }
2383 
2384 #undef __FUNCT__
2385 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2386 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2387 {
2388   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2389 
2390   PetscFunctionBegin;
2391   *array = a->a;
2392   PetscFunctionReturn(0);
2393 }
2394 
2395 #undef __FUNCT__
2396 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2397 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2398 {
2399   PetscFunctionBegin;
2400   PetscFunctionReturn(0);
2401 }
2402 
2403 #undef __FUNCT__
2404 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ"
2405 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2406 {
2407   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2408   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2409   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2410   PetscErrorCode ierr;
2411 
2412   PetscFunctionBegin;
2413   /* Set the number of nonzeros in the new matrix */
2414   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2415   PetscFunctionReturn(0);
2416 }
2417 
2418 #undef __FUNCT__
2419 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2420 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2421 {
2422   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2423   PetscErrorCode ierr;
2424   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2425   PetscBLASInt   one=1;
2426 
2427   PetscFunctionBegin;
2428   if (str == SAME_NONZERO_PATTERN) {
2429     PetscScalar  alpha = a;
2430     PetscBLASInt bnz;
2431     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2432     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2433     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2434   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2435     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2436   } else {
2437     Mat      B;
2438     PetscInt *nnz;
2439     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2440     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2441     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2442     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2443     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2444     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2445     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2446     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2447     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2448     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2449     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2450     ierr = PetscFree(nnz);CHKERRQ(ierr);
2451   }
2452   PetscFunctionReturn(0);
2453 }
2454 
2455 #undef __FUNCT__
2456 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2457 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2458 {
2459   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2460   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2461   MatScalar   *aa = a->a;
2462 
2463   PetscFunctionBegin;
2464   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2465   PetscFunctionReturn(0);
2466 }
2467 
2468 #undef __FUNCT__
2469 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2470 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2471 {
2472   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2473   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2474   MatScalar   *aa = a->a;
2475 
2476   PetscFunctionBegin;
2477   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2478   PetscFunctionReturn(0);
2479 }
2480 
2481 #undef __FUNCT__
2482 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2483 /*
2484     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2485 */
2486 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2487 {
2488   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2489   PetscErrorCode ierr;
2490   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2491   PetscInt       nz = a->i[m],row,*jj,mr,col;
2492 
2493   PetscFunctionBegin;
2494   *nn = n;
2495   if (!ia) PetscFunctionReturn(0);
2496   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2497   else {
2498     ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2499     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2500     ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2501     jj   = a->j;
2502     for (i=0; i<nz; i++) {
2503       collengths[jj[i]]++;
2504     }
2505     cia[0] = oshift;
2506     for (i=0; i<n; i++) {
2507       cia[i+1] = cia[i] + collengths[i];
2508     }
2509     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2510     jj   = a->j;
2511     for (row=0; row<m; row++) {
2512       mr = a->i[row+1] - a->i[row];
2513       for (i=0; i<mr; i++) {
2514         col = *jj++;
2515 
2516         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2517       }
2518     }
2519     ierr = PetscFree(collengths);CHKERRQ(ierr);
2520     *ia  = cia; *ja = cja;
2521   }
2522   PetscFunctionReturn(0);
2523 }
2524 
2525 #undef __FUNCT__
2526 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2527 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2528 {
2529   PetscErrorCode ierr;
2530 
2531   PetscFunctionBegin;
2532   if (!ia) PetscFunctionReturn(0);
2533   ierr = PetscFree(*ia);CHKERRQ(ierr);
2534   ierr = PetscFree(*ja);CHKERRQ(ierr);
2535   PetscFunctionReturn(0);
2536 }
2537 
2538 /*
2539  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2540  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2541  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2542  */
2543 #undef __FUNCT__
2544 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color"
2545 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2546 {
2547   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2548   PetscErrorCode ierr;
2549   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2550   PetscInt       nz = a->i[m],row,*jj,mr,col;
2551   PetscInt       *cspidx;
2552 
2553   PetscFunctionBegin;
2554   *nn = n;
2555   if (!ia) PetscFunctionReturn(0);
2556 
2557   ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2558   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2559   ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2560   ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr);
2561   jj   = a->j;
2562   for (i=0; i<nz; i++) {
2563     collengths[jj[i]]++;
2564   }
2565   cia[0] = oshift;
2566   for (i=0; i<n; i++) {
2567     cia[i+1] = cia[i] + collengths[i];
2568   }
2569   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2570   jj   = a->j;
2571   for (row=0; row<m; row++) {
2572     mr = a->i[row+1] - a->i[row];
2573     for (i=0; i<mr; i++) {
2574       col = *jj++;
2575       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2576       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2577     }
2578   }
2579   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2580   *ia    = cia; *ja = cja;
2581   *spidx = cspidx;
2582   PetscFunctionReturn(0);
2583 }
2584 
2585 #undef __FUNCT__
2586 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color"
2587 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2588 {
2589   PetscErrorCode ierr;
2590 
2591   PetscFunctionBegin;
2592   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2593   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2594   PetscFunctionReturn(0);
2595 }
2596 
2597 #undef __FUNCT__
2598 #define __FUNCT__ "MatShift_SeqBAIJ"
2599 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2600 {
2601   PetscErrorCode ierr;
2602   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2603 
2604   PetscFunctionBegin;
2605   if (!aij->nz) {
2606     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2607   }
2608   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2609   PetscFunctionReturn(0);
2610 }
2611 
2612 /* -------------------------------------------------------------------*/
2613 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2614                                        MatGetRow_SeqBAIJ,
2615                                        MatRestoreRow_SeqBAIJ,
2616                                        MatMult_SeqBAIJ_N,
2617                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2618                                        MatMultTranspose_SeqBAIJ,
2619                                        MatMultTransposeAdd_SeqBAIJ,
2620                                        0,
2621                                        0,
2622                                        0,
2623                                /* 10*/ 0,
2624                                        MatLUFactor_SeqBAIJ,
2625                                        0,
2626                                        0,
2627                                        MatTranspose_SeqBAIJ,
2628                                /* 15*/ MatGetInfo_SeqBAIJ,
2629                                        MatEqual_SeqBAIJ,
2630                                        MatGetDiagonal_SeqBAIJ,
2631                                        MatDiagonalScale_SeqBAIJ,
2632                                        MatNorm_SeqBAIJ,
2633                                /* 20*/ 0,
2634                                        MatAssemblyEnd_SeqBAIJ,
2635                                        MatSetOption_SeqBAIJ,
2636                                        MatZeroEntries_SeqBAIJ,
2637                                /* 24*/ MatZeroRows_SeqBAIJ,
2638                                        0,
2639                                        0,
2640                                        0,
2641                                        0,
2642                                /* 29*/ MatSetUp_SeqBAIJ,
2643                                        0,
2644                                        0,
2645                                        0,
2646                                        0,
2647                                /* 34*/ MatDuplicate_SeqBAIJ,
2648                                        0,
2649                                        0,
2650                                        MatILUFactor_SeqBAIJ,
2651                                        0,
2652                                /* 39*/ MatAXPY_SeqBAIJ,
2653                                        MatGetSubMatrices_SeqBAIJ,
2654                                        MatIncreaseOverlap_SeqBAIJ,
2655                                        MatGetValues_SeqBAIJ,
2656                                        MatCopy_SeqBAIJ,
2657                                /* 44*/ 0,
2658                                        MatScale_SeqBAIJ,
2659                                        MatShift_SeqBAIJ,
2660                                        0,
2661                                        MatZeroRowsColumns_SeqBAIJ,
2662                                /* 49*/ 0,
2663                                        MatGetRowIJ_SeqBAIJ,
2664                                        MatRestoreRowIJ_SeqBAIJ,
2665                                        MatGetColumnIJ_SeqBAIJ,
2666                                        MatRestoreColumnIJ_SeqBAIJ,
2667                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2668                                        0,
2669                                        0,
2670                                        0,
2671                                        MatSetValuesBlocked_SeqBAIJ,
2672                                /* 59*/ MatGetSubMatrix_SeqBAIJ,
2673                                        MatDestroy_SeqBAIJ,
2674                                        MatView_SeqBAIJ,
2675                                        0,
2676                                        0,
2677                                /* 64*/ 0,
2678                                        0,
2679                                        0,
2680                                        0,
2681                                        0,
2682                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2683                                        0,
2684                                        MatConvert_Basic,
2685                                        0,
2686                                        0,
2687                                /* 74*/ 0,
2688                                        MatFDColoringApply_BAIJ,
2689                                        0,
2690                                        0,
2691                                        0,
2692                                /* 79*/ 0,
2693                                        0,
2694                                        0,
2695                                        0,
2696                                        MatLoad_SeqBAIJ,
2697                                /* 84*/ 0,
2698                                        0,
2699                                        0,
2700                                        0,
2701                                        0,
2702                                /* 89*/ 0,
2703                                        0,
2704                                        0,
2705                                        0,
2706                                        0,
2707                                /* 94*/ 0,
2708                                        0,
2709                                        0,
2710                                        0,
2711                                        0,
2712                                /* 99*/ 0,
2713                                        0,
2714                                        0,
2715                                        0,
2716                                        0,
2717                                /*104*/ 0,
2718                                        MatRealPart_SeqBAIJ,
2719                                        MatImaginaryPart_SeqBAIJ,
2720                                        0,
2721                                        0,
2722                                /*109*/ 0,
2723                                        0,
2724                                        0,
2725                                        0,
2726                                        MatMissingDiagonal_SeqBAIJ,
2727                                /*114*/ 0,
2728                                        0,
2729                                        0,
2730                                        0,
2731                                        0,
2732                                /*119*/ 0,
2733                                        0,
2734                                        MatMultHermitianTranspose_SeqBAIJ,
2735                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2736                                        0,
2737                                /*124*/ 0,
2738                                        0,
2739                                        MatInvertBlockDiagonal_SeqBAIJ,
2740                                        0,
2741                                        0,
2742                                /*129*/ 0,
2743                                        0,
2744                                        0,
2745                                        0,
2746                                        0,
2747                                /*134*/ 0,
2748                                        0,
2749                                        0,
2750                                        0,
2751                                        0,
2752                                /*139*/ 0,
2753                                        0,
2754                                        0,
2755                                        MatFDColoringSetUp_SeqXAIJ,
2756                                        0,
2757                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ
2758 };
2759 
2760 #undef __FUNCT__
2761 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2762 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2763 {
2764   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2765   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2766   PetscErrorCode ierr;
2767 
2768   PetscFunctionBegin;
2769   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2770 
2771   /* allocate space for values if not already there */
2772   if (!aij->saved_values) {
2773     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2774     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2775   }
2776 
2777   /* copy values over */
2778   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2779   PetscFunctionReturn(0);
2780 }
2781 
2782 #undef __FUNCT__
2783 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2784 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2785 {
2786   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2787   PetscErrorCode ierr;
2788   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2789 
2790   PetscFunctionBegin;
2791   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2792   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2793 
2794   /* copy values over */
2795   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2796   PetscFunctionReturn(0);
2797 }
2798 
2799 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2800 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2801 
2802 #undef __FUNCT__
2803 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2804 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2805 {
2806   Mat_SeqBAIJ    *b;
2807   PetscErrorCode ierr;
2808   PetscInt       i,mbs,nbs,bs2;
2809   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2810 
2811   PetscFunctionBegin;
2812   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2813   if (nz == MAT_SKIP_ALLOCATION) {
2814     skipallocation = PETSC_TRUE;
2815     nz             = 0;
2816   }
2817 
2818   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2819   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2820   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2821   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2822 
2823   B->preallocated = PETSC_TRUE;
2824 
2825   mbs = B->rmap->n/bs;
2826   nbs = B->cmap->n/bs;
2827   bs2 = bs*bs;
2828 
2829   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2830 
2831   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2832   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2833   if (nnz) {
2834     for (i=0; i<mbs; i++) {
2835       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2836       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2837     }
2838   }
2839 
2840   b    = (Mat_SeqBAIJ*)B->data;
2841   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2842   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2843   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2844 
2845   if (!flg) {
2846     switch (bs) {
2847     case 1:
2848       B->ops->mult    = MatMult_SeqBAIJ_1;
2849       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2850       break;
2851     case 2:
2852       B->ops->mult    = MatMult_SeqBAIJ_2;
2853       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2854       break;
2855     case 3:
2856       B->ops->mult    = MatMult_SeqBAIJ_3;
2857       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2858       break;
2859     case 4:
2860       B->ops->mult    = MatMult_SeqBAIJ_4;
2861       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2862       break;
2863     case 5:
2864       B->ops->mult    = MatMult_SeqBAIJ_5;
2865       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2866       break;
2867     case 6:
2868       B->ops->mult    = MatMult_SeqBAIJ_6;
2869       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2870       break;
2871     case 7:
2872       B->ops->mult    = MatMult_SeqBAIJ_7;
2873       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2874       break;
2875     case 15:
2876       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2877       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2878       break;
2879     default:
2880       B->ops->mult    = MatMult_SeqBAIJ_N;
2881       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2882       break;
2883     }
2884   }
2885   B->ops->sor = MatSOR_SeqBAIJ;
2886   b->mbs = mbs;
2887   b->nbs = nbs;
2888   if (!skipallocation) {
2889     if (!b->imax) {
2890       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2891       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2892 
2893       b->free_imax_ilen = PETSC_TRUE;
2894     }
2895     /* b->ilen will count nonzeros in each block row so far. */
2896     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2897     if (!nnz) {
2898       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2899       else if (nz < 0) nz = 1;
2900       for (i=0; i<mbs; i++) b->imax[i] = nz;
2901       nz = nz*mbs;
2902     } else {
2903       nz = 0;
2904       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2905     }
2906 
2907     /* allocate the matrix space */
2908     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2909     ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2910     ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2911     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2912     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2913 
2914     b->singlemalloc = PETSC_TRUE;
2915     b->i[0]         = 0;
2916     for (i=1; i<mbs+1; i++) {
2917       b->i[i] = b->i[i-1] + b->imax[i-1];
2918     }
2919     b->free_a  = PETSC_TRUE;
2920     b->free_ij = PETSC_TRUE;
2921   } else {
2922     b->free_a  = PETSC_FALSE;
2923     b->free_ij = PETSC_FALSE;
2924   }
2925 
2926   b->bs2              = bs2;
2927   b->mbs              = mbs;
2928   b->nz               = 0;
2929   b->maxnz            = nz;
2930   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2931   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2932   PetscFunctionReturn(0);
2933 }
2934 
2935 #undef __FUNCT__
2936 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2937 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2938 {
2939   PetscInt       i,m,nz,nz_max=0,*nnz;
2940   PetscScalar    *values=0;
2941   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2942   PetscErrorCode ierr;
2943 
2944   PetscFunctionBegin;
2945   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2946   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2947   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2948   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2949   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2950   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2951   m    = B->rmap->n/bs;
2952 
2953   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2954   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2955   for (i=0; i<m; i++) {
2956     nz = ii[i+1]- ii[i];
2957     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2958     nz_max = PetscMax(nz_max, nz);
2959     nnz[i] = nz;
2960   }
2961   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2962   ierr = PetscFree(nnz);CHKERRQ(ierr);
2963 
2964   values = (PetscScalar*)V;
2965   if (!values) {
2966     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2967   }
2968   for (i=0; i<m; i++) {
2969     PetscInt          ncols  = ii[i+1] - ii[i];
2970     const PetscInt    *icols = jj + ii[i];
2971     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2972     if (!roworiented) {
2973       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2974     } else {
2975       PetscInt j;
2976       for (j=0; j<ncols; j++) {
2977         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2978         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2979       }
2980     }
2981   }
2982   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2983   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2984   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2985   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2986   PetscFunctionReturn(0);
2987 }
2988 
2989 /*MC
2990    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2991    block sparse compressed row format.
2992 
2993    Options Database Keys:
2994 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2995 
2996   Level: beginner
2997 
2998 .seealso: MatCreateSeqBAIJ()
2999 M*/
3000 
3001 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3002 
3003 #undef __FUNCT__
3004 #define __FUNCT__ "MatCreate_SeqBAIJ"
3005 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3006 {
3007   PetscErrorCode ierr;
3008   PetscMPIInt    size;
3009   Mat_SeqBAIJ    *b;
3010 
3011   PetscFunctionBegin;
3012   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3013   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3014 
3015   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3016   B->data = (void*)b;
3017   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3018 
3019   b->row          = 0;
3020   b->col          = 0;
3021   b->icol         = 0;
3022   b->reallocs     = 0;
3023   b->saved_values = 0;
3024 
3025   b->roworiented        = PETSC_TRUE;
3026   b->nonew              = 0;
3027   b->diag               = 0;
3028   B->spptr              = 0;
3029   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3030   b->keepnonzeropattern = PETSC_FALSE;
3031 
3032   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3033   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3034   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3035   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3036   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3037   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3038   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3039   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3040   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3041   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3042   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3043   PetscFunctionReturn(0);
3044 }
3045 
3046 #undef __FUNCT__
3047 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3048 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3049 {
3050   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3051   PetscErrorCode ierr;
3052   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3053 
3054   PetscFunctionBegin;
3055   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3056 
3057   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3058     c->imax           = a->imax;
3059     c->ilen           = a->ilen;
3060     c->free_imax_ilen = PETSC_FALSE;
3061   } else {
3062     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3063     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3064     for (i=0; i<mbs; i++) {
3065       c->imax[i] = a->imax[i];
3066       c->ilen[i] = a->ilen[i];
3067     }
3068     c->free_imax_ilen = PETSC_TRUE;
3069   }
3070 
3071   /* allocate the matrix space */
3072   if (mallocmatspace) {
3073     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3074       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3075       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3076 
3077       c->i            = a->i;
3078       c->j            = a->j;
3079       c->singlemalloc = PETSC_FALSE;
3080       c->free_a       = PETSC_TRUE;
3081       c->free_ij      = PETSC_FALSE;
3082       c->parent       = A;
3083       C->preallocated = PETSC_TRUE;
3084       C->assembled    = PETSC_TRUE;
3085 
3086       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3087       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3088       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3089     } else {
3090       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3091       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3092 
3093       c->singlemalloc = PETSC_TRUE;
3094       c->free_a       = PETSC_TRUE;
3095       c->free_ij      = PETSC_TRUE;
3096 
3097       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3098       if (mbs > 0) {
3099         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3100         if (cpvalues == MAT_COPY_VALUES) {
3101           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3102         } else {
3103           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3104         }
3105       }
3106       C->preallocated = PETSC_TRUE;
3107       C->assembled    = PETSC_TRUE;
3108     }
3109   }
3110 
3111   c->roworiented = a->roworiented;
3112   c->nonew       = a->nonew;
3113 
3114   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3115   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3116 
3117   c->bs2         = a->bs2;
3118   c->mbs         = a->mbs;
3119   c->nbs         = a->nbs;
3120 
3121   if (a->diag) {
3122     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3123       c->diag      = a->diag;
3124       c->free_diag = PETSC_FALSE;
3125     } else {
3126       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3127       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3128       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3129       c->free_diag = PETSC_TRUE;
3130     }
3131   } else c->diag = 0;
3132 
3133   c->nz         = a->nz;
3134   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3135   c->solve_work = NULL;
3136   c->mult_work  = NULL;
3137   c->sor_workt  = NULL;
3138   c->sor_work   = NULL;
3139 
3140   c->compressedrow.use   = a->compressedrow.use;
3141   c->compressedrow.nrows = a->compressedrow.nrows;
3142   if (a->compressedrow.use) {
3143     i    = a->compressedrow.nrows;
3144     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3145     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3146     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3147     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3148   } else {
3149     c->compressedrow.use    = PETSC_FALSE;
3150     c->compressedrow.i      = NULL;
3151     c->compressedrow.rindex = NULL;
3152   }
3153   C->nonzerostate = A->nonzerostate;
3154 
3155   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3156   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3157   PetscFunctionReturn(0);
3158 }
3159 
3160 #undef __FUNCT__
3161 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3162 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3163 {
3164   PetscErrorCode ierr;
3165 
3166   PetscFunctionBegin;
3167   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3168   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3169   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3170   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3171   PetscFunctionReturn(0);
3172 }
3173 
3174 #undef __FUNCT__
3175 #define __FUNCT__ "MatLoad_SeqBAIJ"
3176 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3177 {
3178   Mat_SeqBAIJ    *a;
3179   PetscErrorCode ierr;
3180   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3181   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3182   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3183   PetscInt       *masked,nmask,tmp,bs2,ishift;
3184   PetscMPIInt    size;
3185   int            fd;
3186   PetscScalar    *aa;
3187   MPI_Comm       comm;
3188 
3189   PetscFunctionBegin;
3190   /* force binary viewer to load .info file if it has not yet done so */
3191   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3192   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3193   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3194   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3195   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3196   if (bs < 0) bs = 1;
3197   bs2  = bs*bs;
3198 
3199   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3200   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3201   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3202   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3203   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3204   M = header[1]; N = header[2]; nz = header[3];
3205 
3206   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3207   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3208 
3209   /*
3210      This code adds extra rows to make sure the number of rows is
3211     divisible by the blocksize
3212   */
3213   mbs        = M/bs;
3214   extra_rows = bs - M + bs*(mbs);
3215   if (extra_rows == bs) extra_rows = 0;
3216   else mbs++;
3217   if (extra_rows) {
3218     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3219   }
3220 
3221   /* Set global sizes if not already set */
3222   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3223     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3224   } else { /* Check if the matrix global sizes are correct */
3225     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3226     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3227       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3228     }
3229     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3230   }
3231 
3232   /* read in row lengths */
3233   ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
3234   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3235   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3236 
3237   /* read in column indices */
3238   ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr);
3239   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3240   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3241 
3242   /* loop over row lengths determining block row lengths */
3243   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3244   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3245   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3246   rowcount = 0;
3247   nzcount  = 0;
3248   for (i=0; i<mbs; i++) {
3249     nmask = 0;
3250     for (j=0; j<bs; j++) {
3251       kmax = rowlengths[rowcount];
3252       for (k=0; k<kmax; k++) {
3253         tmp = jj[nzcount++]/bs;
3254         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3255       }
3256       rowcount++;
3257     }
3258     browlengths[i] += nmask;
3259     /* zero out the mask elements we set */
3260     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3261   }
3262 
3263   /* Do preallocation  */
3264   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3265   a    = (Mat_SeqBAIJ*)newmat->data;
3266 
3267   /* set matrix "i" values */
3268   a->i[0] = 0;
3269   for (i=1; i<= mbs; i++) {
3270     a->i[i]      = a->i[i-1] + browlengths[i-1];
3271     a->ilen[i-1] = browlengths[i-1];
3272   }
3273   a->nz = 0;
3274   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3275 
3276   /* read in nonzero values */
3277   ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr);
3278   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3279   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3280 
3281   /* set "a" and "j" values into matrix */
3282   nzcount = 0; jcount = 0;
3283   for (i=0; i<mbs; i++) {
3284     nzcountb = nzcount;
3285     nmask    = 0;
3286     for (j=0; j<bs; j++) {
3287       kmax = rowlengths[i*bs+j];
3288       for (k=0; k<kmax; k++) {
3289         tmp = jj[nzcount++]/bs;
3290         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3291       }
3292     }
3293     /* sort the masked values */
3294     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3295 
3296     /* set "j" values into matrix */
3297     maskcount = 1;
3298     for (j=0; j<nmask; j++) {
3299       a->j[jcount++]  = masked[j];
3300       mask[masked[j]] = maskcount++;
3301     }
3302     /* set "a" values into matrix */
3303     ishift = bs2*a->i[i];
3304     for (j=0; j<bs; j++) {
3305       kmax = rowlengths[i*bs+j];
3306       for (k=0; k<kmax; k++) {
3307         tmp       = jj[nzcountb]/bs;
3308         block     = mask[tmp] - 1;
3309         point     = jj[nzcountb] - bs*tmp;
3310         idx       = ishift + bs2*block + j + bs*point;
3311         a->a[idx] = (MatScalar)aa[nzcountb++];
3312       }
3313     }
3314     /* zero out the mask elements we set */
3315     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3316   }
3317   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3318 
3319   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3320   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3321   ierr = PetscFree(aa);CHKERRQ(ierr);
3322   ierr = PetscFree(jj);CHKERRQ(ierr);
3323   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3324 
3325   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3326   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3327   PetscFunctionReturn(0);
3328 }
3329 
3330 #undef __FUNCT__
3331 #define __FUNCT__ "MatCreateSeqBAIJ"
3332 /*@C
3333    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3334    compressed row) format.  For good matrix assembly performance the
3335    user should preallocate the matrix storage by setting the parameter nz
3336    (or the array nnz).  By setting these parameters accurately, performance
3337    during matrix assembly can be increased by more than a factor of 50.
3338 
3339    Collective on MPI_Comm
3340 
3341    Input Parameters:
3342 +  comm - MPI communicator, set to PETSC_COMM_SELF
3343 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3344           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3345 .  m - number of rows
3346 .  n - number of columns
3347 .  nz - number of nonzero blocks  per block row (same for all rows)
3348 -  nnz - array containing the number of nonzero blocks in the various block rows
3349          (possibly different for each block row) or NULL
3350 
3351    Output Parameter:
3352 .  A - the matrix
3353 
3354    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3355    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3356    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3357 
3358    Options Database Keys:
3359 .   -mat_no_unroll - uses code that does not unroll the loops in the
3360                      block calculations (much slower)
3361 .    -mat_block_size - size of the blocks to use
3362 
3363    Level: intermediate
3364 
3365    Notes:
3366    The number of rows and columns must be divisible by blocksize.
3367 
3368    If the nnz parameter is given then the nz parameter is ignored
3369 
3370    A nonzero block is any block that as 1 or more nonzeros in it
3371 
3372    The block AIJ format is fully compatible with standard Fortran 77
3373    storage.  That is, the stored row and column indices can begin at
3374    either one (as in Fortran) or zero.  See the users' manual for details.
3375 
3376    Specify the preallocated storage with either nz or nnz (not both).
3377    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3378    allocation.  See Users-Manual: ch_mat for details.
3379    matrices.
3380 
3381 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3382 @*/
3383 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3384 {
3385   PetscErrorCode ierr;
3386 
3387   PetscFunctionBegin;
3388   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3389   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3390   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3391   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3392   PetscFunctionReturn(0);
3393 }
3394 
3395 #undef __FUNCT__
3396 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3397 /*@C
3398    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3399    per row in the matrix. For good matrix assembly performance the
3400    user should preallocate the matrix storage by setting the parameter nz
3401    (or the array nnz).  By setting these parameters accurately, performance
3402    during matrix assembly can be increased by more than a factor of 50.
3403 
3404    Collective on MPI_Comm
3405 
3406    Input Parameters:
3407 +  B - the matrix
3408 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3409           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3410 .  nz - number of block nonzeros per block row (same for all rows)
3411 -  nnz - array containing the number of block nonzeros in the various block rows
3412          (possibly different for each block row) or NULL
3413 
3414    Options Database Keys:
3415 .   -mat_no_unroll - uses code that does not unroll the loops in the
3416                      block calculations (much slower)
3417 .    -mat_block_size - size of the blocks to use
3418 
3419    Level: intermediate
3420 
3421    Notes:
3422    If the nnz parameter is given then the nz parameter is ignored
3423 
3424    You can call MatGetInfo() to get information on how effective the preallocation was;
3425    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3426    You can also run with the option -info and look for messages with the string
3427    malloc in them to see if additional memory allocation was needed.
3428 
3429    The block AIJ format is fully compatible with standard Fortran 77
3430    storage.  That is, the stored row and column indices can begin at
3431    either one (as in Fortran) or zero.  See the users' manual for details.
3432 
3433    Specify the preallocated storage with either nz or nnz (not both).
3434    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3435    allocation.  See Users-Manual: ch_mat for details.
3436 
3437 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3438 @*/
3439 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3440 {
3441   PetscErrorCode ierr;
3442 
3443   PetscFunctionBegin;
3444   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3445   PetscValidType(B,1);
3446   PetscValidLogicalCollectiveInt(B,bs,2);
3447   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3448   PetscFunctionReturn(0);
3449 }
3450 
3451 #undef __FUNCT__
3452 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3453 /*@C
3454    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3455    (the default sequential PETSc format).
3456 
3457    Collective on MPI_Comm
3458 
3459    Input Parameters:
3460 +  B - the matrix
3461 .  i - the indices into j for the start of each local row (starts with zero)
3462 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3463 -  v - optional values in the matrix
3464 
3465    Level: developer
3466 
3467    Notes:
3468    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3469    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3470    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3471    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3472    block column and the second index is over columns within a block.
3473 
3474 .keywords: matrix, aij, compressed row, sparse
3475 
3476 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3477 @*/
3478 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3479 {
3480   PetscErrorCode ierr;
3481 
3482   PetscFunctionBegin;
3483   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3484   PetscValidType(B,1);
3485   PetscValidLogicalCollectiveInt(B,bs,2);
3486   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3487   PetscFunctionReturn(0);
3488 }
3489 
3490 
3491 #undef __FUNCT__
3492 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3493 /*@
3494      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3495 
3496      Collective on MPI_Comm
3497 
3498    Input Parameters:
3499 +  comm - must be an MPI communicator of size 1
3500 .  bs - size of block
3501 .  m - number of rows
3502 .  n - number of columns
3503 .  i - row indices
3504 .  j - column indices
3505 -  a - matrix values
3506 
3507    Output Parameter:
3508 .  mat - the matrix
3509 
3510    Level: advanced
3511 
3512    Notes:
3513        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3514     once the matrix is destroyed
3515 
3516        You cannot set new nonzero locations into this matrix, that will generate an error.
3517 
3518        The i and j indices are 0 based
3519 
3520        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3521 
3522       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3523       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3524       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3525       with column-major ordering within blocks.
3526 
3527 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3528 
3529 @*/
3530 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3531 {
3532   PetscErrorCode ierr;
3533   PetscInt       ii;
3534   Mat_SeqBAIJ    *baij;
3535 
3536   PetscFunctionBegin;
3537   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3538   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3539 
3540   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3541   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3542   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3543   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3544   baij = (Mat_SeqBAIJ*)(*mat)->data;
3545   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3546   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3547 
3548   baij->i = i;
3549   baij->j = j;
3550   baij->a = a;
3551 
3552   baij->singlemalloc = PETSC_FALSE;
3553   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3554   baij->free_a       = PETSC_FALSE;
3555   baij->free_ij      = PETSC_FALSE;
3556 
3557   for (ii=0; ii<m; ii++) {
3558     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3559 #if defined(PETSC_USE_DEBUG)
3560     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3561 #endif
3562   }
3563 #if defined(PETSC_USE_DEBUG)
3564   for (ii=0; ii<baij->i[m]; ii++) {
3565     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3566     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3567   }
3568 #endif
3569 
3570   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3571   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3572   PetscFunctionReturn(0);
3573 }
3574 
3575 #undef __FUNCT__
3576 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_SeqBAIJ"
3577 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3578 {
3579   PetscErrorCode ierr;
3580 
3581   PetscFunctionBegin;
3582   ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3583   PetscFunctionReturn(0);
3584 }
3585