xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 9f4d3c52fa2fe0bb72fec4f4e85d8e495867af35)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13 #endif
14 
15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,const MatType,MatReuse,Mat*);
17 #endif
18 
19 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
20 {
21   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
22   PetscErrorCode ierr;
23   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
24   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
25   PetscReal      shift = 0.0;
26   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
27 
28   PetscFunctionBegin;
29   allowzeropivot = PetscNot(A->erroriffailure);
30 
31   if (a->idiagvalid) {
32     if (values) *values = a->idiag;
33     PetscFunctionReturn(0);
34   }
35   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
36   diag_offset = a->diag;
37   if (!a->idiag) {
38     ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr);
39     ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
40   }
41   diag  = a->idiag;
42   mdiag = a->idiag+bs2*mbs;
43   if (values) *values = a->idiag;
44   /* factor and invert each block */
45   switch (bs) {
46   case 1:
47     for (i=0; i<mbs; i++) {
48       odiag    = v + 1*diag_offset[i];
49       diag[0]  = odiag[0];
50       mdiag[0] = odiag[0];
51 
52       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
53         if (allowzeropivot) {
54           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
55           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
56           A->factorerror_zeropivot_row   = i;
57           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
58         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
59       }
60 
61       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
62       diag    += 1;
63       mdiag   += 1;
64     }
65     break;
66   case 2:
67     for (i=0; i<mbs; i++) {
68       odiag    = v + 4*diag_offset[i];
69       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
70       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
71       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
72       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
73       diag    += 4;
74       mdiag   += 4;
75     }
76     break;
77   case 3:
78     for (i=0; i<mbs; i++) {
79       odiag    = v + 9*diag_offset[i];
80       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
81       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
82       diag[8]  = odiag[8];
83       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
84       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
85       mdiag[8] = odiag[8];
86       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
87       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
88       diag    += 9;
89       mdiag   += 9;
90     }
91     break;
92   case 4:
93     for (i=0; i<mbs; i++) {
94       odiag  = v + 16*diag_offset[i];
95       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
96       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
97       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
98       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
99       diag  += 16;
100       mdiag += 16;
101     }
102     break;
103   case 5:
104     for (i=0; i<mbs; i++) {
105       odiag  = v + 25*diag_offset[i];
106       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
107       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
108       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
109       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
110       diag  += 25;
111       mdiag += 25;
112     }
113     break;
114   case 6:
115     for (i=0; i<mbs; i++) {
116       odiag  = v + 36*diag_offset[i];
117       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
118       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
119       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
120       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
121       diag  += 36;
122       mdiag += 36;
123     }
124     break;
125   case 7:
126     for (i=0; i<mbs; i++) {
127       odiag  = v + 49*diag_offset[i];
128       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
129       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
130       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
131       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
132       diag  += 49;
133       mdiag += 49;
134     }
135     break;
136   default:
137     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
138     for (i=0; i<mbs; i++) {
139       odiag  = v + bs2*diag_offset[i];
140       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
141       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
142       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
143       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
144       diag  += bs2;
145       mdiag += bs2;
146     }
147     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
148   }
149   a->idiagvalid = PETSC_TRUE;
150   PetscFunctionReturn(0);
151 }
152 
153 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
154 {
155   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
156   PetscScalar       *x,*work,*w,*workt,*t;
157   const MatScalar   *v,*aa = a->a, *idiag;
158   const PetscScalar *b,*xb;
159   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
160   PetscErrorCode    ierr;
161   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
162   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
163 
164   PetscFunctionBegin;
165   if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */
166   its = its*lits;
167   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
168   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
169   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
170   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
171   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
172 
173   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
174 
175   if (!m) PetscFunctionReturn(0);
176   diag  = a->diag;
177   idiag = a->idiag;
178   k    = PetscMax(A->rmap->n,A->cmap->n);
179   if (!a->mult_work) {
180     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
181   }
182   if (!a->sor_workt) {
183     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
184   }
185   if (!a->sor_work) {
186     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
187   }
188   work = a->mult_work;
189   t    = a->sor_workt;
190   w    = a->sor_work;
191 
192   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
193   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
194 
195   if (flag & SOR_ZERO_INITIAL_GUESS) {
196     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
197       switch (bs) {
198       case 1:
199         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
200         t[0] = b[0];
201         i2     = 1;
202         idiag += 1;
203         for (i=1; i<m; i++) {
204           v  = aa + ai[i];
205           vi = aj + ai[i];
206           nz = diag[i] - ai[i];
207           s[0] = b[i2];
208           for (j=0; j<nz; j++) {
209             xw[0] = x[vi[j]];
210             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
211           }
212           t[i2] = s[0];
213           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
214           x[i2]  = xw[0];
215           idiag += 1;
216           i2    += 1;
217         }
218         break;
219       case 2:
220         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
221         t[0] = b[0]; t[1] = b[1];
222         i2     = 2;
223         idiag += 4;
224         for (i=1; i<m; i++) {
225           v  = aa + 4*ai[i];
226           vi = aj + ai[i];
227           nz = diag[i] - ai[i];
228           s[0] = b[i2]; s[1] = b[i2+1];
229           for (j=0; j<nz; j++) {
230             idx = 2*vi[j];
231             it  = 4*j;
232             xw[0] = x[idx]; xw[1] = x[1+idx];
233             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
234           }
235           t[i2] = s[0]; t[i2+1] = s[1];
236           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
237           x[i2]   = xw[0]; x[i2+1] = xw[1];
238           idiag  += 4;
239           i2     += 2;
240         }
241         break;
242       case 3:
243         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
244         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
245         i2     = 3;
246         idiag += 9;
247         for (i=1; i<m; i++) {
248           v  = aa + 9*ai[i];
249           vi = aj + ai[i];
250           nz = diag[i] - ai[i];
251           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
252           while (nz--) {
253             idx = 3*(*vi++);
254             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
255             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
256             v  += 9;
257           }
258           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
259           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
260           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
261           idiag  += 9;
262           i2     += 3;
263         }
264         break;
265       case 4:
266         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
267         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
268         i2     = 4;
269         idiag += 16;
270         for (i=1; i<m; i++) {
271           v  = aa + 16*ai[i];
272           vi = aj + ai[i];
273           nz = diag[i] - ai[i];
274           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
275           while (nz--) {
276             idx = 4*(*vi++);
277             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
278             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
279             v  += 16;
280           }
281           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
282           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
283           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
284           idiag  += 16;
285           i2     += 4;
286         }
287         break;
288       case 5:
289         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
290         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
291         i2     = 5;
292         idiag += 25;
293         for (i=1; i<m; i++) {
294           v  = aa + 25*ai[i];
295           vi = aj + ai[i];
296           nz = diag[i] - ai[i];
297           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
298           while (nz--) {
299             idx = 5*(*vi++);
300             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
301             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
302             v  += 25;
303           }
304           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
305           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
306           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
307           idiag  += 25;
308           i2     += 5;
309         }
310         break;
311       case 6:
312         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
313         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
314         i2     = 6;
315         idiag += 36;
316         for (i=1; i<m; i++) {
317           v  = aa + 36*ai[i];
318           vi = aj + ai[i];
319           nz = diag[i] - ai[i];
320           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
321           while (nz--) {
322             idx = 6*(*vi++);
323             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
324             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
325             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
326             v  += 36;
327           }
328           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
329           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
330           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
331           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
332           idiag  += 36;
333           i2     += 6;
334         }
335         break;
336       case 7:
337         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
338         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
339         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
340         i2     = 7;
341         idiag += 49;
342         for (i=1; i<m; i++) {
343           v  = aa + 49*ai[i];
344           vi = aj + ai[i];
345           nz = diag[i] - ai[i];
346           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
347           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
348           while (nz--) {
349             idx = 7*(*vi++);
350             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
351             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
352             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
353             v  += 49;
354           }
355           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
356           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
357           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
358           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
359           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
360           idiag  += 49;
361           i2     += 7;
362         }
363         break;
364       default:
365         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
366         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
367         i2     = bs;
368         idiag += bs2;
369         for (i=1; i<m; i++) {
370           v  = aa + bs2*ai[i];
371           vi = aj + ai[i];
372           nz = diag[i] - ai[i];
373 
374           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
375           /* copy all rows of x that are needed into contiguous space */
376           workt = work;
377           for (j=0; j<nz; j++) {
378             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
379             workt += bs;
380           }
381           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
382           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
383           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
384 
385           idiag += bs2;
386           i2    += bs;
387         }
388         break;
389       }
390       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
391       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
392       xb = t;
393     }
394     else xb = b;
395     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
396       idiag = a->idiag+bs2*(a->mbs-1);
397       i2 = bs * (m-1);
398       switch (bs) {
399       case 1:
400         s[0]  = xb[i2];
401         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
402         x[i2] = xw[0];
403         i2   -= 1;
404         for (i=m-2; i>=0; i--) {
405           v  = aa + (diag[i]+1);
406           vi = aj + diag[i] + 1;
407           nz = ai[i+1] - diag[i] - 1;
408           s[0] = xb[i2];
409           for (j=0; j<nz; j++) {
410             xw[0] = x[vi[j]];
411             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
412           }
413           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
414           x[i2]  = xw[0];
415           idiag -= 1;
416           i2    -= 1;
417         }
418         break;
419       case 2:
420         s[0]  = xb[i2]; s[1] = xb[i2+1];
421         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
422         x[i2] = xw[0]; x[i2+1] = xw[1];
423         i2    -= 2;
424         idiag -= 4;
425         for (i=m-2; i>=0; i--) {
426           v  = aa + 4*(diag[i] + 1);
427           vi = aj + diag[i] + 1;
428           nz = ai[i+1] - diag[i] - 1;
429           s[0] = xb[i2]; s[1] = xb[i2+1];
430           for (j=0; j<nz; j++) {
431             idx = 2*vi[j];
432             it  = 4*j;
433             xw[0] = x[idx]; xw[1] = x[1+idx];
434             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
435           }
436           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
437           x[i2]   = xw[0]; x[i2+1] = xw[1];
438           idiag  -= 4;
439           i2     -= 2;
440         }
441         break;
442       case 3:
443         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
444         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
445         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
446         i2    -= 3;
447         idiag -= 9;
448         for (i=m-2; i>=0; i--) {
449           v  = aa + 9*(diag[i]+1);
450           vi = aj + diag[i] + 1;
451           nz = ai[i+1] - diag[i] - 1;
452           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
453           while (nz--) {
454             idx = 3*(*vi++);
455             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
456             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
457             v  += 9;
458           }
459           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
460           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
461           idiag  -= 9;
462           i2     -= 3;
463         }
464         break;
465       case 4:
466         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
467         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
468         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
469         i2    -= 4;
470         idiag -= 16;
471         for (i=m-2; i>=0; i--) {
472           v  = aa + 16*(diag[i]+1);
473           vi = aj + diag[i] + 1;
474           nz = ai[i+1] - diag[i] - 1;
475           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
476           while (nz--) {
477             idx = 4*(*vi++);
478             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
479             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
480             v  += 16;
481           }
482           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
483           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
484           idiag  -= 16;
485           i2     -= 4;
486         }
487         break;
488       case 5:
489         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
490         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
491         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
492         i2    -= 5;
493         idiag -= 25;
494         for (i=m-2; i>=0; i--) {
495           v  = aa + 25*(diag[i]+1);
496           vi = aj + diag[i] + 1;
497           nz = ai[i+1] - diag[i] - 1;
498           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
499           while (nz--) {
500             idx = 5*(*vi++);
501             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
502             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
503             v  += 25;
504           }
505           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
506           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
507           idiag  -= 25;
508           i2     -= 5;
509         }
510         break;
511       case 6:
512         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
513         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
514         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
515         i2    -= 6;
516         idiag -= 36;
517         for (i=m-2; i>=0; i--) {
518           v  = aa + 36*(diag[i]+1);
519           vi = aj + diag[i] + 1;
520           nz = ai[i+1] - diag[i] - 1;
521           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
522           while (nz--) {
523             idx = 6*(*vi++);
524             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
525             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
526             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
527             v  += 36;
528           }
529           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
530           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
531           idiag  -= 36;
532           i2     -= 6;
533         }
534         break;
535       case 7:
536         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
537         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
538         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
539         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
540         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
541         i2    -= 7;
542         idiag -= 49;
543         for (i=m-2; i>=0; i--) {
544           v  = aa + 49*(diag[i]+1);
545           vi = aj + diag[i] + 1;
546           nz = ai[i+1] - diag[i] - 1;
547           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
548           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
549           while (nz--) {
550             idx = 7*(*vi++);
551             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
552             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
553             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
554             v  += 49;
555           }
556           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
557           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
558           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
559           idiag  -= 49;
560           i2     -= 7;
561         }
562         break;
563       default:
564         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
565         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
566         i2    -= bs;
567         idiag -= bs2;
568         for (i=m-2; i>=0; i--) {
569           v  = aa + bs2*(diag[i]+1);
570           vi = aj + diag[i] + 1;
571           nz = ai[i+1] - diag[i] - 1;
572 
573           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
574           /* copy all rows of x that are needed into contiguous space */
575           workt = work;
576           for (j=0; j<nz; j++) {
577             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
578             workt += bs;
579           }
580           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
581           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
582 
583           idiag -= bs2;
584           i2    -= bs;
585         }
586         break;
587       }
588       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
589     }
590     its--;
591   }
592   while (its--) {
593     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
594       idiag = a->idiag;
595       i2 = 0;
596       switch (bs) {
597       case 1:
598         for (i=0; i<m; i++) {
599           v  = aa + ai[i];
600           vi = aj + ai[i];
601           nz = ai[i+1] - ai[i];
602           s[0] = b[i2];
603           for (j=0; j<nz; j++) {
604             xw[0] = x[vi[j]];
605             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
606           }
607           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
608           x[i2] += xw[0];
609           idiag += 1;
610           i2    += 1;
611         }
612         break;
613       case 2:
614         for (i=0; i<m; i++) {
615           v  = aa + 4*ai[i];
616           vi = aj + ai[i];
617           nz = ai[i+1] - ai[i];
618           s[0] = b[i2]; s[1] = b[i2+1];
619           for (j=0; j<nz; j++) {
620             idx = 2*vi[j];
621             it  = 4*j;
622             xw[0] = x[idx]; xw[1] = x[1+idx];
623             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
624           }
625           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
626           x[i2]  += xw[0]; x[i2+1] += xw[1];
627           idiag  += 4;
628           i2     += 2;
629         }
630         break;
631       case 3:
632         for (i=0; i<m; i++) {
633           v  = aa + 9*ai[i];
634           vi = aj + ai[i];
635           nz = ai[i+1] - ai[i];
636           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
637           while (nz--) {
638             idx = 3*(*vi++);
639             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
640             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
641             v  += 9;
642           }
643           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
644           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
645           idiag  += 9;
646           i2     += 3;
647         }
648         break;
649       case 4:
650         for (i=0; i<m; i++) {
651           v  = aa + 16*ai[i];
652           vi = aj + ai[i];
653           nz = ai[i+1] - ai[i];
654           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
655           while (nz--) {
656             idx = 4*(*vi++);
657             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
658             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
659             v  += 16;
660           }
661           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
662           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
663           idiag  += 16;
664           i2     += 4;
665         }
666         break;
667       case 5:
668         for (i=0; i<m; i++) {
669           v  = aa + 25*ai[i];
670           vi = aj + ai[i];
671           nz = ai[i+1] - ai[i];
672           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
673           while (nz--) {
674             idx = 5*(*vi++);
675             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
676             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
677             v  += 25;
678           }
679           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
680           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
681           idiag  += 25;
682           i2     += 5;
683         }
684         break;
685       case 6:
686         for (i=0; i<m; i++) {
687           v  = aa + 36*ai[i];
688           vi = aj + ai[i];
689           nz = ai[i+1] - ai[i];
690           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
691           while (nz--) {
692             idx = 6*(*vi++);
693             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
694             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
695             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
696             v  += 36;
697           }
698           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
699           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
700           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
701           idiag  += 36;
702           i2     += 6;
703         }
704         break;
705       case 7:
706         for (i=0; i<m; i++) {
707           v  = aa + 49*ai[i];
708           vi = aj + ai[i];
709           nz = ai[i+1] - ai[i];
710           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
711           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
712           while (nz--) {
713             idx = 7*(*vi++);
714             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
715             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
716             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
717             v  += 49;
718           }
719           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
720           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
721           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
722           idiag  += 49;
723           i2     += 7;
724         }
725         break;
726       default:
727         for (i=0; i<m; i++) {
728           v  = aa + bs2*ai[i];
729           vi = aj + ai[i];
730           nz = ai[i+1] - ai[i];
731 
732           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
733           /* copy all rows of x that are needed into contiguous space */
734           workt = work;
735           for (j=0; j<nz; j++) {
736             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
737             workt += bs;
738           }
739           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
740           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
741 
742           idiag += bs2;
743           i2    += bs;
744         }
745         break;
746       }
747       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
748     }
749     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
750       idiag = a->idiag+bs2*(a->mbs-1);
751       i2 = bs * (m-1);
752       switch (bs) {
753       case 1:
754         for (i=m-1; i>=0; i--) {
755           v  = aa + ai[i];
756           vi = aj + ai[i];
757           nz = ai[i+1] - ai[i];
758           s[0] = b[i2];
759           for (j=0; j<nz; j++) {
760             xw[0] = x[vi[j]];
761             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
762           }
763           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
764           x[i2] += xw[0];
765           idiag -= 1;
766           i2    -= 1;
767         }
768         break;
769       case 2:
770         for (i=m-1; i>=0; i--) {
771           v  = aa + 4*ai[i];
772           vi = aj + ai[i];
773           nz = ai[i+1] - ai[i];
774           s[0] = b[i2]; s[1] = b[i2+1];
775           for (j=0; j<nz; j++) {
776             idx = 2*vi[j];
777             it  = 4*j;
778             xw[0] = x[idx]; xw[1] = x[1+idx];
779             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
780           }
781           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
782           x[i2]  += xw[0]; x[i2+1] += xw[1];
783           idiag  -= 4;
784           i2     -= 2;
785         }
786         break;
787       case 3:
788         for (i=m-1; i>=0; i--) {
789           v  = aa + 9*ai[i];
790           vi = aj + ai[i];
791           nz = ai[i+1] - ai[i];
792           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
793           while (nz--) {
794             idx = 3*(*vi++);
795             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
796             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
797             v  += 9;
798           }
799           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
800           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
801           idiag  -= 9;
802           i2     -= 3;
803         }
804         break;
805       case 4:
806         for (i=m-1; i>=0; i--) {
807           v  = aa + 16*ai[i];
808           vi = aj + ai[i];
809           nz = ai[i+1] - ai[i];
810           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
811           while (nz--) {
812             idx = 4*(*vi++);
813             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
814             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
815             v  += 16;
816           }
817           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
818           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
819           idiag  -= 16;
820           i2     -= 4;
821         }
822         break;
823       case 5:
824         for (i=m-1; i>=0; i--) {
825           v  = aa + 25*ai[i];
826           vi = aj + ai[i];
827           nz = ai[i+1] - ai[i];
828           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
829           while (nz--) {
830             idx = 5*(*vi++);
831             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
832             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
833             v  += 25;
834           }
835           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
836           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
837           idiag  -= 25;
838           i2     -= 5;
839         }
840         break;
841       case 6:
842         for (i=m-1; i>=0; i--) {
843           v  = aa + 36*ai[i];
844           vi = aj + ai[i];
845           nz = ai[i+1] - ai[i];
846           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
847           while (nz--) {
848             idx = 6*(*vi++);
849             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
850             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
851             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
852             v  += 36;
853           }
854           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
855           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
856           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
857           idiag  -= 36;
858           i2     -= 6;
859         }
860         break;
861       case 7:
862         for (i=m-1; i>=0; i--) {
863           v  = aa + 49*ai[i];
864           vi = aj + ai[i];
865           nz = ai[i+1] - ai[i];
866           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
867           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
868           while (nz--) {
869             idx = 7*(*vi++);
870             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
871             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
872             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
873             v  += 49;
874           }
875           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
876           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
877           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
878           idiag  -= 49;
879           i2     -= 7;
880         }
881         break;
882       default:
883         for (i=m-1; i>=0; i--) {
884           v  = aa + bs2*ai[i];
885           vi = aj + ai[i];
886           nz = ai[i+1] - ai[i];
887 
888           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
889           /* copy all rows of x that are needed into contiguous space */
890           workt = work;
891           for (j=0; j<nz; j++) {
892             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
893             workt += bs;
894           }
895           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
896           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
897 
898           idiag -= bs2;
899           i2    -= bs;
900         }
901         break;
902       }
903       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
904     }
905   }
906   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
907   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
908   PetscFunctionReturn(0);
909 }
910 
911 
912 /*
913     Special version for direct calls from Fortran (Used in PETSc-fun3d)
914 */
915 #if defined(PETSC_HAVE_FORTRAN_CAPS)
916 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
917 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
918 #define matsetvaluesblocked4_ matsetvaluesblocked4
919 #endif
920 
921 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
922 {
923   Mat               A  = *AA;
924   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
925   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
926   PetscInt          *ai    =a->i,*ailen=a->ilen;
927   PetscInt          *aj    =a->j,stepval,lastcol = -1;
928   const PetscScalar *value = v;
929   MatScalar         *ap,*aa = a->a,*bap;
930 
931   PetscFunctionBegin;
932   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
933   stepval = (n-1)*4;
934   for (k=0; k<m; k++) { /* loop over added rows */
935     row  = im[k];
936     rp   = aj + ai[row];
937     ap   = aa + 16*ai[row];
938     nrow = ailen[row];
939     low  = 0;
940     high = nrow;
941     for (l=0; l<n; l++) { /* loop over added columns */
942       col = in[l];
943       if (col <= lastcol)  low = 0;
944       else                high = nrow;
945       lastcol = col;
946       value   = v + k*(stepval+4 + l)*4;
947       while (high-low > 7) {
948         t = (low+high)/2;
949         if (rp[t] > col) high = t;
950         else             low  = t;
951       }
952       for (i=low; i<high; i++) {
953         if (rp[i] > col) break;
954         if (rp[i] == col) {
955           bap = ap +  16*i;
956           for (ii=0; ii<4; ii++,value+=stepval) {
957             for (jj=ii; jj<16; jj+=4) {
958               bap[jj] += *value++;
959             }
960           }
961           goto noinsert2;
962         }
963       }
964       N = nrow++ - 1;
965       high++; /* added new column index thus must search to one higher than before */
966       /* shift up all the later entries in this row */
967       for (ii=N; ii>=i; ii--) {
968         rp[ii+1] = rp[ii];
969         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
970       }
971       if (N >= i) {
972         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
973       }
974       rp[i] = col;
975       bap   = ap +  16*i;
976       for (ii=0; ii<4; ii++,value+=stepval) {
977         for (jj=ii; jj<16; jj+=4) {
978           bap[jj] = *value++;
979         }
980       }
981       noinsert2:;
982       low = i;
983     }
984     ailen[row] = nrow;
985   }
986   PetscFunctionReturnVoid();
987 }
988 
989 #if defined(PETSC_HAVE_FORTRAN_CAPS)
990 #define matsetvalues4_ MATSETVALUES4
991 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
992 #define matsetvalues4_ matsetvalues4
993 #endif
994 
995 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
996 {
997   Mat         A  = *AA;
998   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
999   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1000   PetscInt    *ai=a->i,*ailen=a->ilen;
1001   PetscInt    *aj=a->j,brow,bcol;
1002   PetscInt    ridx,cidx,lastcol = -1;
1003   MatScalar   *ap,value,*aa=a->a,*bap;
1004 
1005   PetscFunctionBegin;
1006   for (k=0; k<m; k++) { /* loop over added rows */
1007     row  = im[k]; brow = row/4;
1008     rp   = aj + ai[brow];
1009     ap   = aa + 16*ai[brow];
1010     nrow = ailen[brow];
1011     low  = 0;
1012     high = nrow;
1013     for (l=0; l<n; l++) { /* loop over added columns */
1014       col   = in[l]; bcol = col/4;
1015       ridx  = row % 4; cidx = col % 4;
1016       value = v[l + k*n];
1017       if (col <= lastcol)  low = 0;
1018       else                high = nrow;
1019       lastcol = col;
1020       while (high-low > 7) {
1021         t = (low+high)/2;
1022         if (rp[t] > bcol) high = t;
1023         else              low  = t;
1024       }
1025       for (i=low; i<high; i++) {
1026         if (rp[i] > bcol) break;
1027         if (rp[i] == bcol) {
1028           bap   = ap +  16*i + 4*cidx + ridx;
1029           *bap += value;
1030           goto noinsert1;
1031         }
1032       }
1033       N = nrow++ - 1;
1034       high++; /* added new column thus must search to one higher than before */
1035       /* shift up all the later entries in this row */
1036       for (ii=N; ii>=i; ii--) {
1037         rp[ii+1] = rp[ii];
1038         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1039       }
1040       if (N>=i) {
1041         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1042       }
1043       rp[i]                    = bcol;
1044       ap[16*i + 4*cidx + ridx] = value;
1045 noinsert1:;
1046       low = i;
1047     }
1048     ailen[brow] = nrow;
1049   }
1050   PetscFunctionReturnVoid();
1051 }
1052 
1053 /*
1054      Checks for missing diagonals
1055 */
1056 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1057 {
1058   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1059   PetscErrorCode ierr;
1060   PetscInt       *diag,*ii = a->i,i;
1061 
1062   PetscFunctionBegin;
1063   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1064   *missing = PETSC_FALSE;
1065   if (A->rmap->n > 0 && !ii) {
1066     *missing = PETSC_TRUE;
1067     if (d) *d = 0;
1068     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1069   } else {
1070     diag = a->diag;
1071     for (i=0; i<a->mbs; i++) {
1072       if (diag[i] >= ii[i+1]) {
1073         *missing = PETSC_TRUE;
1074         if (d) *d = i;
1075         PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);
1076         break;
1077       }
1078     }
1079   }
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1084 {
1085   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1086   PetscErrorCode ierr;
1087   PetscInt       i,j,m = a->mbs;
1088 
1089   PetscFunctionBegin;
1090   if (!a->diag) {
1091     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1092     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1093     a->free_diag = PETSC_TRUE;
1094   }
1095   for (i=0; i<m; i++) {
1096     a->diag[i] = a->i[i+1];
1097     for (j=a->i[i]; j<a->i[i+1]; j++) {
1098       if (a->j[j] == i) {
1099         a->diag[i] = j;
1100         break;
1101       }
1102     }
1103   }
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 
1108 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1109 {
1110   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1111   PetscErrorCode ierr;
1112   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1113   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1114 
1115   PetscFunctionBegin;
1116   *nn = n;
1117   if (!ia) PetscFunctionReturn(0);
1118   if (symmetric) {
1119     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1120     nz   = tia[n];
1121   } else {
1122     tia = a->i; tja = a->j;
1123   }
1124 
1125   if (!blockcompressed && bs > 1) {
1126     (*nn) *= bs;
1127     /* malloc & create the natural set of indices */
1128     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1129     if (n) {
1130       (*ia)[0] = oshift;
1131       for (j=1; j<bs; j++) {
1132         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1133       }
1134     }
1135 
1136     for (i=1; i<n; i++) {
1137       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1138       for (j=1; j<bs; j++) {
1139         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1140       }
1141     }
1142     if (n) {
1143       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1144     }
1145 
1146     if (inja) {
1147       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1148       cnt = 0;
1149       for (i=0; i<n; i++) {
1150         for (j=0; j<bs; j++) {
1151           for (k=tia[i]; k<tia[i+1]; k++) {
1152             for (l=0; l<bs; l++) {
1153               (*ja)[cnt++] = bs*tja[k] + l;
1154             }
1155           }
1156         }
1157       }
1158     }
1159 
1160     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1161       ierr = PetscFree(tia);CHKERRQ(ierr);
1162       ierr = PetscFree(tja);CHKERRQ(ierr);
1163     }
1164   } else if (oshift == 1) {
1165     if (symmetric) {
1166       nz = tia[A->rmap->n/bs];
1167       /*  add 1 to i and j indices */
1168       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1169       *ia = tia;
1170       if (ja) {
1171         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1172         *ja = tja;
1173       }
1174     } else {
1175       nz = a->i[A->rmap->n/bs];
1176       /* malloc space and  add 1 to i and j indices */
1177       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1178       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1179       if (ja) {
1180         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1181         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1182       }
1183     }
1184   } else {
1185     *ia = tia;
1186     if (ja) *ja = tja;
1187   }
1188   PetscFunctionReturn(0);
1189 }
1190 
1191 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1192 {
1193   PetscErrorCode ierr;
1194 
1195   PetscFunctionBegin;
1196   if (!ia) PetscFunctionReturn(0);
1197   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1198     ierr = PetscFree(*ia);CHKERRQ(ierr);
1199     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1200   }
1201   PetscFunctionReturn(0);
1202 }
1203 
1204 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1205 {
1206   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1207   PetscErrorCode ierr;
1208 
1209   PetscFunctionBegin;
1210 #if defined(PETSC_USE_LOG)
1211   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1212 #endif
1213   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1214   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1215   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1216   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1217   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1218   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1219   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1220   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1221   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1222   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1223   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1224   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1225   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1226 
1227   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1228   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1229   ierr = PetscFree(A->data);CHKERRQ(ierr);
1230 
1231   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1232   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1233   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1234   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1235   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1236   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1237   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1238   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1239   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1240   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1241   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1242 #if defined(PETSC_HAVE_HYPRE)
1243   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1244 #endif
1245   PetscFunctionReturn(0);
1246 }
1247 
1248 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1249 {
1250   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1251   PetscErrorCode ierr;
1252 
1253   PetscFunctionBegin;
1254   switch (op) {
1255   case MAT_ROW_ORIENTED:
1256     a->roworiented = flg;
1257     break;
1258   case MAT_KEEP_NONZERO_PATTERN:
1259     a->keepnonzeropattern = flg;
1260     break;
1261   case MAT_NEW_NONZERO_LOCATIONS:
1262     a->nonew = (flg ? 0 : 1);
1263     break;
1264   case MAT_NEW_NONZERO_LOCATION_ERR:
1265     a->nonew = (flg ? -1 : 0);
1266     break;
1267   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1268     a->nonew = (flg ? -2 : 0);
1269     break;
1270   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1271     a->nounused = (flg ? -1 : 0);
1272     break;
1273   case MAT_NEW_DIAGONALS:
1274   case MAT_IGNORE_OFF_PROC_ENTRIES:
1275   case MAT_USE_HASH_TABLE:
1276     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1277     break;
1278   case MAT_SPD:
1279   case MAT_SYMMETRIC:
1280   case MAT_STRUCTURALLY_SYMMETRIC:
1281   case MAT_HERMITIAN:
1282   case MAT_SYMMETRY_ETERNAL:
1283   case MAT_SUBMAT_SINGLEIS:
1284   case MAT_STRUCTURE_ONLY:
1285     /* These options are handled directly by MatSetOption() */
1286     break;
1287   default:
1288     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1289   }
1290   PetscFunctionReturn(0);
1291 }
1292 
1293 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1294 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1295 {
1296   PetscErrorCode ierr;
1297   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1298   MatScalar      *aa_i;
1299   PetscScalar    *v_i;
1300 
1301   PetscFunctionBegin;
1302   bs  = A->rmap->bs;
1303   bs2 = bs*bs;
1304   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1305 
1306   bn  = row/bs;   /* Block number */
1307   bp  = row % bs; /* Block Position */
1308   M   = ai[bn+1] - ai[bn];
1309   *nz = bs*M;
1310 
1311   if (v) {
1312     *v = 0;
1313     if (*nz) {
1314       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1315       for (i=0; i<M; i++) { /* for each block in the block row */
1316         v_i  = *v + i*bs;
1317         aa_i = aa + bs2*(ai[bn] + i);
1318         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1319       }
1320     }
1321   }
1322 
1323   if (idx) {
1324     *idx = 0;
1325     if (*nz) {
1326       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1327       for (i=0; i<M; i++) { /* for each block in the block row */
1328         idx_i = *idx + i*bs;
1329         itmp  = bs*aj[ai[bn] + i];
1330         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1331       }
1332     }
1333   }
1334   PetscFunctionReturn(0);
1335 }
1336 
1337 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1338 {
1339   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1340   PetscErrorCode ierr;
1341 
1342   PetscFunctionBegin;
1343   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1344   PetscFunctionReturn(0);
1345 }
1346 
1347 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1348 {
1349   PetscErrorCode ierr;
1350 
1351   PetscFunctionBegin;
1352   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1353   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1354   PetscFunctionReturn(0);
1355 }
1356 
1357 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1358 {
1359   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1360   Mat            C;
1361   PetscErrorCode ierr;
1362   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1363   PetscInt       *rows,*cols,bs2=a->bs2;
1364   MatScalar      *array;
1365 
1366   PetscFunctionBegin;
1367   if (reuse == MAT_INPLACE_MATRIX && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1368   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1369     ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr);
1370 
1371     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1372     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1373     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1374     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1375     ierr = MatSeqBAIJSetPreallocation(C,bs,0,col);CHKERRQ(ierr);
1376     ierr = PetscFree(col);CHKERRQ(ierr);
1377   } else {
1378     C = *B;
1379   }
1380 
1381   array = a->a;
1382   ierr  = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr);
1383   for (i=0; i<mbs; i++) {
1384     cols[0] = i*bs;
1385     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1386     len = ai[i+1] - ai[i];
1387     for (j=0; j<len; j++) {
1388       rows[0] = (*aj++)*bs;
1389       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1390       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1391       array += bs2;
1392     }
1393   }
1394   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1395 
1396   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1397   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1398 
1399   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1400     *B = C;
1401   } else {
1402     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1403   }
1404   PetscFunctionReturn(0);
1405 }
1406 
1407 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1408 {
1409   PetscErrorCode ierr;
1410   Mat            Btrans;
1411 
1412   PetscFunctionBegin;
1413   *f   = PETSC_FALSE;
1414   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1415   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1416   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1417   PetscFunctionReturn(0);
1418 }
1419 
1420 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1421 {
1422   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1423   PetscErrorCode ierr;
1424   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1425   int            fd;
1426   PetscScalar    *aa;
1427   FILE           *file;
1428 
1429   PetscFunctionBegin;
1430   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1431   ierr        = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr);
1432   col_lens[0] = MAT_FILE_CLASSID;
1433 
1434   col_lens[1] = A->rmap->N;
1435   col_lens[2] = A->cmap->n;
1436   col_lens[3] = a->nz*bs2;
1437 
1438   /* store lengths of each row and write (including header) to file */
1439   count = 0;
1440   for (i=0; i<a->mbs; i++) {
1441     for (j=0; j<bs; j++) {
1442       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1443     }
1444   }
1445   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1446   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1447 
1448   /* store column indices (zero start index) */
1449   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1450   count = 0;
1451   for (i=0; i<a->mbs; i++) {
1452     for (j=0; j<bs; j++) {
1453       for (k=a->i[i]; k<a->i[i+1]; k++) {
1454         for (l=0; l<bs; l++) {
1455           jj[count++] = bs*a->j[k] + l;
1456         }
1457       }
1458     }
1459   }
1460   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1461   ierr = PetscFree(jj);CHKERRQ(ierr);
1462 
1463   /* store nonzero values */
1464   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1465   count = 0;
1466   for (i=0; i<a->mbs; i++) {
1467     for (j=0; j<bs; j++) {
1468       for (k=a->i[i]; k<a->i[i+1]; k++) {
1469         for (l=0; l<bs; l++) {
1470           aa[count++] = a->a[bs2*k + l*bs + j];
1471         }
1472       }
1473     }
1474   }
1475   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1476   ierr = PetscFree(aa);CHKERRQ(ierr);
1477 
1478   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1479   if (file) {
1480     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1481   }
1482   PetscFunctionReturn(0);
1483 }
1484 
1485 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1486 {
1487   PetscErrorCode ierr;
1488   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1489   PetscInt       i,bs = A->rmap->bs,k;
1490 
1491   PetscFunctionBegin;
1492   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1493   for (i=0; i<a->mbs; i++) {
1494     ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr);
1495     for (k=a->i[i]; k<a->i[i+1]; k++) {
1496       ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr);
1497     }
1498     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1499   }
1500   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1501   PetscFunctionReturn(0);
1502 }
1503 
1504 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1505 {
1506   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1507   PetscErrorCode    ierr;
1508   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1509   PetscViewerFormat format;
1510 
1511   PetscFunctionBegin;
1512   if (A->structure_only) {
1513     ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr);
1514     PetscFunctionReturn(0);
1515   }
1516 
1517   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1518   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1519     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1520   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1521     const char *matname;
1522     Mat        aij;
1523     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1524     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1525     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1526     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1527     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1528   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1529       PetscFunctionReturn(0);
1530   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1531     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1532     for (i=0; i<a->mbs; i++) {
1533       for (j=0; j<bs; j++) {
1534         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1535         for (k=a->i[i]; k<a->i[i+1]; k++) {
1536           for (l=0; l<bs; l++) {
1537 #if defined(PETSC_USE_COMPLEX)
1538             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1539               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1540                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1541             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1542               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1543                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1544             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1545               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1546             }
1547 #else
1548             if (a->a[bs2*k + l*bs + j] != 0.0) {
1549               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1550             }
1551 #endif
1552           }
1553         }
1554         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1555       }
1556     }
1557     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1558   } else {
1559     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1560     for (i=0; i<a->mbs; i++) {
1561       for (j=0; j<bs; j++) {
1562         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1563         for (k=a->i[i]; k<a->i[i+1]; k++) {
1564           for (l=0; l<bs; l++) {
1565 #if defined(PETSC_USE_COMPLEX)
1566             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1567               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1568                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1569             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1570               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1571                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1572             } else {
1573               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1574             }
1575 #else
1576             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1577 #endif
1578           }
1579         }
1580         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1581       }
1582     }
1583     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1584   }
1585   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1586   PetscFunctionReturn(0);
1587 }
1588 
1589 #include <petscdraw.h>
1590 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1591 {
1592   Mat               A = (Mat) Aa;
1593   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1594   PetscErrorCode    ierr;
1595   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1596   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1597   MatScalar         *aa;
1598   PetscViewer       viewer;
1599   PetscViewerFormat format;
1600 
1601   PetscFunctionBegin;
1602   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1603   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1604   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1605 
1606   /* loop over matrix elements drawing boxes */
1607 
1608   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1609     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1610     /* Blue for negative, Cyan for zero and  Red for positive */
1611     color = PETSC_DRAW_BLUE;
1612     for (i=0,row=0; i<mbs; i++,row+=bs) {
1613       for (j=a->i[i]; j<a->i[i+1]; j++) {
1614         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1615         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1616         aa  = a->a + j*bs2;
1617         for (k=0; k<bs; k++) {
1618           for (l=0; l<bs; l++) {
1619             if (PetscRealPart(*aa++) >=  0.) continue;
1620             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1621           }
1622         }
1623       }
1624     }
1625     color = PETSC_DRAW_CYAN;
1626     for (i=0,row=0; i<mbs; i++,row+=bs) {
1627       for (j=a->i[i]; j<a->i[i+1]; j++) {
1628         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1629         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1630         aa  = a->a + j*bs2;
1631         for (k=0; k<bs; k++) {
1632           for (l=0; l<bs; l++) {
1633             if (PetscRealPart(*aa++) != 0.) continue;
1634             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1635           }
1636         }
1637       }
1638     }
1639     color = PETSC_DRAW_RED;
1640     for (i=0,row=0; i<mbs; i++,row+=bs) {
1641       for (j=a->i[i]; j<a->i[i+1]; j++) {
1642         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1643         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1644         aa  = a->a + j*bs2;
1645         for (k=0; k<bs; k++) {
1646           for (l=0; l<bs; l++) {
1647             if (PetscRealPart(*aa++) <= 0.) continue;
1648             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1649           }
1650         }
1651       }
1652     }
1653     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1654   } else {
1655     /* use contour shading to indicate magnitude of values */
1656     /* first determine max of all nonzero values */
1657     PetscReal minv = 0.0, maxv = 0.0;
1658     PetscDraw popup;
1659 
1660     for (i=0; i<a->nz*a->bs2; i++) {
1661       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1662     }
1663     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1664     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1665     ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1666 
1667     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1668     for (i=0,row=0; i<mbs; i++,row+=bs) {
1669       for (j=a->i[i]; j<a->i[i+1]; j++) {
1670         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1671         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1672         aa  = a->a + j*bs2;
1673         for (k=0; k<bs; k++) {
1674           for (l=0; l<bs; l++) {
1675             MatScalar v = *aa++;
1676             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1677             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1678           }
1679         }
1680       }
1681     }
1682     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1683   }
1684   PetscFunctionReturn(0);
1685 }
1686 
1687 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1688 {
1689   PetscErrorCode ierr;
1690   PetscReal      xl,yl,xr,yr,w,h;
1691   PetscDraw      draw;
1692   PetscBool      isnull;
1693 
1694   PetscFunctionBegin;
1695   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1696   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1697   if (isnull) PetscFunctionReturn(0);
1698 
1699   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1700   xr  += w;          yr += h;        xl = -w;     yl = -h;
1701   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1702   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1703   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1704   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1705   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1706   PetscFunctionReturn(0);
1707 }
1708 
1709 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1710 {
1711   PetscErrorCode ierr;
1712   PetscBool      iascii,isbinary,isdraw;
1713 
1714   PetscFunctionBegin;
1715   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1716   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1717   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1718   if (iascii) {
1719     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1720   } else if (isbinary) {
1721     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1722   } else if (isdraw) {
1723     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1724   } else {
1725     Mat B;
1726     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1727     ierr = MatView(B,viewer);CHKERRQ(ierr);
1728     ierr = MatDestroy(&B);CHKERRQ(ierr);
1729   }
1730   PetscFunctionReturn(0);
1731 }
1732 
1733 
1734 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1735 {
1736   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1737   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1738   PetscInt    *ai = a->i,*ailen = a->ilen;
1739   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1740   MatScalar   *ap,*aa = a->a;
1741 
1742   PetscFunctionBegin;
1743   for (k=0; k<m; k++) { /* loop over rows */
1744     row = im[k]; brow = row/bs;
1745     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1746     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1747     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1748     nrow = ailen[brow];
1749     for (l=0; l<n; l++) { /* loop over columns */
1750       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1751       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1752       col  = in[l];
1753       bcol = col/bs;
1754       cidx = col%bs;
1755       ridx = row%bs;
1756       high = nrow;
1757       low  = 0; /* assume unsorted */
1758       while (high-low > 5) {
1759         t = (low+high)/2;
1760         if (rp[t] > bcol) high = t;
1761         else             low  = t;
1762       }
1763       for (i=low; i<high; i++) {
1764         if (rp[i] > bcol) break;
1765         if (rp[i] == bcol) {
1766           *v++ = ap[bs2*i+bs*cidx+ridx];
1767           goto finished;
1768         }
1769       }
1770       *v++ = 0.0;
1771 finished:;
1772     }
1773   }
1774   PetscFunctionReturn(0);
1775 }
1776 
1777 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1778 {
1779   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1780   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1781   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1782   PetscErrorCode    ierr;
1783   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1784   PetscBool         roworiented=a->roworiented;
1785   const PetscScalar *value     = v;
1786   MatScalar         *ap=NULL,*aa = a->a,*bap;
1787 
1788   PetscFunctionBegin;
1789   if (roworiented) {
1790     stepval = (n-1)*bs;
1791   } else {
1792     stepval = (m-1)*bs;
1793   }
1794   for (k=0; k<m; k++) { /* loop over added rows */
1795     row = im[k];
1796     if (row < 0) continue;
1797 #if defined(PETSC_USE_DEBUG)
1798     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1799 #endif
1800     rp   = aj + ai[row];
1801     if (!A->structure_only) ap = aa + bs2*ai[row];
1802     rmax = imax[row];
1803     nrow = ailen[row];
1804     low  = 0;
1805     high = nrow;
1806     for (l=0; l<n; l++) { /* loop over added columns */
1807       if (in[l] < 0) continue;
1808 #if defined(PETSC_USE_DEBUG)
1809       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1810 #endif
1811       col = in[l];
1812       if (!A->structure_only) {
1813         if (roworiented) {
1814           value = v + (k*(stepval+bs) + l)*bs;
1815         } else {
1816           value = v + (l*(stepval+bs) + k)*bs;
1817         }
1818       }
1819       if (col <= lastcol) low = 0;
1820       else high = nrow;
1821       lastcol = col;
1822       while (high-low > 7) {
1823         t = (low+high)/2;
1824         if (rp[t] > col) high = t;
1825         else             low  = t;
1826       }
1827       for (i=low; i<high; i++) {
1828         if (rp[i] > col) break;
1829         if (rp[i] == col) {
1830           if (A->structure_only) goto noinsert2;
1831           bap = ap +  bs2*i;
1832           if (roworiented) {
1833             if (is == ADD_VALUES) {
1834               for (ii=0; ii<bs; ii++,value+=stepval) {
1835                 for (jj=ii; jj<bs2; jj+=bs) {
1836                   bap[jj] += *value++;
1837                 }
1838               }
1839             } else {
1840               for (ii=0; ii<bs; ii++,value+=stepval) {
1841                 for (jj=ii; jj<bs2; jj+=bs) {
1842                   bap[jj] = *value++;
1843                 }
1844               }
1845             }
1846           } else {
1847             if (is == ADD_VALUES) {
1848               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1849                 for (jj=0; jj<bs; jj++) {
1850                   bap[jj] += value[jj];
1851                 }
1852                 bap += bs;
1853               }
1854             } else {
1855               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1856                 for (jj=0; jj<bs; jj++) {
1857                   bap[jj]  = value[jj];
1858                 }
1859                 bap += bs;
1860               }
1861             }
1862           }
1863           goto noinsert2;
1864         }
1865       }
1866       if (nonew == 1) goto noinsert2;
1867       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1868       if (A->structure_only) {
1869         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1870       } else {
1871         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1872       }
1873       N = nrow++ - 1; high++;
1874       /* shift up all the later entries in this row */
1875       for (ii=N; ii>=i; ii--) {
1876         rp[ii+1] = rp[ii];
1877         if (!A->structure_only) {
1878           ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1879         }
1880       }
1881       if (N >= i && !A->structure_only) {
1882         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1883       }
1884 
1885       rp[i] = col;
1886       if (!A->structure_only) {
1887         bap   = ap +  bs2*i;
1888         if (roworiented) {
1889           for (ii=0; ii<bs; ii++,value+=stepval) {
1890             for (jj=ii; jj<bs2; jj+=bs) {
1891               bap[jj] = *value++;
1892             }
1893           }
1894         } else {
1895           for (ii=0; ii<bs; ii++,value+=stepval) {
1896             for (jj=0; jj<bs; jj++) {
1897               *bap++ = *value++;
1898             }
1899           }
1900         }
1901       }
1902 noinsert2:;
1903       low = i;
1904     }
1905     ailen[row] = nrow;
1906   }
1907   PetscFunctionReturn(0);
1908 }
1909 
1910 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1911 {
1912   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1913   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1914   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1915   PetscErrorCode ierr;
1916   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1917   MatScalar      *aa  = a->a,*ap;
1918   PetscReal      ratio=0.6;
1919 
1920   PetscFunctionBegin;
1921   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1922 
1923   if (m) rmax = ailen[0];
1924   for (i=1; i<mbs; i++) {
1925     /* move each row back by the amount of empty slots (fshift) before it*/
1926     fshift += imax[i-1] - ailen[i-1];
1927     rmax    = PetscMax(rmax,ailen[i]);
1928     if (fshift) {
1929       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1930       N  = ailen[i];
1931       for (j=0; j<N; j++) {
1932         ip[j-fshift] = ip[j];
1933         if (!A->structure_only) {
1934           ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1935         }
1936       }
1937     }
1938     ai[i] = ai[i-1] + ailen[i-1];
1939   }
1940   if (mbs) {
1941     fshift += imax[mbs-1] - ailen[mbs-1];
1942     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1943   }
1944 
1945   /* reset ilen and imax for each row */
1946   a->nonzerorowcnt = 0;
1947   if (A->structure_only) {
1948     ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1949   } else { /* !A->structure_only */
1950     for (i=0; i<mbs; i++) {
1951       ailen[i] = imax[i] = ai[i+1] - ai[i];
1952       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1953     }
1954   }
1955   a->nz = ai[mbs];
1956 
1957   /* diagonals may have moved, so kill the diagonal pointers */
1958   a->idiagvalid = PETSC_FALSE;
1959   if (fshift && a->diag) {
1960     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1961     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1962     a->diag = 0;
1963   }
1964   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1965   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1966   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1967   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1968 
1969   A->info.mallocs    += a->reallocs;
1970   a->reallocs         = 0;
1971   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1972   a->rmax             = rmax;
1973 
1974   if (!A->structure_only) {
1975     ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1976   }
1977   PetscFunctionReturn(0);
1978 }
1979 
1980 /*
1981    This function returns an array of flags which indicate the locations of contiguous
1982    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1983    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1984    Assume: sizes should be long enough to hold all the values.
1985 */
1986 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1987 {
1988   PetscInt  i,j,k,row;
1989   PetscBool flg;
1990 
1991   PetscFunctionBegin;
1992   for (i=0,j=0; i<n; j++) {
1993     row = idx[i];
1994     if (row%bs!=0) { /* Not the begining of a block */
1995       sizes[j] = 1;
1996       i++;
1997     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1998       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1999       i++;
2000     } else { /* Begining of the block, so check if the complete block exists */
2001       flg = PETSC_TRUE;
2002       for (k=1; k<bs; k++) {
2003         if (row+k != idx[i+k]) { /* break in the block */
2004           flg = PETSC_FALSE;
2005           break;
2006         }
2007       }
2008       if (flg) { /* No break in the bs */
2009         sizes[j] = bs;
2010         i       += bs;
2011       } else {
2012         sizes[j] = 1;
2013         i++;
2014       }
2015     }
2016   }
2017   *bs_max = j;
2018   PetscFunctionReturn(0);
2019 }
2020 
2021 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2022 {
2023   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2024   PetscErrorCode    ierr;
2025   PetscInt          i,j,k,count,*rows;
2026   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2027   PetscScalar       zero = 0.0;
2028   MatScalar         *aa;
2029   const PetscScalar *xx;
2030   PetscScalar       *bb;
2031 
2032   PetscFunctionBegin;
2033   /* fix right hand side if needed */
2034   if (x && b) {
2035     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2036     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2037     for (i=0; i<is_n; i++) {
2038       bb[is_idx[i]] = diag*xx[is_idx[i]];
2039     }
2040     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2041     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2042   }
2043 
2044   /* Make a copy of the IS and  sort it */
2045   /* allocate memory for rows,sizes */
2046   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2047 
2048   /* copy IS values to rows, and sort them */
2049   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2050   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2051 
2052   if (baij->keepnonzeropattern) {
2053     for (i=0; i<is_n; i++) sizes[i] = 1;
2054     bs_max          = is_n;
2055   } else {
2056     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2057     A->nonzerostate++;
2058   }
2059 
2060   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2061     row = rows[j];
2062     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2063     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2064     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2065     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2066       if (diag != (PetscScalar)0.0) {
2067         if (baij->ilen[row/bs] > 0) {
2068           baij->ilen[row/bs]       = 1;
2069           baij->j[baij->i[row/bs]] = row/bs;
2070 
2071           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2072         }
2073         /* Now insert all the diagonal values for this bs */
2074         for (k=0; k<bs; k++) {
2075           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2076         }
2077       } else { /* (diag == 0.0) */
2078         baij->ilen[row/bs] = 0;
2079       } /* end (diag == 0.0) */
2080     } else { /* (sizes[i] != bs) */
2081 #if defined(PETSC_USE_DEBUG)
2082       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2083 #endif
2084       for (k=0; k<count; k++) {
2085         aa[0] =  zero;
2086         aa   += bs;
2087       }
2088       if (diag != (PetscScalar)0.0) {
2089         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2090       }
2091     }
2092   }
2093 
2094   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2095   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2096   PetscFunctionReturn(0);
2097 }
2098 
2099 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2100 {
2101   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2102   PetscErrorCode    ierr;
2103   PetscInt          i,j,k,count;
2104   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2105   PetscScalar       zero = 0.0;
2106   MatScalar         *aa;
2107   const PetscScalar *xx;
2108   PetscScalar       *bb;
2109   PetscBool         *zeroed,vecs = PETSC_FALSE;
2110 
2111   PetscFunctionBegin;
2112   /* fix right hand side if needed */
2113   if (x && b) {
2114     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2115     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2116     vecs = PETSC_TRUE;
2117   }
2118 
2119   /* zero the columns */
2120   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2121   for (i=0; i<is_n; i++) {
2122     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2123     zeroed[is_idx[i]] = PETSC_TRUE;
2124   }
2125   for (i=0; i<A->rmap->N; i++) {
2126     if (!zeroed[i]) {
2127       row = i/bs;
2128       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2129         for (k=0; k<bs; k++) {
2130           col = bs*baij->j[j] + k;
2131           if (zeroed[col]) {
2132             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2133             if (vecs) bb[i] -= aa[0]*xx[col];
2134             aa[0] = 0.0;
2135           }
2136         }
2137       }
2138     } else if (vecs) bb[i] = diag*xx[i];
2139   }
2140   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2141   if (vecs) {
2142     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2143     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2144   }
2145 
2146   /* zero the rows */
2147   for (i=0; i<is_n; i++) {
2148     row   = is_idx[i];
2149     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2150     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2151     for (k=0; k<count; k++) {
2152       aa[0] =  zero;
2153       aa   += bs;
2154     }
2155     if (diag != (PetscScalar)0.0) {
2156       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2157     }
2158   }
2159   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2160   PetscFunctionReturn(0);
2161 }
2162 
2163 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2164 {
2165   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2166   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2167   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2168   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2169   PetscErrorCode ierr;
2170   PetscInt       ridx,cidx,bs2=a->bs2;
2171   PetscBool      roworiented=a->roworiented;
2172   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;
2173 
2174   PetscFunctionBegin;
2175   for (k=0; k<m; k++) { /* loop over added rows */
2176     row  = im[k];
2177     brow = row/bs;
2178     if (row < 0) continue;
2179 #if defined(PETSC_USE_DEBUG)
2180     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2181 #endif
2182     rp   = aj + ai[brow];
2183     if (!A->structure_only) ap = aa + bs2*ai[brow];
2184     rmax = imax[brow];
2185     nrow = ailen[brow];
2186     low  = 0;
2187     high = nrow;
2188     for (l=0; l<n; l++) { /* loop over added columns */
2189       if (in[l] < 0) continue;
2190 #if defined(PETSC_USE_DEBUG)
2191       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2192 #endif
2193       col  = in[l]; bcol = col/bs;
2194       ridx = row % bs; cidx = col % bs;
2195       if (!A->structure_only) {
2196         if (roworiented) {
2197           value = v[l + k*n];
2198         } else {
2199           value = v[k + l*m];
2200         }
2201       }
2202       if (col <= lastcol) low = 0; else high = nrow;
2203       lastcol = col;
2204       while (high-low > 7) {
2205         t = (low+high)/2;
2206         if (rp[t] > bcol) high = t;
2207         else              low  = t;
2208       }
2209       for (i=low; i<high; i++) {
2210         if (rp[i] > bcol) break;
2211         if (rp[i] == bcol) {
2212           bap = ap +  bs2*i + bs*cidx + ridx;
2213           if (!A->structure_only) {
2214             if (is == ADD_VALUES) *bap += value;
2215             else                  *bap  = value;
2216           }
2217           goto noinsert1;
2218         }
2219       }
2220       if (nonew == 1) goto noinsert1;
2221       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2222       if (A->structure_only) {
2223         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2224       } else {
2225         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2226       }
2227       N = nrow++ - 1; high++;
2228       /* shift up all the later entries in this row */
2229       for (ii=N; ii>=i; ii--) {
2230         rp[ii+1] = rp[ii];
2231         if (!A->structure_only) {
2232           ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2233         }
2234       }
2235       if (N>=i && !A->structure_only) {
2236         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2237       }
2238       rp[i]                      = bcol;
2239       if (!A->structure_only) ap[bs2*i + bs*cidx + ridx] = value;
2240       a->nz++;
2241       A->nonzerostate++;
2242 noinsert1:;
2243       low = i;
2244     }
2245     ailen[brow] = nrow;
2246   }
2247   PetscFunctionReturn(0);
2248 }
2249 
2250 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2251 {
2252   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2253   Mat            outA;
2254   PetscErrorCode ierr;
2255   PetscBool      row_identity,col_identity;
2256 
2257   PetscFunctionBegin;
2258   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2259   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2260   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2261   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2262 
2263   outA            = inA;
2264   inA->factortype = MAT_FACTOR_LU;
2265   ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2266   ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
2267 
2268   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2269 
2270   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2271   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2272   a->row = row;
2273   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2274   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2275   a->col = col;
2276 
2277   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2278   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2279   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2280   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2281 
2282   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2283   if (!a->solve_work) {
2284     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2285     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2286   }
2287   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2288   PetscFunctionReturn(0);
2289 }
2290 
2291 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2292 {
2293   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2294   PetscInt    i,nz,mbs;
2295 
2296   PetscFunctionBegin;
2297   nz  = baij->maxnz;
2298   mbs = baij->mbs;
2299   for (i=0; i<nz; i++) {
2300     baij->j[i] = indices[i];
2301   }
2302   baij->nz = nz;
2303   for (i=0; i<mbs; i++) {
2304     baij->ilen[i] = baij->imax[i];
2305   }
2306   PetscFunctionReturn(0);
2307 }
2308 
2309 /*@
2310     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2311        in the matrix.
2312 
2313   Input Parameters:
2314 +  mat - the SeqBAIJ matrix
2315 -  indices - the column indices
2316 
2317   Level: advanced
2318 
2319   Notes:
2320     This can be called if you have precomputed the nonzero structure of the
2321   matrix and want to provide it to the matrix object to improve the performance
2322   of the MatSetValues() operation.
2323 
2324     You MUST have set the correct numbers of nonzeros per row in the call to
2325   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2326 
2327     MUST be called before any calls to MatSetValues();
2328 
2329 @*/
2330 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2331 {
2332   PetscErrorCode ierr;
2333 
2334   PetscFunctionBegin;
2335   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2336   PetscValidPointer(indices,2);
2337   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2338   PetscFunctionReturn(0);
2339 }
2340 
2341 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2342 {
2343   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2344   PetscErrorCode ierr;
2345   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2346   PetscReal      atmp;
2347   PetscScalar    *x,zero = 0.0;
2348   MatScalar      *aa;
2349   PetscInt       ncols,brow,krow,kcol;
2350 
2351   PetscFunctionBegin;
2352   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2353   bs  = A->rmap->bs;
2354   aa  = a->a;
2355   ai  = a->i;
2356   aj  = a->j;
2357   mbs = a->mbs;
2358 
2359   ierr = VecSet(v,zero);CHKERRQ(ierr);
2360   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2361   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2362   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2363   for (i=0; i<mbs; i++) {
2364     ncols = ai[1] - ai[0]; ai++;
2365     brow  = bs*i;
2366     for (j=0; j<ncols; j++) {
2367       for (kcol=0; kcol<bs; kcol++) {
2368         for (krow=0; krow<bs; krow++) {
2369           atmp = PetscAbsScalar(*aa);aa++;
2370           row  = brow + krow;   /* row index */
2371           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2372         }
2373       }
2374       aj++;
2375     }
2376   }
2377   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2378   PetscFunctionReturn(0);
2379 }
2380 
2381 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2382 {
2383   PetscErrorCode ierr;
2384 
2385   PetscFunctionBegin;
2386   /* If the two matrices have the same copy implementation, use fast copy. */
2387   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2388     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2389     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2390     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2391 
2392     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2393     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2394     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2395     ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2396   } else {
2397     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2398   }
2399   PetscFunctionReturn(0);
2400 }
2401 
2402 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2403 {
2404   PetscErrorCode ierr;
2405 
2406   PetscFunctionBegin;
2407   ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2408   PetscFunctionReturn(0);
2409 }
2410 
2411 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2412 {
2413   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2414 
2415   PetscFunctionBegin;
2416   *array = a->a;
2417   PetscFunctionReturn(0);
2418 }
2419 
2420 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2421 {
2422   PetscFunctionBegin;
2423   PetscFunctionReturn(0);
2424 }
2425 
2426 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2427 {
2428   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2429   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2430   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2431   PetscErrorCode ierr;
2432 
2433   PetscFunctionBegin;
2434   /* Set the number of nonzeros in the new matrix */
2435   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2436   PetscFunctionReturn(0);
2437 }
2438 
2439 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2440 {
2441   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2442   PetscErrorCode ierr;
2443   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2444   PetscBLASInt   one=1;
2445 
2446   PetscFunctionBegin;
2447   if (str == SAME_NONZERO_PATTERN) {
2448     PetscScalar  alpha = a;
2449     PetscBLASInt bnz;
2450     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2451     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2452     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2453   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2454     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2455   } else {
2456     Mat      B;
2457     PetscInt *nnz;
2458     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2459     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2460     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2461     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2462     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2463     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2464     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2465     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2466     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2467     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2468     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2469     ierr = PetscFree(nnz);CHKERRQ(ierr);
2470   }
2471   PetscFunctionReturn(0);
2472 }
2473 
2474 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2475 {
2476   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2477   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2478   MatScalar   *aa = a->a;
2479 
2480   PetscFunctionBegin;
2481   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2482   PetscFunctionReturn(0);
2483 }
2484 
2485 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2486 {
2487   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2488   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2489   MatScalar   *aa = a->a;
2490 
2491   PetscFunctionBegin;
2492   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2493   PetscFunctionReturn(0);
2494 }
2495 
2496 /*
2497     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2498 */
2499 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2500 {
2501   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2502   PetscErrorCode ierr;
2503   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2504   PetscInt       nz = a->i[m],row,*jj,mr,col;
2505 
2506   PetscFunctionBegin;
2507   *nn = n;
2508   if (!ia) PetscFunctionReturn(0);
2509   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2510   else {
2511     ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2512     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2513     ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2514     jj   = a->j;
2515     for (i=0; i<nz; i++) {
2516       collengths[jj[i]]++;
2517     }
2518     cia[0] = oshift;
2519     for (i=0; i<n; i++) {
2520       cia[i+1] = cia[i] + collengths[i];
2521     }
2522     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2523     jj   = a->j;
2524     for (row=0; row<m; row++) {
2525       mr = a->i[row+1] - a->i[row];
2526       for (i=0; i<mr; i++) {
2527         col = *jj++;
2528 
2529         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2530       }
2531     }
2532     ierr = PetscFree(collengths);CHKERRQ(ierr);
2533     *ia  = cia; *ja = cja;
2534   }
2535   PetscFunctionReturn(0);
2536 }
2537 
2538 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2539 {
2540   PetscErrorCode ierr;
2541 
2542   PetscFunctionBegin;
2543   if (!ia) PetscFunctionReturn(0);
2544   ierr = PetscFree(*ia);CHKERRQ(ierr);
2545   ierr = PetscFree(*ja);CHKERRQ(ierr);
2546   PetscFunctionReturn(0);
2547 }
2548 
2549 /*
2550  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2551  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2552  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2553  */
2554 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2555 {
2556   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2557   PetscErrorCode ierr;
2558   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2559   PetscInt       nz = a->i[m],row,*jj,mr,col;
2560   PetscInt       *cspidx;
2561 
2562   PetscFunctionBegin;
2563   *nn = n;
2564   if (!ia) PetscFunctionReturn(0);
2565 
2566   ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2567   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2568   ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2569   ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr);
2570   jj   = a->j;
2571   for (i=0; i<nz; i++) {
2572     collengths[jj[i]]++;
2573   }
2574   cia[0] = oshift;
2575   for (i=0; i<n; i++) {
2576     cia[i+1] = cia[i] + collengths[i];
2577   }
2578   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2579   jj   = a->j;
2580   for (row=0; row<m; row++) {
2581     mr = a->i[row+1] - a->i[row];
2582     for (i=0; i<mr; i++) {
2583       col = *jj++;
2584       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2585       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2586     }
2587   }
2588   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2589   *ia    = cia; *ja = cja;
2590   *spidx = cspidx;
2591   PetscFunctionReturn(0);
2592 }
2593 
2594 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2595 {
2596   PetscErrorCode ierr;
2597 
2598   PetscFunctionBegin;
2599   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2600   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2601   PetscFunctionReturn(0);
2602 }
2603 
2604 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2605 {
2606   PetscErrorCode ierr;
2607   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2608 
2609   PetscFunctionBegin;
2610   if (!Y->preallocated || !aij->nz) {
2611     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2612   }
2613   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2614   PetscFunctionReturn(0);
2615 }
2616 
2617 /* -------------------------------------------------------------------*/
2618 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2619                                        MatGetRow_SeqBAIJ,
2620                                        MatRestoreRow_SeqBAIJ,
2621                                        MatMult_SeqBAIJ_N,
2622                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2623                                        MatMultTranspose_SeqBAIJ,
2624                                        MatMultTransposeAdd_SeqBAIJ,
2625                                        0,
2626                                        0,
2627                                        0,
2628                                /* 10*/ 0,
2629                                        MatLUFactor_SeqBAIJ,
2630                                        0,
2631                                        0,
2632                                        MatTranspose_SeqBAIJ,
2633                                /* 15*/ MatGetInfo_SeqBAIJ,
2634                                        MatEqual_SeqBAIJ,
2635                                        MatGetDiagonal_SeqBAIJ,
2636                                        MatDiagonalScale_SeqBAIJ,
2637                                        MatNorm_SeqBAIJ,
2638                                /* 20*/ 0,
2639                                        MatAssemblyEnd_SeqBAIJ,
2640                                        MatSetOption_SeqBAIJ,
2641                                        MatZeroEntries_SeqBAIJ,
2642                                /* 24*/ MatZeroRows_SeqBAIJ,
2643                                        0,
2644                                        0,
2645                                        0,
2646                                        0,
2647                                /* 29*/ MatSetUp_SeqBAIJ,
2648                                        0,
2649                                        0,
2650                                        0,
2651                                        0,
2652                                /* 34*/ MatDuplicate_SeqBAIJ,
2653                                        0,
2654                                        0,
2655                                        MatILUFactor_SeqBAIJ,
2656                                        0,
2657                                /* 39*/ MatAXPY_SeqBAIJ,
2658                                        MatCreateSubMatrices_SeqBAIJ,
2659                                        MatIncreaseOverlap_SeqBAIJ,
2660                                        MatGetValues_SeqBAIJ,
2661                                        MatCopy_SeqBAIJ,
2662                                /* 44*/ 0,
2663                                        MatScale_SeqBAIJ,
2664                                        MatShift_SeqBAIJ,
2665                                        0,
2666                                        MatZeroRowsColumns_SeqBAIJ,
2667                                /* 49*/ 0,
2668                                        MatGetRowIJ_SeqBAIJ,
2669                                        MatRestoreRowIJ_SeqBAIJ,
2670                                        MatGetColumnIJ_SeqBAIJ,
2671                                        MatRestoreColumnIJ_SeqBAIJ,
2672                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2673                                        0,
2674                                        0,
2675                                        0,
2676                                        MatSetValuesBlocked_SeqBAIJ,
2677                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2678                                        MatDestroy_SeqBAIJ,
2679                                        MatView_SeqBAIJ,
2680                                        0,
2681                                        0,
2682                                /* 64*/ 0,
2683                                        0,
2684                                        0,
2685                                        0,
2686                                        0,
2687                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2688                                        0,
2689                                        MatConvert_Basic,
2690                                        0,
2691                                        0,
2692                                /* 74*/ 0,
2693                                        MatFDColoringApply_BAIJ,
2694                                        0,
2695                                        0,
2696                                        0,
2697                                /* 79*/ 0,
2698                                        0,
2699                                        0,
2700                                        0,
2701                                        MatLoad_SeqBAIJ,
2702                                /* 84*/ 0,
2703                                        0,
2704                                        0,
2705                                        0,
2706                                        0,
2707                                /* 89*/ 0,
2708                                        0,
2709                                        0,
2710                                        0,
2711                                        0,
2712                                /* 94*/ 0,
2713                                        0,
2714                                        0,
2715                                        0,
2716                                        0,
2717                                /* 99*/ 0,
2718                                        0,
2719                                        0,
2720                                        0,
2721                                        0,
2722                                /*104*/ 0,
2723                                        MatRealPart_SeqBAIJ,
2724                                        MatImaginaryPart_SeqBAIJ,
2725                                        0,
2726                                        0,
2727                                /*109*/ 0,
2728                                        0,
2729                                        0,
2730                                        0,
2731                                        MatMissingDiagonal_SeqBAIJ,
2732                                /*114*/ 0,
2733                                        0,
2734                                        0,
2735                                        0,
2736                                        0,
2737                                /*119*/ 0,
2738                                        0,
2739                                        MatMultHermitianTranspose_SeqBAIJ,
2740                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2741                                        0,
2742                                /*124*/ 0,
2743                                        0,
2744                                        MatInvertBlockDiagonal_SeqBAIJ,
2745                                        0,
2746                                        0,
2747                                /*129*/ 0,
2748                                        0,
2749                                        0,
2750                                        0,
2751                                        0,
2752                                /*134*/ 0,
2753                                        0,
2754                                        0,
2755                                        0,
2756                                        0,
2757                                /*139*/ MatSetBlockSizes_Default,
2758                                        0,
2759                                        0,
2760                                        MatFDColoringSetUp_SeqXAIJ,
2761                                        0,
2762                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2763                                        MatDestroySubMatrices_SeqBAIJ
2764 };
2765 
2766 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2767 {
2768   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2769   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2770   PetscErrorCode ierr;
2771 
2772   PetscFunctionBegin;
2773   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2774 
2775   /* allocate space for values if not already there */
2776   if (!aij->saved_values) {
2777     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2778     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2779   }
2780 
2781   /* copy values over */
2782   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2783   PetscFunctionReturn(0);
2784 }
2785 
2786 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2787 {
2788   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2789   PetscErrorCode ierr;
2790   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2791 
2792   PetscFunctionBegin;
2793   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2794   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2795 
2796   /* copy values over */
2797   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2798   PetscFunctionReturn(0);
2799 }
2800 
2801 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2802 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2803 
2804 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2805 {
2806   Mat_SeqBAIJ    *b;
2807   PetscErrorCode ierr;
2808   PetscInt       i,mbs,nbs,bs2;
2809   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2810 
2811   PetscFunctionBegin;
2812   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2813   if (nz == MAT_SKIP_ALLOCATION) {
2814     skipallocation = PETSC_TRUE;
2815     nz             = 0;
2816   }
2817 
2818   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2819   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2820   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2821   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2822 
2823   B->preallocated = PETSC_TRUE;
2824 
2825   mbs = B->rmap->n/bs;
2826   nbs = B->cmap->n/bs;
2827   bs2 = bs*bs;
2828 
2829   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2830 
2831   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2832   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2833   if (nnz) {
2834     for (i=0; i<mbs; i++) {
2835       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2836       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2837     }
2838   }
2839 
2840   b    = (Mat_SeqBAIJ*)B->data;
2841   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2842   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2843   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2844 
2845   if (!flg) {
2846     switch (bs) {
2847     case 1:
2848       B->ops->mult    = MatMult_SeqBAIJ_1;
2849       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2850       break;
2851     case 2:
2852       B->ops->mult    = MatMult_SeqBAIJ_2;
2853       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2854       break;
2855     case 3:
2856       B->ops->mult    = MatMult_SeqBAIJ_3;
2857       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2858       break;
2859     case 4:
2860       B->ops->mult    = MatMult_SeqBAIJ_4;
2861       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2862       break;
2863     case 5:
2864       B->ops->mult    = MatMult_SeqBAIJ_5;
2865       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2866       break;
2867     case 6:
2868       B->ops->mult    = MatMult_SeqBAIJ_6;
2869       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2870       break;
2871     case 7:
2872       B->ops->mult    = MatMult_SeqBAIJ_7;
2873       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2874       break;
2875     case 11:
2876       B->ops->mult    = MatMult_SeqBAIJ_11;
2877       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2878       break;
2879     case 15:
2880       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2881       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2882       break;
2883     default:
2884       B->ops->mult    = MatMult_SeqBAIJ_N;
2885       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2886       break;
2887     }
2888   }
2889   B->ops->sor = MatSOR_SeqBAIJ;
2890   b->mbs = mbs;
2891   b->nbs = nbs;
2892   if (!skipallocation) {
2893     if (!b->imax) {
2894       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2895       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2896 
2897       b->free_imax_ilen = PETSC_TRUE;
2898     }
2899     /* b->ilen will count nonzeros in each block row so far. */
2900     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2901     if (!nnz) {
2902       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2903       else if (nz < 0) nz = 1;
2904       for (i=0; i<mbs; i++) b->imax[i] = nz;
2905       nz = nz*mbs;
2906     } else {
2907       nz = 0;
2908       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2909     }
2910 
2911     /* allocate the matrix space */
2912     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2913     if (B->structure_only) {
2914       ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr);
2915       ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr);
2916       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr);
2917     } else {
2918       ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2919       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2920       ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2921     }
2922     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2923 
2924     if (B->structure_only) {
2925       b->singlemalloc = PETSC_FALSE;
2926       b->free_a       = PETSC_FALSE;
2927     } else {
2928       b->singlemalloc = PETSC_TRUE;
2929       b->free_a       = PETSC_TRUE;
2930     }
2931     b->free_ij = PETSC_TRUE;
2932 
2933     b->i[0] = 0;
2934     for (i=1; i<mbs+1; i++) {
2935       b->i[i] = b->i[i-1] + b->imax[i-1];
2936     }
2937 
2938   } else {
2939     b->free_a  = PETSC_FALSE;
2940     b->free_ij = PETSC_FALSE;
2941   }
2942 
2943   b->bs2              = bs2;
2944   b->mbs              = mbs;
2945   b->nz               = 0;
2946   b->maxnz            = nz;
2947   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2948   B->was_assembled    = PETSC_FALSE;
2949   B->assembled        = PETSC_FALSE;
2950   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2951   PetscFunctionReturn(0);
2952 }
2953 
2954 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2955 {
2956   PetscInt       i,m,nz,nz_max=0,*nnz;
2957   PetscScalar    *values=0;
2958   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2959   PetscErrorCode ierr;
2960 
2961   PetscFunctionBegin;
2962   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2963   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2964   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2965   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2966   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2967   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2968   m    = B->rmap->n/bs;
2969 
2970   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2971   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2972   for (i=0; i<m; i++) {
2973     nz = ii[i+1]- ii[i];
2974     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2975     nz_max = PetscMax(nz_max, nz);
2976     nnz[i] = nz;
2977   }
2978   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2979   ierr = PetscFree(nnz);CHKERRQ(ierr);
2980 
2981   values = (PetscScalar*)V;
2982   if (!values) {
2983     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2984   }
2985   for (i=0; i<m; i++) {
2986     PetscInt          ncols  = ii[i+1] - ii[i];
2987     const PetscInt    *icols = jj + ii[i];
2988     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2989     if (!roworiented) {
2990       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2991     } else {
2992       PetscInt j;
2993       for (j=0; j<ncols; j++) {
2994         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2995         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2996       }
2997     }
2998   }
2999   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3000   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3001   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3002   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3003   PetscFunctionReturn(0);
3004 }
3005 
3006 /*MC
3007    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3008    block sparse compressed row format.
3009 
3010    Options Database Keys:
3011 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3012 
3013   Level: beginner
3014 
3015 .seealso: MatCreateSeqBAIJ()
3016 M*/
3017 
3018 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3019 
3020 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3021 {
3022   PetscErrorCode ierr;
3023   PetscMPIInt    size;
3024   Mat_SeqBAIJ    *b;
3025 
3026   PetscFunctionBegin;
3027   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3028   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3029 
3030   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3031   B->data = (void*)b;
3032   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3033 
3034   b->row          = 0;
3035   b->col          = 0;
3036   b->icol         = 0;
3037   b->reallocs     = 0;
3038   b->saved_values = 0;
3039 
3040   b->roworiented        = PETSC_TRUE;
3041   b->nonew              = 0;
3042   b->diag               = 0;
3043   B->spptr              = 0;
3044   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3045   b->keepnonzeropattern = PETSC_FALSE;
3046 
3047   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3048   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3049   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3050   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3051   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3052   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3053   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3054   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3055   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3056 #if defined(PETSC_HAVE_HYPRE)
3057   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_baij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
3058 #endif
3059   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3060   PetscFunctionReturn(0);
3061 }
3062 
3063 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3064 {
3065   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3066   PetscErrorCode ierr;
3067   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3068 
3069   PetscFunctionBegin;
3070   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3071 
3072   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3073     c->imax           = a->imax;
3074     c->ilen           = a->ilen;
3075     c->free_imax_ilen = PETSC_FALSE;
3076   } else {
3077     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3078     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3079     for (i=0; i<mbs; i++) {
3080       c->imax[i] = a->imax[i];
3081       c->ilen[i] = a->ilen[i];
3082     }
3083     c->free_imax_ilen = PETSC_TRUE;
3084   }
3085 
3086   /* allocate the matrix space */
3087   if (mallocmatspace) {
3088     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3089       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3090       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3091 
3092       c->i            = a->i;
3093       c->j            = a->j;
3094       c->singlemalloc = PETSC_FALSE;
3095       c->free_a       = PETSC_TRUE;
3096       c->free_ij      = PETSC_FALSE;
3097       c->parent       = A;
3098       C->preallocated = PETSC_TRUE;
3099       C->assembled    = PETSC_TRUE;
3100 
3101       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3102       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3103       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3104     } else {
3105       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3106       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3107 
3108       c->singlemalloc = PETSC_TRUE;
3109       c->free_a       = PETSC_TRUE;
3110       c->free_ij      = PETSC_TRUE;
3111 
3112       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3113       if (mbs > 0) {
3114         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3115         if (cpvalues == MAT_COPY_VALUES) {
3116           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3117         } else {
3118           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3119         }
3120       }
3121       C->preallocated = PETSC_TRUE;
3122       C->assembled    = PETSC_TRUE;
3123     }
3124   }
3125 
3126   c->roworiented = a->roworiented;
3127   c->nonew       = a->nonew;
3128 
3129   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3130   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3131 
3132   c->bs2         = a->bs2;
3133   c->mbs         = a->mbs;
3134   c->nbs         = a->nbs;
3135 
3136   if (a->diag) {
3137     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3138       c->diag      = a->diag;
3139       c->free_diag = PETSC_FALSE;
3140     } else {
3141       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3142       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3143       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3144       c->free_diag = PETSC_TRUE;
3145     }
3146   } else c->diag = 0;
3147 
3148   c->nz         = a->nz;
3149   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3150   c->solve_work = NULL;
3151   c->mult_work  = NULL;
3152   c->sor_workt  = NULL;
3153   c->sor_work   = NULL;
3154 
3155   c->compressedrow.use   = a->compressedrow.use;
3156   c->compressedrow.nrows = a->compressedrow.nrows;
3157   if (a->compressedrow.use) {
3158     i    = a->compressedrow.nrows;
3159     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3160     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3161     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3162     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3163   } else {
3164     c->compressedrow.use    = PETSC_FALSE;
3165     c->compressedrow.i      = NULL;
3166     c->compressedrow.rindex = NULL;
3167   }
3168   C->nonzerostate = A->nonzerostate;
3169 
3170   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3171   PetscFunctionReturn(0);
3172 }
3173 
3174 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3175 {
3176   PetscErrorCode ierr;
3177 
3178   PetscFunctionBegin;
3179   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3180   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3181   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3182   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3183   PetscFunctionReturn(0);
3184 }
3185 
3186 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3187 {
3188   Mat_SeqBAIJ    *a;
3189   PetscErrorCode ierr;
3190   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3191   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3192   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3193   PetscInt       *masked,nmask,tmp,bs2,ishift;
3194   PetscMPIInt    size;
3195   int            fd;
3196   PetscScalar    *aa;
3197   MPI_Comm       comm;
3198 
3199   PetscFunctionBegin;
3200   /* force binary viewer to load .info file if it has not yet done so */
3201   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3202   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3203   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3204   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3205   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3206   if (bs < 0) bs = 1;
3207   bs2  = bs*bs;
3208 
3209   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3210   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3211   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3212   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3213   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3214   M = header[1]; N = header[2]; nz = header[3];
3215 
3216   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3217   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3218 
3219   /*
3220      This code adds extra rows to make sure the number of rows is
3221     divisible by the blocksize
3222   */
3223   mbs        = M/bs;
3224   extra_rows = bs - M + bs*(mbs);
3225   if (extra_rows == bs) extra_rows = 0;
3226   else mbs++;
3227   if (extra_rows) {
3228     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3229   }
3230 
3231   /* Set global sizes if not already set */
3232   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3233     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3234   } else { /* Check if the matrix global sizes are correct */
3235     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3236     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3237       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3238     }
3239     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3240   }
3241 
3242   /* read in row lengths */
3243   ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
3244   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3245   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3246 
3247   /* read in column indices */
3248   ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr);
3249   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3250   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3251 
3252   /* loop over row lengths determining block row lengths */
3253   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3254   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3255   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3256   rowcount = 0;
3257   nzcount  = 0;
3258   for (i=0; i<mbs; i++) {
3259     nmask = 0;
3260     for (j=0; j<bs; j++) {
3261       kmax = rowlengths[rowcount];
3262       for (k=0; k<kmax; k++) {
3263         tmp = jj[nzcount++]/bs;
3264         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3265       }
3266       rowcount++;
3267     }
3268     browlengths[i] += nmask;
3269     /* zero out the mask elements we set */
3270     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3271   }
3272 
3273   /* Do preallocation  */
3274   ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr);
3275   a    = (Mat_SeqBAIJ*)newmat->data;
3276 
3277   /* set matrix "i" values */
3278   a->i[0] = 0;
3279   for (i=1; i<= mbs; i++) {
3280     a->i[i]      = a->i[i-1] + browlengths[i-1];
3281     a->ilen[i-1] = browlengths[i-1];
3282   }
3283   a->nz = 0;
3284   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3285 
3286   /* read in nonzero values */
3287   ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr);
3288   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3289   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3290 
3291   /* set "a" and "j" values into matrix */
3292   nzcount = 0; jcount = 0;
3293   for (i=0; i<mbs; i++) {
3294     nzcountb = nzcount;
3295     nmask    = 0;
3296     for (j=0; j<bs; j++) {
3297       kmax = rowlengths[i*bs+j];
3298       for (k=0; k<kmax; k++) {
3299         tmp = jj[nzcount++]/bs;
3300         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3301       }
3302     }
3303     /* sort the masked values */
3304     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3305 
3306     /* set "j" values into matrix */
3307     maskcount = 1;
3308     for (j=0; j<nmask; j++) {
3309       a->j[jcount++]  = masked[j];
3310       mask[masked[j]] = maskcount++;
3311     }
3312     /* set "a" values into matrix */
3313     ishift = bs2*a->i[i];
3314     for (j=0; j<bs; j++) {
3315       kmax = rowlengths[i*bs+j];
3316       for (k=0; k<kmax; k++) {
3317         tmp       = jj[nzcountb]/bs;
3318         block     = mask[tmp] - 1;
3319         point     = jj[nzcountb] - bs*tmp;
3320         idx       = ishift + bs2*block + j + bs*point;
3321         a->a[idx] = (MatScalar)aa[nzcountb++];
3322       }
3323     }
3324     /* zero out the mask elements we set */
3325     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3326   }
3327   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3328 
3329   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3330   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3331   ierr = PetscFree(aa);CHKERRQ(ierr);
3332   ierr = PetscFree(jj);CHKERRQ(ierr);
3333   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3334 
3335   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3336   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3337   PetscFunctionReturn(0);
3338 }
3339 
3340 /*@C
3341    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3342    compressed row) format.  For good matrix assembly performance the
3343    user should preallocate the matrix storage by setting the parameter nz
3344    (or the array nnz).  By setting these parameters accurately, performance
3345    during matrix assembly can be increased by more than a factor of 50.
3346 
3347    Collective on MPI_Comm
3348 
3349    Input Parameters:
3350 +  comm - MPI communicator, set to PETSC_COMM_SELF
3351 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3352           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3353 .  m - number of rows
3354 .  n - number of columns
3355 .  nz - number of nonzero blocks  per block row (same for all rows)
3356 -  nnz - array containing the number of nonzero blocks in the various block rows
3357          (possibly different for each block row) or NULL
3358 
3359    Output Parameter:
3360 .  A - the matrix
3361 
3362    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3363    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3364    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3365 
3366    Options Database Keys:
3367 .   -mat_no_unroll - uses code that does not unroll the loops in the
3368                      block calculations (much slower)
3369 .    -mat_block_size - size of the blocks to use
3370 
3371    Level: intermediate
3372 
3373    Notes:
3374    The number of rows and columns must be divisible by blocksize.
3375 
3376    If the nnz parameter is given then the nz parameter is ignored
3377 
3378    A nonzero block is any block that as 1 or more nonzeros in it
3379 
3380    The block AIJ format is fully compatible with standard Fortran 77
3381    storage.  That is, the stored row and column indices can begin at
3382    either one (as in Fortran) or zero.  See the users' manual for details.
3383 
3384    Specify the preallocated storage with either nz or nnz (not both).
3385    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3386    allocation.  See Users-Manual: ch_mat for details.
3387    matrices.
3388 
3389 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3390 @*/
3391 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3392 {
3393   PetscErrorCode ierr;
3394 
3395   PetscFunctionBegin;
3396   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3397   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3398   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3399   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3400   PetscFunctionReturn(0);
3401 }
3402 
3403 /*@C
3404    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3405    per row in the matrix. For good matrix assembly performance the
3406    user should preallocate the matrix storage by setting the parameter nz
3407    (or the array nnz).  By setting these parameters accurately, performance
3408    during matrix assembly can be increased by more than a factor of 50.
3409 
3410    Collective on MPI_Comm
3411 
3412    Input Parameters:
3413 +  B - the matrix
3414 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3415           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3416 .  nz - number of block nonzeros per block row (same for all rows)
3417 -  nnz - array containing the number of block nonzeros in the various block rows
3418          (possibly different for each block row) or NULL
3419 
3420    Options Database Keys:
3421 .   -mat_no_unroll - uses code that does not unroll the loops in the
3422                      block calculations (much slower)
3423 .   -mat_block_size - size of the blocks to use
3424 
3425    Level: intermediate
3426 
3427    Notes:
3428    If the nnz parameter is given then the nz parameter is ignored
3429 
3430    You can call MatGetInfo() to get information on how effective the preallocation was;
3431    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3432    You can also run with the option -info and look for messages with the string
3433    malloc in them to see if additional memory allocation was needed.
3434 
3435    The block AIJ format is fully compatible with standard Fortran 77
3436    storage.  That is, the stored row and column indices can begin at
3437    either one (as in Fortran) or zero.  See the users' manual for details.
3438 
3439    Specify the preallocated storage with either nz or nnz (not both).
3440    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3441    allocation.  See Users-Manual: ch_mat for details.
3442 
3443 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3444 @*/
3445 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3446 {
3447   PetscErrorCode ierr;
3448 
3449   PetscFunctionBegin;
3450   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3451   PetscValidType(B,1);
3452   PetscValidLogicalCollectiveInt(B,bs,2);
3453   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3454   PetscFunctionReturn(0);
3455 }
3456 
3457 /*@C
3458    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3459    (the default sequential PETSc format).
3460 
3461    Collective on MPI_Comm
3462 
3463    Input Parameters:
3464 +  B - the matrix
3465 .  i - the indices into j for the start of each local row (starts with zero)
3466 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3467 -  v - optional values in the matrix
3468 
3469    Level: developer
3470 
3471    Notes:
3472    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3473    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3474    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3475    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3476    block column and the second index is over columns within a block.
3477 
3478 .keywords: matrix, aij, compressed row, sparse
3479 
3480 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3481 @*/
3482 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3483 {
3484   PetscErrorCode ierr;
3485 
3486   PetscFunctionBegin;
3487   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3488   PetscValidType(B,1);
3489   PetscValidLogicalCollectiveInt(B,bs,2);
3490   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3491   PetscFunctionReturn(0);
3492 }
3493 
3494 
3495 /*@
3496      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3497 
3498      Collective on MPI_Comm
3499 
3500    Input Parameters:
3501 +  comm - must be an MPI communicator of size 1
3502 .  bs - size of block
3503 .  m - number of rows
3504 .  n - number of columns
3505 .  i - row indices
3506 .  j - column indices
3507 -  a - matrix values
3508 
3509    Output Parameter:
3510 .  mat - the matrix
3511 
3512    Level: advanced
3513 
3514    Notes:
3515        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3516     once the matrix is destroyed
3517 
3518        You cannot set new nonzero locations into this matrix, that will generate an error.
3519 
3520        The i and j indices are 0 based
3521 
3522        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3523 
3524       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3525       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3526       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3527       with column-major ordering within blocks.
3528 
3529 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3530 
3531 @*/
3532 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3533 {
3534   PetscErrorCode ierr;
3535   PetscInt       ii;
3536   Mat_SeqBAIJ    *baij;
3537 
3538   PetscFunctionBegin;
3539   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3540   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3541 
3542   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3543   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3544   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3545   ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3546   baij = (Mat_SeqBAIJ*)(*mat)->data;
3547   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3548   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3549 
3550   baij->i = i;
3551   baij->j = j;
3552   baij->a = a;
3553 
3554   baij->singlemalloc = PETSC_FALSE;
3555   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3556   baij->free_a       = PETSC_FALSE;
3557   baij->free_ij      = PETSC_FALSE;
3558 
3559   for (ii=0; ii<m; ii++) {
3560     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3561 #if defined(PETSC_USE_DEBUG)
3562     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3563 #endif
3564   }
3565 #if defined(PETSC_USE_DEBUG)
3566   for (ii=0; ii<baij->i[m]; ii++) {
3567     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3568     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3569   }
3570 #endif
3571 
3572   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3573   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3574   PetscFunctionReturn(0);
3575 }
3576 
3577 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3578 {
3579   PetscErrorCode ierr;
3580   PetscMPIInt    size;
3581 
3582   PetscFunctionBegin;
3583   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3584   if (size == 1 && scall == MAT_REUSE_MATRIX) {
3585     ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
3586   } else {
3587     ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3588   }
3589   PetscFunctionReturn(0);
3590 }
3591