xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 9b2437101ac1db3c201d7be9c485cbee0b1dde4b)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13 #endif
14 
15 #undef __FUNCT__
16 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
17 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
18 {
19   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
20   PetscErrorCode ierr;
21   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
22   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
23   PetscReal      shift = 0.0;
24   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
25 
26   PetscFunctionBegin;
27   allowzeropivot = PetscNot(A->erroriffailure);
28 
29   if (a->idiagvalid) {
30     if (values) *values = a->idiag;
31     PetscFunctionReturn(0);
32   }
33   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
34   diag_offset = a->diag;
35   if (!a->idiag) {
36     ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr);
37     ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
38   }
39   diag  = a->idiag;
40   mdiag = a->idiag+bs2*mbs;
41   if (values) *values = a->idiag;
42   /* factor and invert each block */
43   switch (bs) {
44   case 1:
45     for (i=0; i<mbs; i++) {
46       odiag    = v + 1*diag_offset[i];
47       diag[0]  = odiag[0];
48       mdiag[0] = odiag[0];
49 
50       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
51         if (allowzeropivot) {
52           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
53           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
54           A->factorerror_zeropivot_row   = i;
55           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
56         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
57       }
58 
59       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
60       diag    += 1;
61       mdiag   += 1;
62     }
63     break;
64   case 2:
65     for (i=0; i<mbs; i++) {
66       odiag    = v + 4*diag_offset[i];
67       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
70       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
71       diag    += 4;
72       mdiag   += 4;
73     }
74     break;
75   case 3:
76     for (i=0; i<mbs; i++) {
77       odiag    = v + 9*diag_offset[i];
78       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
79       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
80       diag[8]  = odiag[8];
81       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
82       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
83       mdiag[8] = odiag[8];
84       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
85       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
86       diag    += 9;
87       mdiag   += 9;
88     }
89     break;
90   case 4:
91     for (i=0; i<mbs; i++) {
92       odiag  = v + 16*diag_offset[i];
93       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
94       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
95       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
96       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
97       diag  += 16;
98       mdiag += 16;
99     }
100     break;
101   case 5:
102     for (i=0; i<mbs; i++) {
103       odiag  = v + 25*diag_offset[i];
104       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
105       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
106       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
107       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
108       diag  += 25;
109       mdiag += 25;
110     }
111     break;
112   case 6:
113     for (i=0; i<mbs; i++) {
114       odiag  = v + 36*diag_offset[i];
115       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
116       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
117       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
118       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
119       diag  += 36;
120       mdiag += 36;
121     }
122     break;
123   case 7:
124     for (i=0; i<mbs; i++) {
125       odiag  = v + 49*diag_offset[i];
126       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
127       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
128       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
129       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
130       diag  += 49;
131       mdiag += 49;
132     }
133     break;
134   default:
135     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
136     for (i=0; i<mbs; i++) {
137       odiag  = v + bs2*diag_offset[i];
138       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
139       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
140       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
141       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
142       diag  += bs2;
143       mdiag += bs2;
144     }
145     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
146   }
147   a->idiagvalid = PETSC_TRUE;
148   PetscFunctionReturn(0);
149 }
150 
151 #undef __FUNCT__
152 #define __FUNCT__ "MatSOR_SeqBAIJ"
153 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
154 {
155   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
156   PetscScalar       *x,*work,*w,*workt,*t;
157   const MatScalar   *v,*aa = a->a, *idiag;
158   const PetscScalar *b,*xb;
159   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
160   PetscErrorCode    ierr;
161   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
162   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
163 
164   PetscFunctionBegin;
165   if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */
166   its = its*lits;
167   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
168   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
169   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
170   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
171   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
172 
173   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
174 
175   if (!m) PetscFunctionReturn(0);
176   diag  = a->diag;
177   idiag = a->idiag;
178   k    = PetscMax(A->rmap->n,A->cmap->n);
179   if (!a->mult_work) {
180     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
181   }
182   if (!a->sor_workt) {
183     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
184   }
185   if (!a->sor_work) {
186     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
187   }
188   work = a->mult_work;
189   t    = a->sor_workt;
190   w    = a->sor_work;
191 
192   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
193   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
194 
195   if (flag & SOR_ZERO_INITIAL_GUESS) {
196     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
197       switch (bs) {
198       case 1:
199         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
200         t[0] = b[0];
201         i2     = 1;
202         idiag += 1;
203         for (i=1; i<m; i++) {
204           v  = aa + ai[i];
205           vi = aj + ai[i];
206           nz = diag[i] - ai[i];
207           s[0] = b[i2];
208           for (j=0; j<nz; j++) {
209             xw[0] = x[vi[j]];
210             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
211           }
212           t[i2] = s[0];
213           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
214           x[i2]  = xw[0];
215           idiag += 1;
216           i2    += 1;
217         }
218         break;
219       case 2:
220         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
221         t[0] = b[0]; t[1] = b[1];
222         i2     = 2;
223         idiag += 4;
224         for (i=1; i<m; i++) {
225           v  = aa + 4*ai[i];
226           vi = aj + ai[i];
227           nz = diag[i] - ai[i];
228           s[0] = b[i2]; s[1] = b[i2+1];
229           for (j=0; j<nz; j++) {
230             idx = 2*vi[j];
231             it  = 4*j;
232             xw[0] = x[idx]; xw[1] = x[1+idx];
233             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
234           }
235           t[i2] = s[0]; t[i2+1] = s[1];
236           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
237           x[i2]   = xw[0]; x[i2+1] = xw[1];
238           idiag  += 4;
239           i2     += 2;
240         }
241         break;
242       case 3:
243         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
244         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
245         i2     = 3;
246         idiag += 9;
247         for (i=1; i<m; i++) {
248           v  = aa + 9*ai[i];
249           vi = aj + ai[i];
250           nz = diag[i] - ai[i];
251           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
252           while (nz--) {
253             idx = 3*(*vi++);
254             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
255             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
256             v  += 9;
257           }
258           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
259           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
260           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
261           idiag  += 9;
262           i2     += 3;
263         }
264         break;
265       case 4:
266         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
267         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
268         i2     = 4;
269         idiag += 16;
270         for (i=1; i<m; i++) {
271           v  = aa + 16*ai[i];
272           vi = aj + ai[i];
273           nz = diag[i] - ai[i];
274           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
275           while (nz--) {
276             idx = 4*(*vi++);
277             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
278             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
279             v  += 16;
280           }
281           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
282           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
283           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
284           idiag  += 16;
285           i2     += 4;
286         }
287         break;
288       case 5:
289         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
290         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
291         i2     = 5;
292         idiag += 25;
293         for (i=1; i<m; i++) {
294           v  = aa + 25*ai[i];
295           vi = aj + ai[i];
296           nz = diag[i] - ai[i];
297           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
298           while (nz--) {
299             idx = 5*(*vi++);
300             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
301             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
302             v  += 25;
303           }
304           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
305           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
306           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
307           idiag  += 25;
308           i2     += 5;
309         }
310         break;
311       case 6:
312         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
313         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
314         i2     = 6;
315         idiag += 36;
316         for (i=1; i<m; i++) {
317           v  = aa + 36*ai[i];
318           vi = aj + ai[i];
319           nz = diag[i] - ai[i];
320           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
321           while (nz--) {
322             idx = 6*(*vi++);
323             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
324             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
325             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
326             v  += 36;
327           }
328           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
329           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
330           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
331           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
332           idiag  += 36;
333           i2     += 6;
334         }
335         break;
336       case 7:
337         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
338         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
339         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
340         i2     = 7;
341         idiag += 49;
342         for (i=1; i<m; i++) {
343           v  = aa + 49*ai[i];
344           vi = aj + ai[i];
345           nz = diag[i] - ai[i];
346           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
347           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
348           while (nz--) {
349             idx = 7*(*vi++);
350             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
351             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
352             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
353             v  += 49;
354           }
355           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
356           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
357           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
358           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
359           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
360           idiag  += 49;
361           i2     += 7;
362         }
363         break;
364       default:
365         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
366         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
367         i2     = bs;
368         idiag += bs2;
369         for (i=1; i<m; i++) {
370           v  = aa + bs2*ai[i];
371           vi = aj + ai[i];
372           nz = diag[i] - ai[i];
373 
374           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
375           /* copy all rows of x that are needed into contiguous space */
376           workt = work;
377           for (j=0; j<nz; j++) {
378             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
379             workt += bs;
380           }
381           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
382           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
383           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
384 
385           idiag += bs2;
386           i2    += bs;
387         }
388         break;
389       }
390       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
391       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
392       xb = t;
393     }
394     else xb = b;
395     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
396       idiag = a->idiag+bs2*(a->mbs-1);
397       i2 = bs * (m-1);
398       switch (bs) {
399       case 1:
400         s[0]  = xb[i2];
401         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
402         x[i2] = xw[0];
403         i2   -= 1;
404         for (i=m-2; i>=0; i--) {
405           v  = aa + (diag[i]+1);
406           vi = aj + diag[i] + 1;
407           nz = ai[i+1] - diag[i] - 1;
408           s[0] = xb[i2];
409           for (j=0; j<nz; j++) {
410             xw[0] = x[vi[j]];
411             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
412           }
413           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
414           x[i2]  = xw[0];
415           idiag -= 1;
416           i2    -= 1;
417         }
418         break;
419       case 2:
420         s[0]  = xb[i2]; s[1] = xb[i2+1];
421         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
422         x[i2] = xw[0]; x[i2+1] = xw[1];
423         i2    -= 2;
424         idiag -= 4;
425         for (i=m-2; i>=0; i--) {
426           v  = aa + 4*(diag[i] + 1);
427           vi = aj + diag[i] + 1;
428           nz = ai[i+1] - diag[i] - 1;
429           s[0] = xb[i2]; s[1] = xb[i2+1];
430           for (j=0; j<nz; j++) {
431             idx = 2*vi[j];
432             it  = 4*j;
433             xw[0] = x[idx]; xw[1] = x[1+idx];
434             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
435           }
436           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
437           x[i2]   = xw[0]; x[i2+1] = xw[1];
438           idiag  -= 4;
439           i2     -= 2;
440         }
441         break;
442       case 3:
443         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
444         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
445         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
446         i2    -= 3;
447         idiag -= 9;
448         for (i=m-2; i>=0; i--) {
449           v  = aa + 9*(diag[i]+1);
450           vi = aj + diag[i] + 1;
451           nz = ai[i+1] - diag[i] - 1;
452           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
453           while (nz--) {
454             idx = 3*(*vi++);
455             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
456             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
457             v  += 9;
458           }
459           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
460           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
461           idiag  -= 9;
462           i2     -= 3;
463         }
464         break;
465       case 4:
466         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
467         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
468         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
469         i2    -= 4;
470         idiag -= 16;
471         for (i=m-2; i>=0; i--) {
472           v  = aa + 16*(diag[i]+1);
473           vi = aj + diag[i] + 1;
474           nz = ai[i+1] - diag[i] - 1;
475           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
476           while (nz--) {
477             idx = 4*(*vi++);
478             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
479             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
480             v  += 16;
481           }
482           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
483           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
484           idiag  -= 16;
485           i2     -= 4;
486         }
487         break;
488       case 5:
489         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
490         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
491         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
492         i2    -= 5;
493         idiag -= 25;
494         for (i=m-2; i>=0; i--) {
495           v  = aa + 25*(diag[i]+1);
496           vi = aj + diag[i] + 1;
497           nz = ai[i+1] - diag[i] - 1;
498           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
499           while (nz--) {
500             idx = 5*(*vi++);
501             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
502             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
503             v  += 25;
504           }
505           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
506           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
507           idiag  -= 25;
508           i2     -= 5;
509         }
510         break;
511       case 6:
512         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
513         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
514         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
515         i2    -= 6;
516         idiag -= 36;
517         for (i=m-2; i>=0; i--) {
518           v  = aa + 36*(diag[i]+1);
519           vi = aj + diag[i] + 1;
520           nz = ai[i+1] - diag[i] - 1;
521           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
522           while (nz--) {
523             idx = 6*(*vi++);
524             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
525             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
526             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
527             v  += 36;
528           }
529           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
530           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
531           idiag  -= 36;
532           i2     -= 6;
533         }
534         break;
535       case 7:
536         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
537         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
538         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
539         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
540         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
541         i2    -= 7;
542         idiag -= 49;
543         for (i=m-2; i>=0; i--) {
544           v  = aa + 49*(diag[i]+1);
545           vi = aj + diag[i] + 1;
546           nz = ai[i+1] - diag[i] - 1;
547           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
548           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
549           while (nz--) {
550             idx = 7*(*vi++);
551             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
552             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
553             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
554             v  += 49;
555           }
556           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
557           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
558           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
559           idiag  -= 49;
560           i2     -= 7;
561         }
562         break;
563       default:
564         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
565         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
566         i2    -= bs;
567         idiag -= bs2;
568         for (i=m-2; i>=0; i--) {
569           v  = aa + bs2*(diag[i]+1);
570           vi = aj + diag[i] + 1;
571           nz = ai[i+1] - diag[i] - 1;
572 
573           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
574           /* copy all rows of x that are needed into contiguous space */
575           workt = work;
576           for (j=0; j<nz; j++) {
577             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
578             workt += bs;
579           }
580           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
581           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
582 
583           idiag -= bs2;
584           i2    -= bs;
585         }
586         break;
587       }
588       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
589     }
590     its--;
591   }
592   while (its--) {
593     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
594       idiag = a->idiag;
595       i2 = 0;
596       switch (bs) {
597       case 1:
598         for (i=0; i<m; i++) {
599           v  = aa + ai[i];
600           vi = aj + ai[i];
601           nz = ai[i+1] - ai[i];
602           s[0] = b[i2];
603           for (j=0; j<nz; j++) {
604             xw[0] = x[vi[j]];
605             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
606           }
607           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
608           x[i2] += xw[0];
609           idiag += 1;
610           i2    += 1;
611         }
612         break;
613       case 2:
614         for (i=0; i<m; i++) {
615           v  = aa + 4*ai[i];
616           vi = aj + ai[i];
617           nz = ai[i+1] - ai[i];
618           s[0] = b[i2]; s[1] = b[i2+1];
619           for (j=0; j<nz; j++) {
620             idx = 2*vi[j];
621             it  = 4*j;
622             xw[0] = x[idx]; xw[1] = x[1+idx];
623             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
624           }
625           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
626           x[i2]  += xw[0]; x[i2+1] += xw[1];
627           idiag  += 4;
628           i2     += 2;
629         }
630         break;
631       case 3:
632         for (i=0; i<m; i++) {
633           v  = aa + 9*ai[i];
634           vi = aj + ai[i];
635           nz = ai[i+1] - ai[i];
636           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
637           while (nz--) {
638             idx = 3*(*vi++);
639             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
640             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
641             v  += 9;
642           }
643           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
644           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
645           idiag  += 9;
646           i2     += 3;
647         }
648         break;
649       case 4:
650         for (i=0; i<m; i++) {
651           v  = aa + 16*ai[i];
652           vi = aj + ai[i];
653           nz = ai[i+1] - ai[i];
654           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
655           while (nz--) {
656             idx = 4*(*vi++);
657             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
658             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
659             v  += 16;
660           }
661           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
662           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
663           idiag  += 16;
664           i2     += 4;
665         }
666         break;
667       case 5:
668         for (i=0; i<m; i++) {
669           v  = aa + 25*ai[i];
670           vi = aj + ai[i];
671           nz = ai[i+1] - ai[i];
672           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
673           while (nz--) {
674             idx = 5*(*vi++);
675             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
676             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
677             v  += 25;
678           }
679           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
680           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
681           idiag  += 25;
682           i2     += 5;
683         }
684         break;
685       case 6:
686         for (i=0; i<m; i++) {
687           v  = aa + 36*ai[i];
688           vi = aj + ai[i];
689           nz = ai[i+1] - ai[i];
690           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
691           while (nz--) {
692             idx = 6*(*vi++);
693             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
694             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
695             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
696             v  += 36;
697           }
698           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
699           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
700           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
701           idiag  += 36;
702           i2     += 6;
703         }
704         break;
705       case 7:
706         for (i=0; i<m; i++) {
707           v  = aa + 49*ai[i];
708           vi = aj + ai[i];
709           nz = ai[i+1] - ai[i];
710           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
711           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
712           while (nz--) {
713             idx = 7*(*vi++);
714             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
715             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
716             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
717             v  += 49;
718           }
719           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
720           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
721           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
722           idiag  += 49;
723           i2     += 7;
724         }
725         break;
726       default:
727         for (i=0; i<m; i++) {
728           v  = aa + bs2*ai[i];
729           vi = aj + ai[i];
730           nz = ai[i+1] - ai[i];
731 
732           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
733           /* copy all rows of x that are needed into contiguous space */
734           workt = work;
735           for (j=0; j<nz; j++) {
736             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
737             workt += bs;
738           }
739           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
740           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
741 
742           idiag += bs2;
743           i2    += bs;
744         }
745         break;
746       }
747       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
748     }
749     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
750       idiag = a->idiag+bs2*(a->mbs-1);
751       i2 = bs * (m-1);
752       switch (bs) {
753       case 1:
754         for (i=m-1; i>=0; i--) {
755           v  = aa + ai[i];
756           vi = aj + ai[i];
757           nz = ai[i+1] - ai[i];
758           s[0] = b[i2];
759           for (j=0; j<nz; j++) {
760             xw[0] = x[vi[j]];
761             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
762           }
763           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
764           x[i2] += xw[0];
765           idiag -= 1;
766           i2    -= 1;
767         }
768         break;
769       case 2:
770         for (i=m-1; i>=0; i--) {
771           v  = aa + 4*ai[i];
772           vi = aj + ai[i];
773           nz = ai[i+1] - ai[i];
774           s[0] = b[i2]; s[1] = b[i2+1];
775           for (j=0; j<nz; j++) {
776             idx = 2*vi[j];
777             it  = 4*j;
778             xw[0] = x[idx]; xw[1] = x[1+idx];
779             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
780           }
781           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
782           x[i2]  += xw[0]; x[i2+1] += xw[1];
783           idiag  -= 4;
784           i2     -= 2;
785         }
786         break;
787       case 3:
788         for (i=m-1; i>=0; i--) {
789           v  = aa + 9*ai[i];
790           vi = aj + ai[i];
791           nz = ai[i+1] - ai[i];
792           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
793           while (nz--) {
794             idx = 3*(*vi++);
795             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
796             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
797             v  += 9;
798           }
799           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
800           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
801           idiag  -= 9;
802           i2     -= 3;
803         }
804         break;
805       case 4:
806         for (i=m-1; i>=0; i--) {
807           v  = aa + 16*ai[i];
808           vi = aj + ai[i];
809           nz = ai[i+1] - ai[i];
810           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
811           while (nz--) {
812             idx = 4*(*vi++);
813             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
814             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
815             v  += 16;
816           }
817           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
818           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
819           idiag  -= 16;
820           i2     -= 4;
821         }
822         break;
823       case 5:
824         for (i=m-1; i>=0; i--) {
825           v  = aa + 25*ai[i];
826           vi = aj + ai[i];
827           nz = ai[i+1] - ai[i];
828           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
829           while (nz--) {
830             idx = 5*(*vi++);
831             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
832             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
833             v  += 25;
834           }
835           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
836           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
837           idiag  -= 25;
838           i2     -= 5;
839         }
840         break;
841       case 6:
842         for (i=m-1; i>=0; i--) {
843           v  = aa + 36*ai[i];
844           vi = aj + ai[i];
845           nz = ai[i+1] - ai[i];
846           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
847           while (nz--) {
848             idx = 6*(*vi++);
849             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
850             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
851             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
852             v  += 36;
853           }
854           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
855           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
856           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
857           idiag  -= 36;
858           i2     -= 6;
859         }
860         break;
861       case 7:
862         for (i=m-1; i>=0; i--) {
863           v  = aa + 49*ai[i];
864           vi = aj + ai[i];
865           nz = ai[i+1] - ai[i];
866           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
867           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
868           while (nz--) {
869             idx = 7*(*vi++);
870             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
871             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
872             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
873             v  += 49;
874           }
875           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
876           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
877           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
878           idiag  -= 49;
879           i2     -= 7;
880         }
881         break;
882       default:
883         for (i=m-1; i>=0; i--) {
884           v  = aa + bs2*ai[i];
885           vi = aj + ai[i];
886           nz = ai[i+1] - ai[i];
887 
888           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
889           /* copy all rows of x that are needed into contiguous space */
890           workt = work;
891           for (j=0; j<nz; j++) {
892             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
893             workt += bs;
894           }
895           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
896           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
897 
898           idiag -= bs2;
899           i2    -= bs;
900         }
901         break;
902       }
903       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
904     }
905   }
906   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
907   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
908   PetscFunctionReturn(0);
909 }
910 
911 
912 /*
913     Special version for direct calls from Fortran (Used in PETSc-fun3d)
914 */
915 #if defined(PETSC_HAVE_FORTRAN_CAPS)
916 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
917 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
918 #define matsetvaluesblocked4_ matsetvaluesblocked4
919 #endif
920 
921 #undef __FUNCT__
922 #define __FUNCT__ "matsetvaluesblocked4_"
923 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
924 {
925   Mat               A  = *AA;
926   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
927   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
928   PetscInt          *ai    =a->i,*ailen=a->ilen;
929   PetscInt          *aj    =a->j,stepval,lastcol = -1;
930   const PetscScalar *value = v;
931   MatScalar         *ap,*aa = a->a,*bap;
932 
933   PetscFunctionBegin;
934   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
935   stepval = (n-1)*4;
936   for (k=0; k<m; k++) { /* loop over added rows */
937     row  = im[k];
938     rp   = aj + ai[row];
939     ap   = aa + 16*ai[row];
940     nrow = ailen[row];
941     low  = 0;
942     high = nrow;
943     for (l=0; l<n; l++) { /* loop over added columns */
944       col = in[l];
945       if (col <= lastcol)  low = 0;
946       else                high = nrow;
947       lastcol = col;
948       value   = v + k*(stepval+4 + l)*4;
949       while (high-low > 7) {
950         t = (low+high)/2;
951         if (rp[t] > col) high = t;
952         else             low  = t;
953       }
954       for (i=low; i<high; i++) {
955         if (rp[i] > col) break;
956         if (rp[i] == col) {
957           bap = ap +  16*i;
958           for (ii=0; ii<4; ii++,value+=stepval) {
959             for (jj=ii; jj<16; jj+=4) {
960               bap[jj] += *value++;
961             }
962           }
963           goto noinsert2;
964         }
965       }
966       N = nrow++ - 1;
967       high++; /* added new column index thus must search to one higher than before */
968       /* shift up all the later entries in this row */
969       for (ii=N; ii>=i; ii--) {
970         rp[ii+1] = rp[ii];
971         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
972       }
973       if (N >= i) {
974         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
975       }
976       rp[i] = col;
977       bap   = ap +  16*i;
978       for (ii=0; ii<4; ii++,value+=stepval) {
979         for (jj=ii; jj<16; jj+=4) {
980           bap[jj] = *value++;
981         }
982       }
983       noinsert2:;
984       low = i;
985     }
986     ailen[row] = nrow;
987   }
988   PetscFunctionReturnVoid();
989 }
990 
991 #if defined(PETSC_HAVE_FORTRAN_CAPS)
992 #define matsetvalues4_ MATSETVALUES4
993 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
994 #define matsetvalues4_ matsetvalues4
995 #endif
996 
997 #undef __FUNCT__
998 #define __FUNCT__ "MatSetValues4_"
999 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1000 {
1001   Mat         A  = *AA;
1002   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1003   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1004   PetscInt    *ai=a->i,*ailen=a->ilen;
1005   PetscInt    *aj=a->j,brow,bcol;
1006   PetscInt    ridx,cidx,lastcol = -1;
1007   MatScalar   *ap,value,*aa=a->a,*bap;
1008 
1009   PetscFunctionBegin;
1010   for (k=0; k<m; k++) { /* loop over added rows */
1011     row  = im[k]; brow = row/4;
1012     rp   = aj + ai[brow];
1013     ap   = aa + 16*ai[brow];
1014     nrow = ailen[brow];
1015     low  = 0;
1016     high = nrow;
1017     for (l=0; l<n; l++) { /* loop over added columns */
1018       col   = in[l]; bcol = col/4;
1019       ridx  = row % 4; cidx = col % 4;
1020       value = v[l + k*n];
1021       if (col <= lastcol)  low = 0;
1022       else                high = nrow;
1023       lastcol = col;
1024       while (high-low > 7) {
1025         t = (low+high)/2;
1026         if (rp[t] > bcol) high = t;
1027         else              low  = t;
1028       }
1029       for (i=low; i<high; i++) {
1030         if (rp[i] > bcol) break;
1031         if (rp[i] == bcol) {
1032           bap   = ap +  16*i + 4*cidx + ridx;
1033           *bap += value;
1034           goto noinsert1;
1035         }
1036       }
1037       N = nrow++ - 1;
1038       high++; /* added new column thus must search to one higher than before */
1039       /* shift up all the later entries in this row */
1040       for (ii=N; ii>=i; ii--) {
1041         rp[ii+1] = rp[ii];
1042         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1043       }
1044       if (N>=i) {
1045         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1046       }
1047       rp[i]                    = bcol;
1048       ap[16*i + 4*cidx + ridx] = value;
1049 noinsert1:;
1050       low = i;
1051     }
1052     ailen[brow] = nrow;
1053   }
1054   PetscFunctionReturnVoid();
1055 }
1056 
1057 /*
1058      Checks for missing diagonals
1059 */
1060 #undef __FUNCT__
1061 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1062 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1063 {
1064   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1065   PetscErrorCode ierr;
1066   PetscInt       *diag,*ii = a->i,i;
1067 
1068   PetscFunctionBegin;
1069   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1070   *missing = PETSC_FALSE;
1071   if (A->rmap->n > 0 && !ii) {
1072     *missing = PETSC_TRUE;
1073     if (d) *d = 0;
1074     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1075   } else {
1076     diag = a->diag;
1077     for (i=0; i<a->mbs; i++) {
1078       if (diag[i] >= ii[i+1]) {
1079         *missing = PETSC_TRUE;
1080         if (d) *d = i;
1081         PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);
1082         break;
1083       }
1084     }
1085   }
1086   PetscFunctionReturn(0);
1087 }
1088 
1089 #undef __FUNCT__
1090 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1091 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1092 {
1093   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1094   PetscErrorCode ierr;
1095   PetscInt       i,j,m = a->mbs;
1096 
1097   PetscFunctionBegin;
1098   if (!a->diag) {
1099     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1100     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1101     a->free_diag = PETSC_TRUE;
1102   }
1103   for (i=0; i<m; i++) {
1104     a->diag[i] = a->i[i+1];
1105     for (j=a->i[i]; j<a->i[i+1]; j++) {
1106       if (a->j[j] == i) {
1107         a->diag[i] = j;
1108         break;
1109       }
1110     }
1111   }
1112   PetscFunctionReturn(0);
1113 }
1114 
1115 
1116 #undef __FUNCT__
1117 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1118 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1119 {
1120   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1121   PetscErrorCode ierr;
1122   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1123   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1124 
1125   PetscFunctionBegin;
1126   *nn = n;
1127   if (!ia) PetscFunctionReturn(0);
1128   if (symmetric) {
1129     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1130     nz   = tia[n];
1131   } else {
1132     tia = a->i; tja = a->j;
1133   }
1134 
1135   if (!blockcompressed && bs > 1) {
1136     (*nn) *= bs;
1137     /* malloc & create the natural set of indices */
1138     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1139     if (n) {
1140       (*ia)[0] = oshift;
1141       for (j=1; j<bs; j++) {
1142         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1143       }
1144     }
1145 
1146     for (i=1; i<n; i++) {
1147       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1148       for (j=1; j<bs; j++) {
1149         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1150       }
1151     }
1152     if (n) {
1153       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1154     }
1155 
1156     if (inja) {
1157       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1158       cnt = 0;
1159       for (i=0; i<n; i++) {
1160         for (j=0; j<bs; j++) {
1161           for (k=tia[i]; k<tia[i+1]; k++) {
1162             for (l=0; l<bs; l++) {
1163               (*ja)[cnt++] = bs*tja[k] + l;
1164             }
1165           }
1166         }
1167       }
1168     }
1169 
1170     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1171       ierr = PetscFree(tia);CHKERRQ(ierr);
1172       ierr = PetscFree(tja);CHKERRQ(ierr);
1173     }
1174   } else if (oshift == 1) {
1175     if (symmetric) {
1176       nz = tia[A->rmap->n/bs];
1177       /*  add 1 to i and j indices */
1178       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1179       *ia = tia;
1180       if (ja) {
1181         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1182         *ja = tja;
1183       }
1184     } else {
1185       nz = a->i[A->rmap->n/bs];
1186       /* malloc space and  add 1 to i and j indices */
1187       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1188       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1189       if (ja) {
1190         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1191         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1192       }
1193     }
1194   } else {
1195     *ia = tia;
1196     if (ja) *ja = tja;
1197   }
1198   PetscFunctionReturn(0);
1199 }
1200 
1201 #undef __FUNCT__
1202 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1203 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1204 {
1205   PetscErrorCode ierr;
1206 
1207   PetscFunctionBegin;
1208   if (!ia) PetscFunctionReturn(0);
1209   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1210     ierr = PetscFree(*ia);CHKERRQ(ierr);
1211     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1212   }
1213   PetscFunctionReturn(0);
1214 }
1215 
1216 #undef __FUNCT__
1217 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1218 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1219 {
1220   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1221   PetscErrorCode ierr;
1222 
1223   PetscFunctionBegin;
1224 #if defined(PETSC_USE_LOG)
1225   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1226 #endif
1227   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1228   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1229   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1230   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1231   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1232   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1233   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1234   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1235   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1236   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1237   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1238   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1239   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1240 
1241   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1242   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1243   ierr = PetscFree(A->data);CHKERRQ(ierr);
1244 
1245   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1246   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1247   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1248   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1249   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1250   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1251   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1252   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1253   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1254   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1255   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1256 #if defined(PETSC_HAVE_HYPRE)
1257   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1258 #endif
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1264 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1265 {
1266   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1267   PetscErrorCode ierr;
1268 
1269   PetscFunctionBegin;
1270   switch (op) {
1271   case MAT_ROW_ORIENTED:
1272     a->roworiented = flg;
1273     break;
1274   case MAT_KEEP_NONZERO_PATTERN:
1275     a->keepnonzeropattern = flg;
1276     break;
1277   case MAT_NEW_NONZERO_LOCATIONS:
1278     a->nonew = (flg ? 0 : 1);
1279     break;
1280   case MAT_NEW_NONZERO_LOCATION_ERR:
1281     a->nonew = (flg ? -1 : 0);
1282     break;
1283   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1284     a->nonew = (flg ? -2 : 0);
1285     break;
1286   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1287     a->nounused = (flg ? -1 : 0);
1288     break;
1289   case MAT_NEW_DIAGONALS:
1290   case MAT_IGNORE_OFF_PROC_ENTRIES:
1291   case MAT_USE_HASH_TABLE:
1292     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1293     break;
1294   case MAT_SPD:
1295   case MAT_SYMMETRIC:
1296   case MAT_STRUCTURALLY_SYMMETRIC:
1297   case MAT_HERMITIAN:
1298   case MAT_SYMMETRY_ETERNAL:
1299   case MAT_SUBMAT_SINGLEIS:
1300     /* These options are handled directly by MatSetOption() */
1301     break;
1302   default:
1303     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1304   }
1305   PetscFunctionReturn(0);
1306 }
1307 
1308 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1309 #undef __FUNCT__
1310 #define __FUNCT__ "MatGetRow_SeqBAIJ_private"
1311 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1312 {
1313   PetscErrorCode ierr;
1314   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1315   MatScalar      *aa_i;
1316   PetscScalar    *v_i;
1317 
1318   PetscFunctionBegin;
1319   bs  = A->rmap->bs;
1320   bs2 = bs*bs;
1321   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1322 
1323   bn  = row/bs;   /* Block number */
1324   bp  = row % bs; /* Block Position */
1325   M   = ai[bn+1] - ai[bn];
1326   *nz = bs*M;
1327 
1328   if (v) {
1329     *v = 0;
1330     if (*nz) {
1331       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1332       for (i=0; i<M; i++) { /* for each block in the block row */
1333         v_i  = *v + i*bs;
1334         aa_i = aa + bs2*(ai[bn] + i);
1335         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1336       }
1337     }
1338   }
1339 
1340   if (idx) {
1341     *idx = 0;
1342     if (*nz) {
1343       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1344       for (i=0; i<M; i++) { /* for each block in the block row */
1345         idx_i = *idx + i*bs;
1346         itmp  = bs*aj[ai[bn] + i];
1347         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1348       }
1349     }
1350   }
1351   PetscFunctionReturn(0);
1352 }
1353 
1354 #undef __FUNCT__
1355 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1356 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1357 {
1358   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1359   PetscErrorCode ierr;
1360 
1361   PetscFunctionBegin;
1362   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1363   PetscFunctionReturn(0);
1364 }
1365 
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1368 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1369 {
1370   PetscErrorCode ierr;
1371 
1372   PetscFunctionBegin;
1373   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1374   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1375   PetscFunctionReturn(0);
1376 }
1377 
1378 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1379 
1380 #undef __FUNCT__
1381 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1382 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1383 {
1384   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1385   Mat            C;
1386   PetscErrorCode ierr;
1387   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1388   PetscInt       *rows,*cols,bs2=a->bs2;
1389   MatScalar      *array;
1390 
1391   PetscFunctionBegin;
1392   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1393   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1394     ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr);
1395 
1396     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1397     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1398     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1399     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1400     ierr = MatSeqBAIJSetPreallocation(C,bs,0,col);CHKERRQ(ierr);
1401     ierr = PetscFree(col);CHKERRQ(ierr);
1402   } else {
1403     C = *B;
1404   }
1405 
1406   array = a->a;
1407   ierr  = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr);
1408   for (i=0; i<mbs; i++) {
1409     cols[0] = i*bs;
1410     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1411     len = ai[i+1] - ai[i];
1412     for (j=0; j<len; j++) {
1413       rows[0] = (*aj++)*bs;
1414       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1415       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1416       array += bs2;
1417     }
1418   }
1419   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1420 
1421   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1422   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1423 
1424   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1425     *B = C;
1426   } else {
1427     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1428   }
1429   PetscFunctionReturn(0);
1430 }
1431 
1432 #undef __FUNCT__
1433 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1434 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1435 {
1436   PetscErrorCode ierr;
1437   Mat            Btrans;
1438 
1439   PetscFunctionBegin;
1440   *f   = PETSC_FALSE;
1441   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1442   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1443   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1444   PetscFunctionReturn(0);
1445 }
1446 
1447 #undef __FUNCT__
1448 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1449 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1450 {
1451   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1452   PetscErrorCode ierr;
1453   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1454   int            fd;
1455   PetscScalar    *aa;
1456   FILE           *file;
1457 
1458   PetscFunctionBegin;
1459   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1460   ierr        = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr);
1461   col_lens[0] = MAT_FILE_CLASSID;
1462 
1463   col_lens[1] = A->rmap->N;
1464   col_lens[2] = A->cmap->n;
1465   col_lens[3] = a->nz*bs2;
1466 
1467   /* store lengths of each row and write (including header) to file */
1468   count = 0;
1469   for (i=0; i<a->mbs; i++) {
1470     for (j=0; j<bs; j++) {
1471       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1472     }
1473   }
1474   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1475   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1476 
1477   /* store column indices (zero start index) */
1478   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1479   count = 0;
1480   for (i=0; i<a->mbs; i++) {
1481     for (j=0; j<bs; j++) {
1482       for (k=a->i[i]; k<a->i[i+1]; k++) {
1483         for (l=0; l<bs; l++) {
1484           jj[count++] = bs*a->j[k] + l;
1485         }
1486       }
1487     }
1488   }
1489   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1490   ierr = PetscFree(jj);CHKERRQ(ierr);
1491 
1492   /* store nonzero values */
1493   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1494   count = 0;
1495   for (i=0; i<a->mbs; i++) {
1496     for (j=0; j<bs; j++) {
1497       for (k=a->i[i]; k<a->i[i+1]; k++) {
1498         for (l=0; l<bs; l++) {
1499           aa[count++] = a->a[bs2*k + l*bs + j];
1500         }
1501       }
1502     }
1503   }
1504   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1505   ierr = PetscFree(aa);CHKERRQ(ierr);
1506 
1507   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1508   if (file) {
1509     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1510   }
1511   PetscFunctionReturn(0);
1512 }
1513 
1514 #undef __FUNCT__
1515 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1516 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1517 {
1518   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1519   PetscErrorCode    ierr;
1520   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1521   PetscViewerFormat format;
1522 
1523   PetscFunctionBegin;
1524   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1525   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1526     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1527   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1528     const char *matname;
1529     Mat        aij;
1530     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1531     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1532     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1533     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1534     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1535   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1536       PetscFunctionReturn(0);
1537   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1538     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1539     for (i=0; i<a->mbs; i++) {
1540       for (j=0; j<bs; j++) {
1541         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1542         for (k=a->i[i]; k<a->i[i+1]; k++) {
1543           for (l=0; l<bs; l++) {
1544 #if defined(PETSC_USE_COMPLEX)
1545             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1546               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1547                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1548             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1549               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1550                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1551             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1552               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1553             }
1554 #else
1555             if (a->a[bs2*k + l*bs + j] != 0.0) {
1556               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1557             }
1558 #endif
1559           }
1560         }
1561         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1562       }
1563     }
1564     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1565   } else {
1566     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1567     for (i=0; i<a->mbs; i++) {
1568       for (j=0; j<bs; j++) {
1569         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1570         for (k=a->i[i]; k<a->i[i+1]; k++) {
1571           for (l=0; l<bs; l++) {
1572 #if defined(PETSC_USE_COMPLEX)
1573             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1574               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1575                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1576             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1577               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1578                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1579             } else {
1580               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1581             }
1582 #else
1583             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1584 #endif
1585           }
1586         }
1587         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1588       }
1589     }
1590     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1591   }
1592   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1593   PetscFunctionReturn(0);
1594 }
1595 
1596 #include <petscdraw.h>
1597 #undef __FUNCT__
1598 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1599 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1600 {
1601   Mat               A = (Mat) Aa;
1602   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1603   PetscErrorCode    ierr;
1604   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1605   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1606   MatScalar         *aa;
1607   PetscViewer       viewer;
1608   PetscViewerFormat format;
1609 
1610   PetscFunctionBegin;
1611   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1612   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1613   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1614 
1615   /* loop over matrix elements drawing boxes */
1616 
1617   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1618     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1619     /* Blue for negative, Cyan for zero and  Red for positive */
1620     color = PETSC_DRAW_BLUE;
1621     for (i=0,row=0; i<mbs; i++,row+=bs) {
1622       for (j=a->i[i]; j<a->i[i+1]; j++) {
1623         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1624         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1625         aa  = a->a + j*bs2;
1626         for (k=0; k<bs; k++) {
1627           for (l=0; l<bs; l++) {
1628             if (PetscRealPart(*aa++) >=  0.) continue;
1629             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1630           }
1631         }
1632       }
1633     }
1634     color = PETSC_DRAW_CYAN;
1635     for (i=0,row=0; i<mbs; i++,row+=bs) {
1636       for (j=a->i[i]; j<a->i[i+1]; j++) {
1637         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1638         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1639         aa  = a->a + j*bs2;
1640         for (k=0; k<bs; k++) {
1641           for (l=0; l<bs; l++) {
1642             if (PetscRealPart(*aa++) != 0.) continue;
1643             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1644           }
1645         }
1646       }
1647     }
1648     color = PETSC_DRAW_RED;
1649     for (i=0,row=0; i<mbs; i++,row+=bs) {
1650       for (j=a->i[i]; j<a->i[i+1]; j++) {
1651         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1652         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1653         aa  = a->a + j*bs2;
1654         for (k=0; k<bs; k++) {
1655           for (l=0; l<bs; l++) {
1656             if (PetscRealPart(*aa++) <= 0.) continue;
1657             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1658           }
1659         }
1660       }
1661     }
1662     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1663   } else {
1664     /* use contour shading to indicate magnitude of values */
1665     /* first determine max of all nonzero values */
1666     PetscReal minv = 0.0, maxv = 0.0;
1667     PetscDraw popup;
1668 
1669     for (i=0; i<a->nz*a->bs2; i++) {
1670       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1671     }
1672     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1673     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1674     ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1675 
1676     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1677     for (i=0,row=0; i<mbs; i++,row+=bs) {
1678       for (j=a->i[i]; j<a->i[i+1]; j++) {
1679         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1680         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1681         aa  = a->a + j*bs2;
1682         for (k=0; k<bs; k++) {
1683           for (l=0; l<bs; l++) {
1684             MatScalar v = *aa++;
1685             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1686             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1687           }
1688         }
1689       }
1690     }
1691     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1692   }
1693   PetscFunctionReturn(0);
1694 }
1695 
1696 #undef __FUNCT__
1697 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1698 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1699 {
1700   PetscErrorCode ierr;
1701   PetscReal      xl,yl,xr,yr,w,h;
1702   PetscDraw      draw;
1703   PetscBool      isnull;
1704 
1705   PetscFunctionBegin;
1706   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1707   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1708   if (isnull) PetscFunctionReturn(0);
1709 
1710   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1711   xr  += w;          yr += h;        xl = -w;     yl = -h;
1712   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1713   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1714   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1715   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1716   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1717   PetscFunctionReturn(0);
1718 }
1719 
1720 #undef __FUNCT__
1721 #define __FUNCT__ "MatView_SeqBAIJ"
1722 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1723 {
1724   PetscErrorCode ierr;
1725   PetscBool      iascii,isbinary,isdraw;
1726 
1727   PetscFunctionBegin;
1728   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1729   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1730   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1731   if (iascii) {
1732     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1733   } else if (isbinary) {
1734     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1735   } else if (isdraw) {
1736     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1737   } else {
1738     Mat B;
1739     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1740     ierr = MatView(B,viewer);CHKERRQ(ierr);
1741     ierr = MatDestroy(&B);CHKERRQ(ierr);
1742   }
1743   PetscFunctionReturn(0);
1744 }
1745 
1746 
1747 #undef __FUNCT__
1748 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1749 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1750 {
1751   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1752   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1753   PetscInt    *ai = a->i,*ailen = a->ilen;
1754   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1755   MatScalar   *ap,*aa = a->a;
1756 
1757   PetscFunctionBegin;
1758   for (k=0; k<m; k++) { /* loop over rows */
1759     row = im[k]; brow = row/bs;
1760     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1761     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1762     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1763     nrow = ailen[brow];
1764     for (l=0; l<n; l++) { /* loop over columns */
1765       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1766       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1767       col  = in[l];
1768       bcol = col/bs;
1769       cidx = col%bs;
1770       ridx = row%bs;
1771       high = nrow;
1772       low  = 0; /* assume unsorted */
1773       while (high-low > 5) {
1774         t = (low+high)/2;
1775         if (rp[t] > bcol) high = t;
1776         else             low  = t;
1777       }
1778       for (i=low; i<high; i++) {
1779         if (rp[i] > bcol) break;
1780         if (rp[i] == bcol) {
1781           *v++ = ap[bs2*i+bs*cidx+ridx];
1782           goto finished;
1783         }
1784       }
1785       *v++ = 0.0;
1786 finished:;
1787     }
1788   }
1789   PetscFunctionReturn(0);
1790 }
1791 
1792 #undef __FUNCT__
1793 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1794 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1795 {
1796   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1797   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1798   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1799   PetscErrorCode    ierr;
1800   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1801   PetscBool         roworiented=a->roworiented;
1802   const PetscScalar *value     = v;
1803   MatScalar         *ap,*aa = a->a,*bap;
1804 
1805   PetscFunctionBegin;
1806   if (roworiented) {
1807     stepval = (n-1)*bs;
1808   } else {
1809     stepval = (m-1)*bs;
1810   }
1811   for (k=0; k<m; k++) { /* loop over added rows */
1812     row = im[k];
1813     if (row < 0) continue;
1814 #if defined(PETSC_USE_DEBUG)
1815     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1816 #endif
1817     rp   = aj + ai[row];
1818     ap   = aa + bs2*ai[row];
1819     rmax = imax[row];
1820     nrow = ailen[row];
1821     low  = 0;
1822     high = nrow;
1823     for (l=0; l<n; l++) { /* loop over added columns */
1824       if (in[l] < 0) continue;
1825 #if defined(PETSC_USE_DEBUG)
1826       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1827 #endif
1828       col = in[l];
1829       if (roworiented) {
1830         value = v + (k*(stepval+bs) + l)*bs;
1831       } else {
1832         value = v + (l*(stepval+bs) + k)*bs;
1833       }
1834       if (col <= lastcol) low = 0;
1835       else high = nrow;
1836       lastcol = col;
1837       while (high-low > 7) {
1838         t = (low+high)/2;
1839         if (rp[t] > col) high = t;
1840         else             low  = t;
1841       }
1842       for (i=low; i<high; i++) {
1843         if (rp[i] > col) break;
1844         if (rp[i] == col) {
1845           bap = ap +  bs2*i;
1846           if (roworiented) {
1847             if (is == ADD_VALUES) {
1848               for (ii=0; ii<bs; ii++,value+=stepval) {
1849                 for (jj=ii; jj<bs2; jj+=bs) {
1850                   bap[jj] += *value++;
1851                 }
1852               }
1853             } else {
1854               for (ii=0; ii<bs; ii++,value+=stepval) {
1855                 for (jj=ii; jj<bs2; jj+=bs) {
1856                   bap[jj] = *value++;
1857                 }
1858               }
1859             }
1860           } else {
1861             if (is == ADD_VALUES) {
1862               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1863                 for (jj=0; jj<bs; jj++) {
1864                   bap[jj] += value[jj];
1865                 }
1866                 bap += bs;
1867               }
1868             } else {
1869               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1870                 for (jj=0; jj<bs; jj++) {
1871                   bap[jj]  = value[jj];
1872                 }
1873                 bap += bs;
1874               }
1875             }
1876           }
1877           goto noinsert2;
1878         }
1879       }
1880       if (nonew == 1) goto noinsert2;
1881       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1882       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1883       N = nrow++ - 1; high++;
1884       /* shift up all the later entries in this row */
1885       for (ii=N; ii>=i; ii--) {
1886         rp[ii+1] = rp[ii];
1887         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1888       }
1889       if (N >= i) {
1890         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1891       }
1892       rp[i] = col;
1893       bap   = ap +  bs2*i;
1894       if (roworiented) {
1895         for (ii=0; ii<bs; ii++,value+=stepval) {
1896           for (jj=ii; jj<bs2; jj+=bs) {
1897             bap[jj] = *value++;
1898           }
1899         }
1900       } else {
1901         for (ii=0; ii<bs; ii++,value+=stepval) {
1902           for (jj=0; jj<bs; jj++) {
1903             *bap++ = *value++;
1904           }
1905         }
1906       }
1907 noinsert2:;
1908       low = i;
1909     }
1910     ailen[row] = nrow;
1911   }
1912   PetscFunctionReturn(0);
1913 }
1914 
1915 #undef __FUNCT__
1916 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1917 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1918 {
1919   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1920   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1921   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1922   PetscErrorCode ierr;
1923   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1924   MatScalar      *aa  = a->a,*ap;
1925   PetscReal      ratio=0.6;
1926 
1927   PetscFunctionBegin;
1928   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1929 
1930   if (m) rmax = ailen[0];
1931   for (i=1; i<mbs; i++) {
1932     /* move each row back by the amount of empty slots (fshift) before it*/
1933     fshift += imax[i-1] - ailen[i-1];
1934     rmax    = PetscMax(rmax,ailen[i]);
1935     if (fshift) {
1936       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1937       N  = ailen[i];
1938       for (j=0; j<N; j++) {
1939         ip[j-fshift] = ip[j];
1940 
1941         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1942       }
1943     }
1944     ai[i] = ai[i-1] + ailen[i-1];
1945   }
1946   if (mbs) {
1947     fshift += imax[mbs-1] - ailen[mbs-1];
1948     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1949   }
1950 
1951   /* reset ilen and imax for each row */
1952   a->nonzerorowcnt = 0;
1953   for (i=0; i<mbs; i++) {
1954     ailen[i] = imax[i] = ai[i+1] - ai[i];
1955     a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1956   }
1957   a->nz = ai[mbs];
1958 
1959   /* diagonals may have moved, so kill the diagonal pointers */
1960   a->idiagvalid = PETSC_FALSE;
1961   if (fshift && a->diag) {
1962     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1963     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1964     a->diag = 0;
1965   }
1966   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1967   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1968   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1969   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1970 
1971   A->info.mallocs    += a->reallocs;
1972   a->reallocs         = 0;
1973   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1974   a->rmax             = rmax;
1975 
1976   ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1977   PetscFunctionReturn(0);
1978 }
1979 
1980 /*
1981    This function returns an array of flags which indicate the locations of contiguous
1982    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1983    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1984    Assume: sizes should be long enough to hold all the values.
1985 */
1986 #undef __FUNCT__
1987 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1988 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1989 {
1990   PetscInt  i,j,k,row;
1991   PetscBool flg;
1992 
1993   PetscFunctionBegin;
1994   for (i=0,j=0; i<n; j++) {
1995     row = idx[i];
1996     if (row%bs!=0) { /* Not the begining of a block */
1997       sizes[j] = 1;
1998       i++;
1999     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2000       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2001       i++;
2002     } else { /* Begining of the block, so check if the complete block exists */
2003       flg = PETSC_TRUE;
2004       for (k=1; k<bs; k++) {
2005         if (row+k != idx[i+k]) { /* break in the block */
2006           flg = PETSC_FALSE;
2007           break;
2008         }
2009       }
2010       if (flg) { /* No break in the bs */
2011         sizes[j] = bs;
2012         i       += bs;
2013       } else {
2014         sizes[j] = 1;
2015         i++;
2016       }
2017     }
2018   }
2019   *bs_max = j;
2020   PetscFunctionReturn(0);
2021 }
2022 
2023 #undef __FUNCT__
2024 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2025 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2026 {
2027   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2028   PetscErrorCode    ierr;
2029   PetscInt          i,j,k,count,*rows;
2030   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2031   PetscScalar       zero = 0.0;
2032   MatScalar         *aa;
2033   const PetscScalar *xx;
2034   PetscScalar       *bb;
2035 
2036   PetscFunctionBegin;
2037   /* fix right hand side if needed */
2038   if (x && b) {
2039     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2040     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2041     for (i=0; i<is_n; i++) {
2042       bb[is_idx[i]] = diag*xx[is_idx[i]];
2043     }
2044     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2045     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2046   }
2047 
2048   /* Make a copy of the IS and  sort it */
2049   /* allocate memory for rows,sizes */
2050   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2051 
2052   /* copy IS values to rows, and sort them */
2053   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2054   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2055 
2056   if (baij->keepnonzeropattern) {
2057     for (i=0; i<is_n; i++) sizes[i] = 1;
2058     bs_max          = is_n;
2059   } else {
2060     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2061     A->nonzerostate++;
2062   }
2063 
2064   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2065     row = rows[j];
2066     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2067     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2068     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2069     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2070       if (diag != (PetscScalar)0.0) {
2071         if (baij->ilen[row/bs] > 0) {
2072           baij->ilen[row/bs]       = 1;
2073           baij->j[baij->i[row/bs]] = row/bs;
2074 
2075           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2076         }
2077         /* Now insert all the diagonal values for this bs */
2078         for (k=0; k<bs; k++) {
2079           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2080         }
2081       } else { /* (diag == 0.0) */
2082         baij->ilen[row/bs] = 0;
2083       } /* end (diag == 0.0) */
2084     } else { /* (sizes[i] != bs) */
2085 #if defined(PETSC_USE_DEBUG)
2086       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2087 #endif
2088       for (k=0; k<count; k++) {
2089         aa[0] =  zero;
2090         aa   += bs;
2091       }
2092       if (diag != (PetscScalar)0.0) {
2093         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2094       }
2095     }
2096   }
2097 
2098   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2099   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2100   PetscFunctionReturn(0);
2101 }
2102 
2103 #undef __FUNCT__
2104 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2105 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2106 {
2107   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2108   PetscErrorCode    ierr;
2109   PetscInt          i,j,k,count;
2110   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2111   PetscScalar       zero = 0.0;
2112   MatScalar         *aa;
2113   const PetscScalar *xx;
2114   PetscScalar       *bb;
2115   PetscBool         *zeroed,vecs = PETSC_FALSE;
2116 
2117   PetscFunctionBegin;
2118   /* fix right hand side if needed */
2119   if (x && b) {
2120     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2121     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2122     vecs = PETSC_TRUE;
2123   }
2124 
2125   /* zero the columns */
2126   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2127   for (i=0; i<is_n; i++) {
2128     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2129     zeroed[is_idx[i]] = PETSC_TRUE;
2130   }
2131   for (i=0; i<A->rmap->N; i++) {
2132     if (!zeroed[i]) {
2133       row = i/bs;
2134       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2135         for (k=0; k<bs; k++) {
2136           col = bs*baij->j[j] + k;
2137           if (zeroed[col]) {
2138             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2139             if (vecs) bb[i] -= aa[0]*xx[col];
2140             aa[0] = 0.0;
2141           }
2142         }
2143       }
2144     } else if (vecs) bb[i] = diag*xx[i];
2145   }
2146   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2147   if (vecs) {
2148     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2149     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2150   }
2151 
2152   /* zero the rows */
2153   for (i=0; i<is_n; i++) {
2154     row   = is_idx[i];
2155     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2156     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2157     for (k=0; k<count; k++) {
2158       aa[0] =  zero;
2159       aa   += bs;
2160     }
2161     if (diag != (PetscScalar)0.0) {
2162       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2163     }
2164   }
2165   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2166   PetscFunctionReturn(0);
2167 }
2168 
2169 #undef __FUNCT__
2170 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2171 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2172 {
2173   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2174   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2175   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2176   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2177   PetscErrorCode ierr;
2178   PetscInt       ridx,cidx,bs2=a->bs2;
2179   PetscBool      roworiented=a->roworiented;
2180   MatScalar      *ap,value,*aa=a->a,*bap;
2181 
2182   PetscFunctionBegin;
2183   for (k=0; k<m; k++) { /* loop over added rows */
2184     row  = im[k];
2185     brow = row/bs;
2186     if (row < 0) continue;
2187 #if defined(PETSC_USE_DEBUG)
2188     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2189 #endif
2190     rp   = aj + ai[brow];
2191     ap   = aa + bs2*ai[brow];
2192     rmax = imax[brow];
2193     nrow = ailen[brow];
2194     low  = 0;
2195     high = nrow;
2196     for (l=0; l<n; l++) { /* loop over added columns */
2197       if (in[l] < 0) continue;
2198 #if defined(PETSC_USE_DEBUG)
2199       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2200 #endif
2201       col  = in[l]; bcol = col/bs;
2202       ridx = row % bs; cidx = col % bs;
2203       if (roworiented) {
2204         value = v[l + k*n];
2205       } else {
2206         value = v[k + l*m];
2207       }
2208       if (col <= lastcol) low = 0; else high = nrow;
2209       lastcol = col;
2210       while (high-low > 7) {
2211         t = (low+high)/2;
2212         if (rp[t] > bcol) high = t;
2213         else              low  = t;
2214       }
2215       for (i=low; i<high; i++) {
2216         if (rp[i] > bcol) break;
2217         if (rp[i] == bcol) {
2218           bap = ap +  bs2*i + bs*cidx + ridx;
2219           if (is == ADD_VALUES) *bap += value;
2220           else                  *bap  = value;
2221           goto noinsert1;
2222         }
2223       }
2224       if (nonew == 1) goto noinsert1;
2225       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2226       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2227       N = nrow++ - 1; high++;
2228       /* shift up all the later entries in this row */
2229       for (ii=N; ii>=i; ii--) {
2230         rp[ii+1] = rp[ii];
2231         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2232       }
2233       if (N>=i) {
2234         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2235       }
2236       rp[i]                      = bcol;
2237       ap[bs2*i + bs*cidx + ridx] = value;
2238       a->nz++;
2239       A->nonzerostate++;
2240 noinsert1:;
2241       low = i;
2242     }
2243     ailen[brow] = nrow;
2244   }
2245   PetscFunctionReturn(0);
2246 }
2247 
2248 #undef __FUNCT__
2249 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2250 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2251 {
2252   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2253   Mat            outA;
2254   PetscErrorCode ierr;
2255   PetscBool      row_identity,col_identity;
2256 
2257   PetscFunctionBegin;
2258   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2259   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2260   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2261   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2262 
2263   outA            = inA;
2264   inA->factortype = MAT_FACTOR_LU;
2265   ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2266   ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
2267 
2268   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2269 
2270   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2271   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2272   a->row = row;
2273   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2274   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2275   a->col = col;
2276 
2277   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2278   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2279   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2280   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2281 
2282   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2283   if (!a->solve_work) {
2284     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2285     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2286   }
2287   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2288   PetscFunctionReturn(0);
2289 }
2290 
2291 #undef __FUNCT__
2292 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2293 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2294 {
2295   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2296   PetscInt    i,nz,mbs;
2297 
2298   PetscFunctionBegin;
2299   nz  = baij->maxnz;
2300   mbs = baij->mbs;
2301   for (i=0; i<nz; i++) {
2302     baij->j[i] = indices[i];
2303   }
2304   baij->nz = nz;
2305   for (i=0; i<mbs; i++) {
2306     baij->ilen[i] = baij->imax[i];
2307   }
2308   PetscFunctionReturn(0);
2309 }
2310 
2311 #undef __FUNCT__
2312 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2313 /*@
2314     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2315        in the matrix.
2316 
2317   Input Parameters:
2318 +  mat - the SeqBAIJ matrix
2319 -  indices - the column indices
2320 
2321   Level: advanced
2322 
2323   Notes:
2324     This can be called if you have precomputed the nonzero structure of the
2325   matrix and want to provide it to the matrix object to improve the performance
2326   of the MatSetValues() operation.
2327 
2328     You MUST have set the correct numbers of nonzeros per row in the call to
2329   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2330 
2331     MUST be called before any calls to MatSetValues();
2332 
2333 @*/
2334 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2335 {
2336   PetscErrorCode ierr;
2337 
2338   PetscFunctionBegin;
2339   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2340   PetscValidPointer(indices,2);
2341   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2342   PetscFunctionReturn(0);
2343 }
2344 
2345 #undef __FUNCT__
2346 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2347 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2348 {
2349   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2350   PetscErrorCode ierr;
2351   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2352   PetscReal      atmp;
2353   PetscScalar    *x,zero = 0.0;
2354   MatScalar      *aa;
2355   PetscInt       ncols,brow,krow,kcol;
2356 
2357   PetscFunctionBegin;
2358   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2359   bs  = A->rmap->bs;
2360   aa  = a->a;
2361   ai  = a->i;
2362   aj  = a->j;
2363   mbs = a->mbs;
2364 
2365   ierr = VecSet(v,zero);CHKERRQ(ierr);
2366   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2367   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2368   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2369   for (i=0; i<mbs; i++) {
2370     ncols = ai[1] - ai[0]; ai++;
2371     brow  = bs*i;
2372     for (j=0; j<ncols; j++) {
2373       for (kcol=0; kcol<bs; kcol++) {
2374         for (krow=0; krow<bs; krow++) {
2375           atmp = PetscAbsScalar(*aa);aa++;
2376           row  = brow + krow;   /* row index */
2377           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2378         }
2379       }
2380       aj++;
2381     }
2382   }
2383   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2384   PetscFunctionReturn(0);
2385 }
2386 
2387 #undef __FUNCT__
2388 #define __FUNCT__ "MatCopy_SeqBAIJ"
2389 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2390 {
2391   PetscErrorCode ierr;
2392 
2393   PetscFunctionBegin;
2394   /* If the two matrices have the same copy implementation, use fast copy. */
2395   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2396     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2397     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2398     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2399 
2400     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2401     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2402     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2403   } else {
2404     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2405   }
2406   PetscFunctionReturn(0);
2407 }
2408 
2409 #undef __FUNCT__
2410 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2411 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2412 {
2413   PetscErrorCode ierr;
2414 
2415   PetscFunctionBegin;
2416   ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2417   PetscFunctionReturn(0);
2418 }
2419 
2420 #undef __FUNCT__
2421 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2422 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2423 {
2424   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2425 
2426   PetscFunctionBegin;
2427   *array = a->a;
2428   PetscFunctionReturn(0);
2429 }
2430 
2431 #undef __FUNCT__
2432 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2433 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2434 {
2435   PetscFunctionBegin;
2436   PetscFunctionReturn(0);
2437 }
2438 
2439 #undef __FUNCT__
2440 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ"
2441 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2442 {
2443   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2444   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2445   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2446   PetscErrorCode ierr;
2447 
2448   PetscFunctionBegin;
2449   /* Set the number of nonzeros in the new matrix */
2450   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2451   PetscFunctionReturn(0);
2452 }
2453 
2454 #undef __FUNCT__
2455 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2456 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2457 {
2458   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2459   PetscErrorCode ierr;
2460   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2461   PetscBLASInt   one=1;
2462 
2463   PetscFunctionBegin;
2464   if (str == SAME_NONZERO_PATTERN) {
2465     PetscScalar  alpha = a;
2466     PetscBLASInt bnz;
2467     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2468     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2469     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2470   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2471     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2472   } else {
2473     Mat      B;
2474     PetscInt *nnz;
2475     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2476     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2477     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2478     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2479     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2480     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2481     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2482     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2483     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2484     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2485     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2486     ierr = PetscFree(nnz);CHKERRQ(ierr);
2487   }
2488   PetscFunctionReturn(0);
2489 }
2490 
2491 #undef __FUNCT__
2492 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2493 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2494 {
2495   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2496   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2497   MatScalar   *aa = a->a;
2498 
2499   PetscFunctionBegin;
2500   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2501   PetscFunctionReturn(0);
2502 }
2503 
2504 #undef __FUNCT__
2505 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2506 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2507 {
2508   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2509   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2510   MatScalar   *aa = a->a;
2511 
2512   PetscFunctionBegin;
2513   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2514   PetscFunctionReturn(0);
2515 }
2516 
2517 #undef __FUNCT__
2518 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2519 /*
2520     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2521 */
2522 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2523 {
2524   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2525   PetscErrorCode ierr;
2526   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2527   PetscInt       nz = a->i[m],row,*jj,mr,col;
2528 
2529   PetscFunctionBegin;
2530   *nn = n;
2531   if (!ia) PetscFunctionReturn(0);
2532   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2533   else {
2534     ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2535     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2536     ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2537     jj   = a->j;
2538     for (i=0; i<nz; i++) {
2539       collengths[jj[i]]++;
2540     }
2541     cia[0] = oshift;
2542     for (i=0; i<n; i++) {
2543       cia[i+1] = cia[i] + collengths[i];
2544     }
2545     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2546     jj   = a->j;
2547     for (row=0; row<m; row++) {
2548       mr = a->i[row+1] - a->i[row];
2549       for (i=0; i<mr; i++) {
2550         col = *jj++;
2551 
2552         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2553       }
2554     }
2555     ierr = PetscFree(collengths);CHKERRQ(ierr);
2556     *ia  = cia; *ja = cja;
2557   }
2558   PetscFunctionReturn(0);
2559 }
2560 
2561 #undef __FUNCT__
2562 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2563 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2564 {
2565   PetscErrorCode ierr;
2566 
2567   PetscFunctionBegin;
2568   if (!ia) PetscFunctionReturn(0);
2569   ierr = PetscFree(*ia);CHKERRQ(ierr);
2570   ierr = PetscFree(*ja);CHKERRQ(ierr);
2571   PetscFunctionReturn(0);
2572 }
2573 
2574 /*
2575  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2576  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2577  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2578  */
2579 #undef __FUNCT__
2580 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color"
2581 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2582 {
2583   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2584   PetscErrorCode ierr;
2585   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2586   PetscInt       nz = a->i[m],row,*jj,mr,col;
2587   PetscInt       *cspidx;
2588 
2589   PetscFunctionBegin;
2590   *nn = n;
2591   if (!ia) PetscFunctionReturn(0);
2592 
2593   ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2594   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2595   ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2596   ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr);
2597   jj   = a->j;
2598   for (i=0; i<nz; i++) {
2599     collengths[jj[i]]++;
2600   }
2601   cia[0] = oshift;
2602   for (i=0; i<n; i++) {
2603     cia[i+1] = cia[i] + collengths[i];
2604   }
2605   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2606   jj   = a->j;
2607   for (row=0; row<m; row++) {
2608     mr = a->i[row+1] - a->i[row];
2609     for (i=0; i<mr; i++) {
2610       col = *jj++;
2611       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2612       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2613     }
2614   }
2615   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2616   *ia    = cia; *ja = cja;
2617   *spidx = cspidx;
2618   PetscFunctionReturn(0);
2619 }
2620 
2621 #undef __FUNCT__
2622 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color"
2623 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2624 {
2625   PetscErrorCode ierr;
2626 
2627   PetscFunctionBegin;
2628   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2629   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2630   PetscFunctionReturn(0);
2631 }
2632 
2633 #undef __FUNCT__
2634 #define __FUNCT__ "MatShift_SeqBAIJ"
2635 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2636 {
2637   PetscErrorCode ierr;
2638   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2639 
2640   PetscFunctionBegin;
2641   if (!Y->preallocated || !aij->nz) {
2642     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2643   }
2644   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2645   PetscFunctionReturn(0);
2646 }
2647 
2648 /* -------------------------------------------------------------------*/
2649 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2650                                        MatGetRow_SeqBAIJ,
2651                                        MatRestoreRow_SeqBAIJ,
2652                                        MatMult_SeqBAIJ_N,
2653                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2654                                        MatMultTranspose_SeqBAIJ,
2655                                        MatMultTransposeAdd_SeqBAIJ,
2656                                        0,
2657                                        0,
2658                                        0,
2659                                /* 10*/ 0,
2660                                        MatLUFactor_SeqBAIJ,
2661                                        0,
2662                                        0,
2663                                        MatTranspose_SeqBAIJ,
2664                                /* 15*/ MatGetInfo_SeqBAIJ,
2665                                        MatEqual_SeqBAIJ,
2666                                        MatGetDiagonal_SeqBAIJ,
2667                                        MatDiagonalScale_SeqBAIJ,
2668                                        MatNorm_SeqBAIJ,
2669                                /* 20*/ 0,
2670                                        MatAssemblyEnd_SeqBAIJ,
2671                                        MatSetOption_SeqBAIJ,
2672                                        MatZeroEntries_SeqBAIJ,
2673                                /* 24*/ MatZeroRows_SeqBAIJ,
2674                                        0,
2675                                        0,
2676                                        0,
2677                                        0,
2678                                /* 29*/ MatSetUp_SeqBAIJ,
2679                                        0,
2680                                        0,
2681                                        0,
2682                                        0,
2683                                /* 34*/ MatDuplicate_SeqBAIJ,
2684                                        0,
2685                                        0,
2686                                        MatILUFactor_SeqBAIJ,
2687                                        0,
2688                                /* 39*/ MatAXPY_SeqBAIJ,
2689                                        MatGetSubMatrices_SeqBAIJ,
2690                                        MatIncreaseOverlap_SeqBAIJ,
2691                                        MatGetValues_SeqBAIJ,
2692                                        MatCopy_SeqBAIJ,
2693                                /* 44*/ 0,
2694                                        MatScale_SeqBAIJ,
2695                                        MatShift_SeqBAIJ,
2696                                        0,
2697                                        MatZeroRowsColumns_SeqBAIJ,
2698                                /* 49*/ 0,
2699                                        MatGetRowIJ_SeqBAIJ,
2700                                        MatRestoreRowIJ_SeqBAIJ,
2701                                        MatGetColumnIJ_SeqBAIJ,
2702                                        MatRestoreColumnIJ_SeqBAIJ,
2703                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2704                                        0,
2705                                        0,
2706                                        0,
2707                                        MatSetValuesBlocked_SeqBAIJ,
2708                                /* 59*/ MatGetSubMatrix_SeqBAIJ,
2709                                        MatDestroy_SeqBAIJ,
2710                                        MatView_SeqBAIJ,
2711                                        0,
2712                                        0,
2713                                /* 64*/ 0,
2714                                        0,
2715                                        0,
2716                                        0,
2717                                        0,
2718                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2719                                        0,
2720                                        MatConvert_Basic,
2721                                        0,
2722                                        0,
2723                                /* 74*/ 0,
2724                                        MatFDColoringApply_BAIJ,
2725                                        0,
2726                                        0,
2727                                        0,
2728                                /* 79*/ 0,
2729                                        0,
2730                                        0,
2731                                        0,
2732                                        MatLoad_SeqBAIJ,
2733                                /* 84*/ 0,
2734                                        0,
2735                                        0,
2736                                        0,
2737                                        0,
2738                                /* 89*/ 0,
2739                                        0,
2740                                        0,
2741                                        0,
2742                                        0,
2743                                /* 94*/ 0,
2744                                        0,
2745                                        0,
2746                                        0,
2747                                        0,
2748                                /* 99*/ 0,
2749                                        0,
2750                                        0,
2751                                        0,
2752                                        0,
2753                                /*104*/ 0,
2754                                        MatRealPart_SeqBAIJ,
2755                                        MatImaginaryPart_SeqBAIJ,
2756                                        0,
2757                                        0,
2758                                /*109*/ 0,
2759                                        0,
2760                                        0,
2761                                        0,
2762                                        MatMissingDiagonal_SeqBAIJ,
2763                                /*114*/ 0,
2764                                        0,
2765                                        0,
2766                                        0,
2767                                        0,
2768                                /*119*/ 0,
2769                                        0,
2770                                        MatMultHermitianTranspose_SeqBAIJ,
2771                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2772                                        0,
2773                                /*124*/ 0,
2774                                        0,
2775                                        MatInvertBlockDiagonal_SeqBAIJ,
2776                                        0,
2777                                        0,
2778                                /*129*/ 0,
2779                                        0,
2780                                        0,
2781                                        0,
2782                                        0,
2783                                /*134*/ 0,
2784                                        0,
2785                                        0,
2786                                        0,
2787                                        0,
2788                                /*139*/ 0,
2789                                        0,
2790                                        0,
2791                                        MatFDColoringSetUp_SeqXAIJ,
2792                                        0,
2793                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ
2794 };
2795 
2796 #undef __FUNCT__
2797 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2798 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2799 {
2800   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2801   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2802   PetscErrorCode ierr;
2803 
2804   PetscFunctionBegin;
2805   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2806 
2807   /* allocate space for values if not already there */
2808   if (!aij->saved_values) {
2809     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2810     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2811   }
2812 
2813   /* copy values over */
2814   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2815   PetscFunctionReturn(0);
2816 }
2817 
2818 #undef __FUNCT__
2819 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2820 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2821 {
2822   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2823   PetscErrorCode ierr;
2824   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2825 
2826   PetscFunctionBegin;
2827   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2828   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2829 
2830   /* copy values over */
2831   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2832   PetscFunctionReturn(0);
2833 }
2834 
2835 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2836 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2837 
2838 #undef __FUNCT__
2839 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2840 static PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2841 {
2842   Mat_SeqBAIJ    *b;
2843   PetscErrorCode ierr;
2844   PetscInt       i,mbs,nbs,bs2;
2845   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2846 
2847   PetscFunctionBegin;
2848   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2849   if (nz == MAT_SKIP_ALLOCATION) {
2850     skipallocation = PETSC_TRUE;
2851     nz             = 0;
2852   }
2853 
2854   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2855   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2856   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2857   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2858 
2859   B->preallocated = PETSC_TRUE;
2860 
2861   mbs = B->rmap->n/bs;
2862   nbs = B->cmap->n/bs;
2863   bs2 = bs*bs;
2864 
2865   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2866 
2867   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2868   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2869   if (nnz) {
2870     for (i=0; i<mbs; i++) {
2871       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2872       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2873     }
2874   }
2875 
2876   b    = (Mat_SeqBAIJ*)B->data;
2877   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2878   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2879   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2880 
2881   if (!flg) {
2882     switch (bs) {
2883     case 1:
2884       B->ops->mult    = MatMult_SeqBAIJ_1;
2885       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2886       break;
2887     case 2:
2888       B->ops->mult    = MatMult_SeqBAIJ_2;
2889       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2890       break;
2891     case 3:
2892       B->ops->mult    = MatMult_SeqBAIJ_3;
2893       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2894       break;
2895     case 4:
2896       B->ops->mult    = MatMult_SeqBAIJ_4;
2897       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2898       break;
2899     case 5:
2900       B->ops->mult    = MatMult_SeqBAIJ_5;
2901       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2902       break;
2903     case 6:
2904       B->ops->mult    = MatMult_SeqBAIJ_6;
2905       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2906       break;
2907     case 7:
2908       B->ops->mult    = MatMult_SeqBAIJ_7;
2909       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2910       break;
2911     case 15:
2912       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2913       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2914       break;
2915     default:
2916       B->ops->mult    = MatMult_SeqBAIJ_N;
2917       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2918       break;
2919     }
2920   }
2921   B->ops->sor = MatSOR_SeqBAIJ;
2922   b->mbs = mbs;
2923   b->nbs = nbs;
2924   if (!skipallocation) {
2925     if (!b->imax) {
2926       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2927       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2928 
2929       b->free_imax_ilen = PETSC_TRUE;
2930     }
2931     /* b->ilen will count nonzeros in each block row so far. */
2932     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2933     if (!nnz) {
2934       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2935       else if (nz < 0) nz = 1;
2936       for (i=0; i<mbs; i++) b->imax[i] = nz;
2937       nz = nz*mbs;
2938     } else {
2939       nz = 0;
2940       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2941     }
2942 
2943     /* allocate the matrix space */
2944     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2945     ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2946     ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2947     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2948     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2949 
2950     b->singlemalloc = PETSC_TRUE;
2951     b->i[0]         = 0;
2952     for (i=1; i<mbs+1; i++) {
2953       b->i[i] = b->i[i-1] + b->imax[i-1];
2954     }
2955     b->free_a  = PETSC_TRUE;
2956     b->free_ij = PETSC_TRUE;
2957   } else {
2958     b->free_a  = PETSC_FALSE;
2959     b->free_ij = PETSC_FALSE;
2960   }
2961 
2962   b->bs2              = bs2;
2963   b->mbs              = mbs;
2964   b->nz               = 0;
2965   b->maxnz            = nz;
2966   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2967   B->was_assembled    = PETSC_FALSE;
2968   B->assembled        = PETSC_FALSE;
2969   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2970   PetscFunctionReturn(0);
2971 }
2972 
2973 #undef __FUNCT__
2974 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2975 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2976 {
2977   PetscInt       i,m,nz,nz_max=0,*nnz;
2978   PetscScalar    *values=0;
2979   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2980   PetscErrorCode ierr;
2981 
2982   PetscFunctionBegin;
2983   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2984   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2985   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2986   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2987   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2988   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2989   m    = B->rmap->n/bs;
2990 
2991   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2992   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2993   for (i=0; i<m; i++) {
2994     nz = ii[i+1]- ii[i];
2995     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2996     nz_max = PetscMax(nz_max, nz);
2997     nnz[i] = nz;
2998   }
2999   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3000   ierr = PetscFree(nnz);CHKERRQ(ierr);
3001 
3002   values = (PetscScalar*)V;
3003   if (!values) {
3004     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
3005   }
3006   for (i=0; i<m; i++) {
3007     PetscInt          ncols  = ii[i+1] - ii[i];
3008     const PetscInt    *icols = jj + ii[i];
3009     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3010     if (!roworiented) {
3011       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3012     } else {
3013       PetscInt j;
3014       for (j=0; j<ncols; j++) {
3015         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
3016         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
3017       }
3018     }
3019   }
3020   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3021   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3022   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3023   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3024   PetscFunctionReturn(0);
3025 }
3026 
3027 /*MC
3028    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3029    block sparse compressed row format.
3030 
3031    Options Database Keys:
3032 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3033 
3034   Level: beginner
3035 
3036 .seealso: MatCreateSeqBAIJ()
3037 M*/
3038 
3039 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3040 
3041 #undef __FUNCT__
3042 #define __FUNCT__ "MatCreate_SeqBAIJ"
3043 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3044 {
3045   PetscErrorCode ierr;
3046   PetscMPIInt    size;
3047   Mat_SeqBAIJ    *b;
3048 
3049   PetscFunctionBegin;
3050   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3051   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3052 
3053   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3054   B->data = (void*)b;
3055   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3056 
3057   b->row          = 0;
3058   b->col          = 0;
3059   b->icol         = 0;
3060   b->reallocs     = 0;
3061   b->saved_values = 0;
3062 
3063   b->roworiented        = PETSC_TRUE;
3064   b->nonew              = 0;
3065   b->diag               = 0;
3066   B->spptr              = 0;
3067   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3068   b->keepnonzeropattern = PETSC_FALSE;
3069 
3070   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3071   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3072   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3073   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3074   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3075   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3076   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3077   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3078   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3079 #if defined(PETSC_HAVE_HYPRE)
3080   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_baij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
3081 #endif
3082   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3083   PetscFunctionReturn(0);
3084 }
3085 
3086 #undef __FUNCT__
3087 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3088 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3089 {
3090   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3091   PetscErrorCode ierr;
3092   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3093 
3094   PetscFunctionBegin;
3095   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3096 
3097   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3098     c->imax           = a->imax;
3099     c->ilen           = a->ilen;
3100     c->free_imax_ilen = PETSC_FALSE;
3101   } else {
3102     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3103     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3104     for (i=0; i<mbs; i++) {
3105       c->imax[i] = a->imax[i];
3106       c->ilen[i] = a->ilen[i];
3107     }
3108     c->free_imax_ilen = PETSC_TRUE;
3109   }
3110 
3111   /* allocate the matrix space */
3112   if (mallocmatspace) {
3113     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3114       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3115       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3116 
3117       c->i            = a->i;
3118       c->j            = a->j;
3119       c->singlemalloc = PETSC_FALSE;
3120       c->free_a       = PETSC_TRUE;
3121       c->free_ij      = PETSC_FALSE;
3122       c->parent       = A;
3123       C->preallocated = PETSC_TRUE;
3124       C->assembled    = PETSC_TRUE;
3125 
3126       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3127       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3128       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3129     } else {
3130       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3131       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3132 
3133       c->singlemalloc = PETSC_TRUE;
3134       c->free_a       = PETSC_TRUE;
3135       c->free_ij      = PETSC_TRUE;
3136 
3137       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3138       if (mbs > 0) {
3139         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3140         if (cpvalues == MAT_COPY_VALUES) {
3141           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3142         } else {
3143           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3144         }
3145       }
3146       C->preallocated = PETSC_TRUE;
3147       C->assembled    = PETSC_TRUE;
3148     }
3149   }
3150 
3151   c->roworiented = a->roworiented;
3152   c->nonew       = a->nonew;
3153 
3154   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3155   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3156 
3157   c->bs2         = a->bs2;
3158   c->mbs         = a->mbs;
3159   c->nbs         = a->nbs;
3160 
3161   if (a->diag) {
3162     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3163       c->diag      = a->diag;
3164       c->free_diag = PETSC_FALSE;
3165     } else {
3166       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3167       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3168       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3169       c->free_diag = PETSC_TRUE;
3170     }
3171   } else c->diag = 0;
3172 
3173   c->nz         = a->nz;
3174   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3175   c->solve_work = NULL;
3176   c->mult_work  = NULL;
3177   c->sor_workt  = NULL;
3178   c->sor_work   = NULL;
3179 
3180   c->compressedrow.use   = a->compressedrow.use;
3181   c->compressedrow.nrows = a->compressedrow.nrows;
3182   if (a->compressedrow.use) {
3183     i    = a->compressedrow.nrows;
3184     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3185     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3186     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3187     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3188   } else {
3189     c->compressedrow.use    = PETSC_FALSE;
3190     c->compressedrow.i      = NULL;
3191     c->compressedrow.rindex = NULL;
3192   }
3193   C->nonzerostate = A->nonzerostate;
3194 
3195   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3196   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3197   PetscFunctionReturn(0);
3198 }
3199 
3200 #undef __FUNCT__
3201 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3202 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3203 {
3204   PetscErrorCode ierr;
3205 
3206   PetscFunctionBegin;
3207   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3208   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3209   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3210   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3211   PetscFunctionReturn(0);
3212 }
3213 
3214 #undef __FUNCT__
3215 #define __FUNCT__ "MatLoad_SeqBAIJ"
3216 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3217 {
3218   Mat_SeqBAIJ    *a;
3219   PetscErrorCode ierr;
3220   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3221   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3222   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3223   PetscInt       *masked,nmask,tmp,bs2,ishift;
3224   PetscMPIInt    size;
3225   int            fd;
3226   PetscScalar    *aa;
3227   MPI_Comm       comm;
3228 
3229   PetscFunctionBegin;
3230   /* force binary viewer to load .info file if it has not yet done so */
3231   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3232   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3233   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3234   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3235   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3236   if (bs < 0) bs = 1;
3237   bs2  = bs*bs;
3238 
3239   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3240   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3241   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3242   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3243   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3244   M = header[1]; N = header[2]; nz = header[3];
3245 
3246   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3247   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3248 
3249   /*
3250      This code adds extra rows to make sure the number of rows is
3251     divisible by the blocksize
3252   */
3253   mbs        = M/bs;
3254   extra_rows = bs - M + bs*(mbs);
3255   if (extra_rows == bs) extra_rows = 0;
3256   else mbs++;
3257   if (extra_rows) {
3258     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3259   }
3260 
3261   /* Set global sizes if not already set */
3262   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3263     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3264   } else { /* Check if the matrix global sizes are correct */
3265     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3266     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3267       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3268     }
3269     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3270   }
3271 
3272   /* read in row lengths */
3273   ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
3274   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3275   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3276 
3277   /* read in column indices */
3278   ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr);
3279   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3280   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3281 
3282   /* loop over row lengths determining block row lengths */
3283   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3284   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3285   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3286   rowcount = 0;
3287   nzcount  = 0;
3288   for (i=0; i<mbs; i++) {
3289     nmask = 0;
3290     for (j=0; j<bs; j++) {
3291       kmax = rowlengths[rowcount];
3292       for (k=0; k<kmax; k++) {
3293         tmp = jj[nzcount++]/bs;
3294         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3295       }
3296       rowcount++;
3297     }
3298     browlengths[i] += nmask;
3299     /* zero out the mask elements we set */
3300     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3301   }
3302 
3303   /* Do preallocation  */
3304   ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr);
3305   a    = (Mat_SeqBAIJ*)newmat->data;
3306 
3307   /* set matrix "i" values */
3308   a->i[0] = 0;
3309   for (i=1; i<= mbs; i++) {
3310     a->i[i]      = a->i[i-1] + browlengths[i-1];
3311     a->ilen[i-1] = browlengths[i-1];
3312   }
3313   a->nz = 0;
3314   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3315 
3316   /* read in nonzero values */
3317   ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr);
3318   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3319   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3320 
3321   /* set "a" and "j" values into matrix */
3322   nzcount = 0; jcount = 0;
3323   for (i=0; i<mbs; i++) {
3324     nzcountb = nzcount;
3325     nmask    = 0;
3326     for (j=0; j<bs; j++) {
3327       kmax = rowlengths[i*bs+j];
3328       for (k=0; k<kmax; k++) {
3329         tmp = jj[nzcount++]/bs;
3330         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3331       }
3332     }
3333     /* sort the masked values */
3334     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3335 
3336     /* set "j" values into matrix */
3337     maskcount = 1;
3338     for (j=0; j<nmask; j++) {
3339       a->j[jcount++]  = masked[j];
3340       mask[masked[j]] = maskcount++;
3341     }
3342     /* set "a" values into matrix */
3343     ishift = bs2*a->i[i];
3344     for (j=0; j<bs; j++) {
3345       kmax = rowlengths[i*bs+j];
3346       for (k=0; k<kmax; k++) {
3347         tmp       = jj[nzcountb]/bs;
3348         block     = mask[tmp] - 1;
3349         point     = jj[nzcountb] - bs*tmp;
3350         idx       = ishift + bs2*block + j + bs*point;
3351         a->a[idx] = (MatScalar)aa[nzcountb++];
3352       }
3353     }
3354     /* zero out the mask elements we set */
3355     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3356   }
3357   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3358 
3359   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3360   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3361   ierr = PetscFree(aa);CHKERRQ(ierr);
3362   ierr = PetscFree(jj);CHKERRQ(ierr);
3363   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3364 
3365   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3366   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3367   PetscFunctionReturn(0);
3368 }
3369 
3370 #undef __FUNCT__
3371 #define __FUNCT__ "MatCreateSeqBAIJ"
3372 /*@C
3373    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3374    compressed row) format.  For good matrix assembly performance the
3375    user should preallocate the matrix storage by setting the parameter nz
3376    (or the array nnz).  By setting these parameters accurately, performance
3377    during matrix assembly can be increased by more than a factor of 50.
3378 
3379    Collective on MPI_Comm
3380 
3381    Input Parameters:
3382 +  comm - MPI communicator, set to PETSC_COMM_SELF
3383 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3384           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3385 .  m - number of rows
3386 .  n - number of columns
3387 .  nz - number of nonzero blocks  per block row (same for all rows)
3388 -  nnz - array containing the number of nonzero blocks in the various block rows
3389          (possibly different for each block row) or NULL
3390 
3391    Output Parameter:
3392 .  A - the matrix
3393 
3394    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3395    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3396    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3397 
3398    Options Database Keys:
3399 .   -mat_no_unroll - uses code that does not unroll the loops in the
3400                      block calculations (much slower)
3401 .    -mat_block_size - size of the blocks to use
3402 
3403    Level: intermediate
3404 
3405    Notes:
3406    The number of rows and columns must be divisible by blocksize.
3407 
3408    If the nnz parameter is given then the nz parameter is ignored
3409 
3410    A nonzero block is any block that as 1 or more nonzeros in it
3411 
3412    The block AIJ format is fully compatible with standard Fortran 77
3413    storage.  That is, the stored row and column indices can begin at
3414    either one (as in Fortran) or zero.  See the users' manual for details.
3415 
3416    Specify the preallocated storage with either nz or nnz (not both).
3417    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3418    allocation.  See Users-Manual: ch_mat for details.
3419    matrices.
3420 
3421 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3422 @*/
3423 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3424 {
3425   PetscErrorCode ierr;
3426 
3427   PetscFunctionBegin;
3428   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3429   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3430   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3431   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3432   PetscFunctionReturn(0);
3433 }
3434 
3435 #undef __FUNCT__
3436 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3437 /*@C
3438    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3439    per row in the matrix. For good matrix assembly performance the
3440    user should preallocate the matrix storage by setting the parameter nz
3441    (or the array nnz).  By setting these parameters accurately, performance
3442    during matrix assembly can be increased by more than a factor of 50.
3443 
3444    Collective on MPI_Comm
3445 
3446    Input Parameters:
3447 +  B - the matrix
3448 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3449           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3450 .  nz - number of block nonzeros per block row (same for all rows)
3451 -  nnz - array containing the number of block nonzeros in the various block rows
3452          (possibly different for each block row) or NULL
3453 
3454    Options Database Keys:
3455 .   -mat_no_unroll - uses code that does not unroll the loops in the
3456                      block calculations (much slower)
3457 .   -mat_block_size - size of the blocks to use
3458 
3459    Level: intermediate
3460 
3461    Notes:
3462    If the nnz parameter is given then the nz parameter is ignored
3463 
3464    You can call MatGetInfo() to get information on how effective the preallocation was;
3465    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3466    You can also run with the option -info and look for messages with the string
3467    malloc in them to see if additional memory allocation was needed.
3468 
3469    The block AIJ format is fully compatible with standard Fortran 77
3470    storage.  That is, the stored row and column indices can begin at
3471    either one (as in Fortran) or zero.  See the users' manual for details.
3472 
3473    Specify the preallocated storage with either nz or nnz (not both).
3474    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3475    allocation.  See Users-Manual: ch_mat for details.
3476 
3477 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3478 @*/
3479 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3480 {
3481   PetscErrorCode ierr;
3482 
3483   PetscFunctionBegin;
3484   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3485   PetscValidType(B,1);
3486   PetscValidLogicalCollectiveInt(B,bs,2);
3487   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3488   PetscFunctionReturn(0);
3489 }
3490 
3491 #undef __FUNCT__
3492 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3493 /*@C
3494    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3495    (the default sequential PETSc format).
3496 
3497    Collective on MPI_Comm
3498 
3499    Input Parameters:
3500 +  B - the matrix
3501 .  i - the indices into j for the start of each local row (starts with zero)
3502 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3503 -  v - optional values in the matrix
3504 
3505    Level: developer
3506 
3507    Notes:
3508    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3509    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3510    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3511    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3512    block column and the second index is over columns within a block.
3513 
3514 .keywords: matrix, aij, compressed row, sparse
3515 
3516 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3517 @*/
3518 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3519 {
3520   PetscErrorCode ierr;
3521 
3522   PetscFunctionBegin;
3523   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3524   PetscValidType(B,1);
3525   PetscValidLogicalCollectiveInt(B,bs,2);
3526   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3527   PetscFunctionReturn(0);
3528 }
3529 
3530 
3531 #undef __FUNCT__
3532 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3533 /*@
3534      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3535 
3536      Collective on MPI_Comm
3537 
3538    Input Parameters:
3539 +  comm - must be an MPI communicator of size 1
3540 .  bs - size of block
3541 .  m - number of rows
3542 .  n - number of columns
3543 .  i - row indices
3544 .  j - column indices
3545 -  a - matrix values
3546 
3547    Output Parameter:
3548 .  mat - the matrix
3549 
3550    Level: advanced
3551 
3552    Notes:
3553        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3554     once the matrix is destroyed
3555 
3556        You cannot set new nonzero locations into this matrix, that will generate an error.
3557 
3558        The i and j indices are 0 based
3559 
3560        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3561 
3562       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3563       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3564       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3565       with column-major ordering within blocks.
3566 
3567 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3568 
3569 @*/
3570 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3571 {
3572   PetscErrorCode ierr;
3573   PetscInt       ii;
3574   Mat_SeqBAIJ    *baij;
3575 
3576   PetscFunctionBegin;
3577   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3578   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3579 
3580   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3581   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3582   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3583   ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3584   baij = (Mat_SeqBAIJ*)(*mat)->data;
3585   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3586   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3587 
3588   baij->i = i;
3589   baij->j = j;
3590   baij->a = a;
3591 
3592   baij->singlemalloc = PETSC_FALSE;
3593   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3594   baij->free_a       = PETSC_FALSE;
3595   baij->free_ij      = PETSC_FALSE;
3596 
3597   for (ii=0; ii<m; ii++) {
3598     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3599 #if defined(PETSC_USE_DEBUG)
3600     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3601 #endif
3602   }
3603 #if defined(PETSC_USE_DEBUG)
3604   for (ii=0; ii<baij->i[m]; ii++) {
3605     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3606     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3607   }
3608 #endif
3609 
3610   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3611   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3612   PetscFunctionReturn(0);
3613 }
3614 
3615 #undef __FUNCT__
3616 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_SeqBAIJ"
3617 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3618 {
3619   PetscErrorCode ierr;
3620 
3621   PetscFunctionBegin;
3622   ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3623   PetscFunctionReturn(0);
3624 }
3625