xref: /petsc/src/mat/impls/baij/seq/baij.c (revision c688d0420b4e513ff34944d1e1ad7d4e50aafa8d)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
21 
22   PetscFunctionBegin;
23   allowzeropivot = PetscNot(A->erroriffailure);
24 
25   if (a->idiagvalid) {
26     if (values) *values = a->idiag;
27     PetscFunctionReturn(0);
28   }
29   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
30   diag_offset = a->diag;
31   if (!a->idiag) {
32     ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr);
33     ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
34   }
35   diag  = a->idiag;
36   mdiag = a->idiag+bs2*mbs;
37   if (values) *values = a->idiag;
38   /* factor and invert each block */
39   switch (bs) {
40   case 1:
41     for (i=0; i<mbs; i++) {
42       odiag    = v + 1*diag_offset[i];
43       diag[0]  = odiag[0];
44       mdiag[0] = odiag[0];
45 
46       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
47         if (allowzeropivot) {
48           A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
49           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
50         } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",i);
51       }
52 
53       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
54       diag    += 1;
55       mdiag   += 1;
56     }
57     break;
58   case 2:
59     for (i=0; i<mbs; i++) {
60       odiag    = v + 4*diag_offset[i];
61       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
62       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
63       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
64       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
65       diag    += 4;
66       mdiag   += 4;
67     }
68     break;
69   case 3:
70     for (i=0; i<mbs; i++) {
71       odiag    = v + 9*diag_offset[i];
72       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
73       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
74       diag[8]  = odiag[8];
75       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
76       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
77       mdiag[8] = odiag[8];
78       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
79       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
80       diag    += 9;
81       mdiag   += 9;
82     }
83     break;
84   case 4:
85     for (i=0; i<mbs; i++) {
86       odiag  = v + 16*diag_offset[i];
87       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
88       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
89       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
90       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
91       diag  += 16;
92       mdiag += 16;
93     }
94     break;
95   case 5:
96     for (i=0; i<mbs; i++) {
97       odiag  = v + 25*diag_offset[i];
98       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
99       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
100       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
101       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
102       diag  += 25;
103       mdiag += 25;
104     }
105     break;
106   case 6:
107     for (i=0; i<mbs; i++) {
108       odiag  = v + 36*diag_offset[i];
109       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
110       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
111       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
112       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
113       diag  += 36;
114       mdiag += 36;
115     }
116     break;
117   case 7:
118     for (i=0; i<mbs; i++) {
119       odiag  = v + 49*diag_offset[i];
120       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
121       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
122       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
123       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
124       diag  += 49;
125       mdiag += 49;
126     }
127     break;
128   default:
129     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
130     for (i=0; i<mbs; i++) {
131       odiag  = v + bs2*diag_offset[i];
132       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
133       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
134       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
135       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
136       diag  += bs2;
137       mdiag += bs2;
138     }
139     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
140   }
141   a->idiagvalid = PETSC_TRUE;
142   PetscFunctionReturn(0);
143 }
144 
145 #undef __FUNCT__
146 #define __FUNCT__ "MatSOR_SeqBAIJ"
147 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
148 {
149   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
150   PetscScalar       *x,*work,*w,*workt,*t;
151   const MatScalar   *v,*aa = a->a, *idiag;
152   const PetscScalar *b,*xb;
153   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
154   PetscErrorCode    ierr;
155   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
156   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
157 
158   PetscFunctionBegin;
159   if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */
160   its = its*lits;
161   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
162   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
163   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
164   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
165   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
166 
167   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
168 
169   if (!m) PetscFunctionReturn(0);
170   diag  = a->diag;
171   idiag = a->idiag;
172   k    = PetscMax(A->rmap->n,A->cmap->n);
173   if (!a->mult_work) {
174     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
175   }
176   if (!a->sor_workt) {
177     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
178   }
179   if (!a->sor_work) {
180     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
181   }
182   work = a->mult_work;
183   t    = a->sor_workt;
184   w    = a->sor_work;
185 
186   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
187   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
188 
189   if (flag & SOR_ZERO_INITIAL_GUESS) {
190     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
191       switch (bs) {
192       case 1:
193         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
194         t[0] = b[0];
195         i2     = 1;
196         idiag += 1;
197         for (i=1; i<m; i++) {
198           v  = aa + ai[i];
199           vi = aj + ai[i];
200           nz = diag[i] - ai[i];
201           s[0] = b[i2];
202           for (j=0; j<nz; j++) {
203             xw[0] = x[vi[j]];
204             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
205           }
206           t[i2] = s[0];
207           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
208           x[i2]  = xw[0];
209           idiag += 1;
210           i2    += 1;
211         }
212         break;
213       case 2:
214         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
215         t[0] = b[0]; t[1] = b[1];
216         i2     = 2;
217         idiag += 4;
218         for (i=1; i<m; i++) {
219           v  = aa + 4*ai[i];
220           vi = aj + ai[i];
221           nz = diag[i] - ai[i];
222           s[0] = b[i2]; s[1] = b[i2+1];
223           for (j=0; j<nz; j++) {
224             idx = 2*vi[j];
225             it  = 4*j;
226             xw[0] = x[idx]; xw[1] = x[1+idx];
227             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
228           }
229           t[i2] = s[0]; t[i2+1] = s[1];
230           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
231           x[i2]   = xw[0]; x[i2+1] = xw[1];
232           idiag  += 4;
233           i2     += 2;
234         }
235         break;
236       case 3:
237         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
238         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
239         i2     = 3;
240         idiag += 9;
241         for (i=1; i<m; i++) {
242           v  = aa + 9*ai[i];
243           vi = aj + ai[i];
244           nz = diag[i] - ai[i];
245           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
246           while (nz--) {
247             idx = 3*(*vi++);
248             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
249             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
250             v  += 9;
251           }
252           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
253           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
254           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
255           idiag  += 9;
256           i2     += 3;
257         }
258         break;
259       case 4:
260         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
261         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
262         i2     = 4;
263         idiag += 16;
264         for (i=1; i<m; i++) {
265           v  = aa + 16*ai[i];
266           vi = aj + ai[i];
267           nz = diag[i] - ai[i];
268           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
269           while (nz--) {
270             idx = 4*(*vi++);
271             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
272             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
273             v  += 16;
274           }
275           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
276           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
277           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
278           idiag  += 16;
279           i2     += 4;
280         }
281         break;
282       case 5:
283         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
284         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
285         i2     = 5;
286         idiag += 25;
287         for (i=1; i<m; i++) {
288           v  = aa + 25*ai[i];
289           vi = aj + ai[i];
290           nz = diag[i] - ai[i];
291           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
292           while (nz--) {
293             idx = 5*(*vi++);
294             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
295             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
296             v  += 25;
297           }
298           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
299           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
300           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
301           idiag  += 25;
302           i2     += 5;
303         }
304         break;
305       case 6:
306         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
307         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
308         i2     = 6;
309         idiag += 36;
310         for (i=1; i<m; i++) {
311           v  = aa + 36*ai[i];
312           vi = aj + ai[i];
313           nz = diag[i] - ai[i];
314           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
315           while (nz--) {
316             idx = 6*(*vi++);
317             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
318             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
319             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
320             v  += 36;
321           }
322           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
323           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
324           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
325           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
326           idiag  += 36;
327           i2     += 6;
328         }
329         break;
330       case 7:
331         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
332         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
333         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
334         i2     = 7;
335         idiag += 49;
336         for (i=1; i<m; i++) {
337           v  = aa + 49*ai[i];
338           vi = aj + ai[i];
339           nz = diag[i] - ai[i];
340           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
341           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
342           while (nz--) {
343             idx = 7*(*vi++);
344             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
345             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
346             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
347             v  += 49;
348           }
349           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
350           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
351           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
352           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
353           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
354           idiag  += 49;
355           i2     += 7;
356         }
357         break;
358       default:
359         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
360         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
361         i2     = bs;
362         idiag += bs2;
363         for (i=1; i<m; i++) {
364           v  = aa + bs2*ai[i];
365           vi = aj + ai[i];
366           nz = diag[i] - ai[i];
367 
368           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
369           /* copy all rows of x that are needed into contiguous space */
370           workt = work;
371           for (j=0; j<nz; j++) {
372             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
373             workt += bs;
374           }
375           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
376           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
377           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
378 
379           idiag += bs2;
380           i2    += bs;
381         }
382         break;
383       }
384       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
385       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
386       xb = t;
387     }
388     else xb = b;
389     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
390       idiag = a->idiag+bs2*(a->mbs-1);
391       i2 = bs * (m-1);
392       switch (bs) {
393       case 1:
394         s[0]  = xb[i2];
395         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
396         x[i2] = xw[0];
397         i2   -= 1;
398         for (i=m-2; i>=0; i--) {
399           v  = aa + (diag[i]+1);
400           vi = aj + diag[i] + 1;
401           nz = ai[i+1] - diag[i] - 1;
402           s[0] = xb[i2];
403           for (j=0; j<nz; j++) {
404             xw[0] = x[vi[j]];
405             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
406           }
407           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
408           x[i2]  = xw[0];
409           idiag -= 1;
410           i2    -= 1;
411         }
412         break;
413       case 2:
414         s[0]  = xb[i2]; s[1] = xb[i2+1];
415         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
416         x[i2] = xw[0]; x[i2+1] = xw[1];
417         i2    -= 2;
418         idiag -= 4;
419         for (i=m-2; i>=0; i--) {
420           v  = aa + 4*(diag[i] + 1);
421           vi = aj + diag[i] + 1;
422           nz = ai[i+1] - diag[i] - 1;
423           s[0] = xb[i2]; s[1] = xb[i2+1];
424           for (j=0; j<nz; j++) {
425             idx = 2*vi[j];
426             it  = 4*j;
427             xw[0] = x[idx]; xw[1] = x[1+idx];
428             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
429           }
430           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
431           x[i2]   = xw[0]; x[i2+1] = xw[1];
432           idiag  -= 4;
433           i2     -= 2;
434         }
435         break;
436       case 3:
437         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
438         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
439         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
440         i2    -= 3;
441         idiag -= 9;
442         for (i=m-2; i>=0; i--) {
443           v  = aa + 9*(diag[i]+1);
444           vi = aj + diag[i] + 1;
445           nz = ai[i+1] - diag[i] - 1;
446           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
447           while (nz--) {
448             idx = 3*(*vi++);
449             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
450             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
451             v  += 9;
452           }
453           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
454           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
455           idiag  -= 9;
456           i2     -= 3;
457         }
458         break;
459       case 4:
460         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
461         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
462         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
463         i2    -= 4;
464         idiag -= 16;
465         for (i=m-2; i>=0; i--) {
466           v  = aa + 16*(diag[i]+1);
467           vi = aj + diag[i] + 1;
468           nz = ai[i+1] - diag[i] - 1;
469           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
470           while (nz--) {
471             idx = 4*(*vi++);
472             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
473             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
474             v  += 16;
475           }
476           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
477           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
478           idiag  -= 16;
479           i2     -= 4;
480         }
481         break;
482       case 5:
483         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
484         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
485         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
486         i2    -= 5;
487         idiag -= 25;
488         for (i=m-2; i>=0; i--) {
489           v  = aa + 25*(diag[i]+1);
490           vi = aj + diag[i] + 1;
491           nz = ai[i+1] - diag[i] - 1;
492           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
493           while (nz--) {
494             idx = 5*(*vi++);
495             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
496             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
497             v  += 25;
498           }
499           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
500           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
501           idiag  -= 25;
502           i2     -= 5;
503         }
504         break;
505       case 6:
506         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
507         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
508         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
509         i2    -= 6;
510         idiag -= 36;
511         for (i=m-2; i>=0; i--) {
512           v  = aa + 36*(diag[i]+1);
513           vi = aj + diag[i] + 1;
514           nz = ai[i+1] - diag[i] - 1;
515           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
516           while (nz--) {
517             idx = 6*(*vi++);
518             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
519             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
520             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
521             v  += 36;
522           }
523           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
524           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
525           idiag  -= 36;
526           i2     -= 6;
527         }
528         break;
529       case 7:
530         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
531         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
532         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
533         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
534         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
535         i2    -= 7;
536         idiag -= 49;
537         for (i=m-2; i>=0; i--) {
538           v  = aa + 49*(diag[i]+1);
539           vi = aj + diag[i] + 1;
540           nz = ai[i+1] - diag[i] - 1;
541           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
542           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
543           while (nz--) {
544             idx = 7*(*vi++);
545             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
546             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
547             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
548             v  += 49;
549           }
550           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
551           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
552           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
553           idiag  -= 49;
554           i2     -= 7;
555         }
556         break;
557       default:
558         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
559         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
560         i2    -= bs;
561         idiag -= bs2;
562         for (i=m-2; i>=0; i--) {
563           v  = aa + bs2*(diag[i]+1);
564           vi = aj + diag[i] + 1;
565           nz = ai[i+1] - diag[i] - 1;
566 
567           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
568           /* copy all rows of x that are needed into contiguous space */
569           workt = work;
570           for (j=0; j<nz; j++) {
571             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
572             workt += bs;
573           }
574           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
575           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
576 
577           idiag -= bs2;
578           i2    -= bs;
579         }
580         break;
581       }
582       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
583     }
584     its--;
585   }
586   while (its--) {
587     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
588       idiag = a->idiag;
589       i2 = 0;
590       switch (bs) {
591       case 1:
592         for (i=0; i<m; i++) {
593           v  = aa + ai[i];
594           vi = aj + ai[i];
595           nz = ai[i+1] - ai[i];
596           s[0] = b[i2];
597           for (j=0; j<nz; j++) {
598             xw[0] = x[vi[j]];
599             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
600           }
601           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
602           x[i2] += xw[0];
603           idiag += 1;
604           i2    += 1;
605         }
606         break;
607       case 2:
608         for (i=0; i<m; i++) {
609           v  = aa + 4*ai[i];
610           vi = aj + ai[i];
611           nz = ai[i+1] - ai[i];
612           s[0] = b[i2]; s[1] = b[i2+1];
613           for (j=0; j<nz; j++) {
614             idx = 2*vi[j];
615             it  = 4*j;
616             xw[0] = x[idx]; xw[1] = x[1+idx];
617             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
618           }
619           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
620           x[i2]  += xw[0]; x[i2+1] += xw[1];
621           idiag  += 4;
622           i2     += 2;
623         }
624         break;
625       case 3:
626         for (i=0; i<m; i++) {
627           v  = aa + 9*ai[i];
628           vi = aj + ai[i];
629           nz = ai[i+1] - ai[i];
630           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
631           while (nz--) {
632             idx = 3*(*vi++);
633             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
634             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
635             v  += 9;
636           }
637           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
638           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
639           idiag  += 9;
640           i2     += 3;
641         }
642         break;
643       case 4:
644         for (i=0; i<m; i++) {
645           v  = aa + 16*ai[i];
646           vi = aj + ai[i];
647           nz = ai[i+1] - ai[i];
648           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
649           while (nz--) {
650             idx = 4*(*vi++);
651             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
652             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
653             v  += 16;
654           }
655           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
656           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
657           idiag  += 16;
658           i2     += 4;
659         }
660         break;
661       case 5:
662         for (i=0; i<m; i++) {
663           v  = aa + 25*ai[i];
664           vi = aj + ai[i];
665           nz = ai[i+1] - ai[i];
666           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
667           while (nz--) {
668             idx = 5*(*vi++);
669             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
670             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
671             v  += 25;
672           }
673           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
674           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
675           idiag  += 25;
676           i2     += 5;
677         }
678         break;
679       case 6:
680         for (i=0; i<m; i++) {
681           v  = aa + 36*ai[i];
682           vi = aj + ai[i];
683           nz = ai[i+1] - ai[i];
684           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
685           while (nz--) {
686             idx = 6*(*vi++);
687             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
688             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
689             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
690             v  += 36;
691           }
692           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
693           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
694           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
695           idiag  += 36;
696           i2     += 6;
697         }
698         break;
699       case 7:
700         for (i=0; i<m; i++) {
701           v  = aa + 49*ai[i];
702           vi = aj + ai[i];
703           nz = ai[i+1] - ai[i];
704           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
705           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
706           while (nz--) {
707             idx = 7*(*vi++);
708             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
709             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
710             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
711             v  += 49;
712           }
713           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
714           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
715           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
716           idiag  += 49;
717           i2     += 7;
718         }
719         break;
720       default:
721         for (i=0; i<m; i++) {
722           v  = aa + bs2*ai[i];
723           vi = aj + ai[i];
724           nz = ai[i+1] - ai[i];
725 
726           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
727           /* copy all rows of x that are needed into contiguous space */
728           workt = work;
729           for (j=0; j<nz; j++) {
730             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
731             workt += bs;
732           }
733           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
734           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
735 
736           idiag += bs2;
737           i2    += bs;
738         }
739         break;
740       }
741       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
742     }
743     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
744       idiag = a->idiag+bs2*(a->mbs-1);
745       i2 = bs * (m-1);
746       switch (bs) {
747       case 1:
748         for (i=m-1; i>=0; i--) {
749           v  = aa + ai[i];
750           vi = aj + ai[i];
751           nz = ai[i+1] - ai[i];
752           s[0] = b[i2];
753           for (j=0; j<nz; j++) {
754             xw[0] = x[vi[j]];
755             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
756           }
757           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
758           x[i2] += xw[0];
759           idiag -= 1;
760           i2    -= 1;
761         }
762         break;
763       case 2:
764         for (i=m-1; i>=0; i--) {
765           v  = aa + 4*ai[i];
766           vi = aj + ai[i];
767           nz = ai[i+1] - ai[i];
768           s[0] = b[i2]; s[1] = b[i2+1];
769           for (j=0; j<nz; j++) {
770             idx = 2*vi[j];
771             it  = 4*j;
772             xw[0] = x[idx]; xw[1] = x[1+idx];
773             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
774           }
775           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
776           x[i2]  += xw[0]; x[i2+1] += xw[1];
777           idiag  -= 4;
778           i2     -= 2;
779         }
780         break;
781       case 3:
782         for (i=m-1; i>=0; i--) {
783           v  = aa + 9*ai[i];
784           vi = aj + ai[i];
785           nz = ai[i+1] - ai[i];
786           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
787           while (nz--) {
788             idx = 3*(*vi++);
789             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
790             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
791             v  += 9;
792           }
793           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
794           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
795           idiag  -= 9;
796           i2     -= 3;
797         }
798         break;
799       case 4:
800         for (i=m-1; i>=0; i--) {
801           v  = aa + 16*ai[i];
802           vi = aj + ai[i];
803           nz = ai[i+1] - ai[i];
804           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
805           while (nz--) {
806             idx = 4*(*vi++);
807             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
808             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
809             v  += 16;
810           }
811           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
812           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
813           idiag  -= 16;
814           i2     -= 4;
815         }
816         break;
817       case 5:
818         for (i=m-1; i>=0; i--) {
819           v  = aa + 25*ai[i];
820           vi = aj + ai[i];
821           nz = ai[i+1] - ai[i];
822           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
823           while (nz--) {
824             idx = 5*(*vi++);
825             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
826             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
827             v  += 25;
828           }
829           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
830           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
831           idiag  -= 25;
832           i2     -= 5;
833         }
834         break;
835       case 6:
836         for (i=m-1; i>=0; i--) {
837           v  = aa + 36*ai[i];
838           vi = aj + ai[i];
839           nz = ai[i+1] - ai[i];
840           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
841           while (nz--) {
842             idx = 6*(*vi++);
843             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
844             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
845             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
846             v  += 36;
847           }
848           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
849           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
850           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
851           idiag  -= 36;
852           i2     -= 6;
853         }
854         break;
855       case 7:
856         for (i=m-1; i>=0; i--) {
857           v  = aa + 49*ai[i];
858           vi = aj + ai[i];
859           nz = ai[i+1] - ai[i];
860           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
861           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
862           while (nz--) {
863             idx = 7*(*vi++);
864             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
865             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
866             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
867             v  += 49;
868           }
869           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
870           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
871           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
872           idiag  -= 49;
873           i2     -= 7;
874         }
875         break;
876       default:
877         for (i=m-1; i>=0; i--) {
878           v  = aa + bs2*ai[i];
879           vi = aj + ai[i];
880           nz = ai[i+1] - ai[i];
881 
882           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
883           /* copy all rows of x that are needed into contiguous space */
884           workt = work;
885           for (j=0; j<nz; j++) {
886             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
887             workt += bs;
888           }
889           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
890           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
891 
892           idiag -= bs2;
893           i2    -= bs;
894         }
895         break;
896       }
897       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
898     }
899   }
900   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
901   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
902   PetscFunctionReturn(0);
903 }
904 
905 
906 /*
907     Special version for direct calls from Fortran (Used in PETSc-fun3d)
908 */
909 #if defined(PETSC_HAVE_FORTRAN_CAPS)
910 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
911 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
912 #define matsetvaluesblocked4_ matsetvaluesblocked4
913 #endif
914 
915 #undef __FUNCT__
916 #define __FUNCT__ "matsetvaluesblocked4_"
917 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
918 {
919   Mat               A  = *AA;
920   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
921   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
922   PetscInt          *ai    =a->i,*ailen=a->ilen;
923   PetscInt          *aj    =a->j,stepval,lastcol = -1;
924   const PetscScalar *value = v;
925   MatScalar         *ap,*aa = a->a,*bap;
926 
927   PetscFunctionBegin;
928   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
929   stepval = (n-1)*4;
930   for (k=0; k<m; k++) { /* loop over added rows */
931     row  = im[k];
932     rp   = aj + ai[row];
933     ap   = aa + 16*ai[row];
934     nrow = ailen[row];
935     low  = 0;
936     high = nrow;
937     for (l=0; l<n; l++) { /* loop over added columns */
938       col = in[l];
939       if (col <= lastcol)  low = 0;
940       else                high = nrow;
941       lastcol = col;
942       value   = v + k*(stepval+4 + l)*4;
943       while (high-low > 7) {
944         t = (low+high)/2;
945         if (rp[t] > col) high = t;
946         else             low  = t;
947       }
948       for (i=low; i<high; i++) {
949         if (rp[i] > col) break;
950         if (rp[i] == col) {
951           bap = ap +  16*i;
952           for (ii=0; ii<4; ii++,value+=stepval) {
953             for (jj=ii; jj<16; jj+=4) {
954               bap[jj] += *value++;
955             }
956           }
957           goto noinsert2;
958         }
959       }
960       N = nrow++ - 1;
961       high++; /* added new column index thus must search to one higher than before */
962       /* shift up all the later entries in this row */
963       for (ii=N; ii>=i; ii--) {
964         rp[ii+1] = rp[ii];
965         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
966       }
967       if (N >= i) {
968         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
969       }
970       rp[i] = col;
971       bap   = ap +  16*i;
972       for (ii=0; ii<4; ii++,value+=stepval) {
973         for (jj=ii; jj<16; jj+=4) {
974           bap[jj] = *value++;
975         }
976       }
977       noinsert2:;
978       low = i;
979     }
980     ailen[row] = nrow;
981   }
982   PetscFunctionReturnVoid();
983 }
984 
985 #if defined(PETSC_HAVE_FORTRAN_CAPS)
986 #define matsetvalues4_ MATSETVALUES4
987 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
988 #define matsetvalues4_ matsetvalues4
989 #endif
990 
991 #undef __FUNCT__
992 #define __FUNCT__ "MatSetValues4_"
993 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
994 {
995   Mat         A  = *AA;
996   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
997   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
998   PetscInt    *ai=a->i,*ailen=a->ilen;
999   PetscInt    *aj=a->j,brow,bcol;
1000   PetscInt    ridx,cidx,lastcol = -1;
1001   MatScalar   *ap,value,*aa=a->a,*bap;
1002 
1003   PetscFunctionBegin;
1004   for (k=0; k<m; k++) { /* loop over added rows */
1005     row  = im[k]; brow = row/4;
1006     rp   = aj + ai[brow];
1007     ap   = aa + 16*ai[brow];
1008     nrow = ailen[brow];
1009     low  = 0;
1010     high = nrow;
1011     for (l=0; l<n; l++) { /* loop over added columns */
1012       col   = in[l]; bcol = col/4;
1013       ridx  = row % 4; cidx = col % 4;
1014       value = v[l + k*n];
1015       if (col <= lastcol)  low = 0;
1016       else                high = nrow;
1017       lastcol = col;
1018       while (high-low > 7) {
1019         t = (low+high)/2;
1020         if (rp[t] > bcol) high = t;
1021         else              low  = t;
1022       }
1023       for (i=low; i<high; i++) {
1024         if (rp[i] > bcol) break;
1025         if (rp[i] == bcol) {
1026           bap   = ap +  16*i + 4*cidx + ridx;
1027           *bap += value;
1028           goto noinsert1;
1029         }
1030       }
1031       N = nrow++ - 1;
1032       high++; /* added new column thus must search to one higher than before */
1033       /* shift up all the later entries in this row */
1034       for (ii=N; ii>=i; ii--) {
1035         rp[ii+1] = rp[ii];
1036         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1037       }
1038       if (N>=i) {
1039         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1040       }
1041       rp[i]                    = bcol;
1042       ap[16*i + 4*cidx + ridx] = value;
1043 noinsert1:;
1044       low = i;
1045     }
1046     ailen[brow] = nrow;
1047   }
1048   PetscFunctionReturnVoid();
1049 }
1050 
1051 /*
1052      Checks for missing diagonals
1053 */
1054 #undef __FUNCT__
1055 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1056 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1057 {
1058   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1059   PetscErrorCode ierr;
1060   PetscInt       *diag,*ii = a->i,i;
1061 
1062   PetscFunctionBegin;
1063   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1064   *missing = PETSC_FALSE;
1065   if (A->rmap->n > 0 && !ii) {
1066     *missing = PETSC_TRUE;
1067     if (d) *d = 0;
1068     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1069   } else {
1070     diag = a->diag;
1071     for (i=0; i<a->mbs; i++) {
1072       if (diag[i] >= ii[i+1]) {
1073         *missing = PETSC_TRUE;
1074         if (d) *d = i;
1075         PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);
1076         break;
1077       }
1078     }
1079   }
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 #undef __FUNCT__
1084 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1085 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1086 {
1087   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1088   PetscErrorCode ierr;
1089   PetscInt       i,j,m = a->mbs;
1090 
1091   PetscFunctionBegin;
1092   if (!a->diag) {
1093     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1094     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1095     a->free_diag = PETSC_TRUE;
1096   }
1097   for (i=0; i<m; i++) {
1098     a->diag[i] = a->i[i+1];
1099     for (j=a->i[i]; j<a->i[i+1]; j++) {
1100       if (a->j[j] == i) {
1101         a->diag[i] = j;
1102         break;
1103       }
1104     }
1105   }
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 
1110 #undef __FUNCT__
1111 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1112 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1113 {
1114   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1115   PetscErrorCode ierr;
1116   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1117   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1118 
1119   PetscFunctionBegin;
1120   *nn = n;
1121   if (!ia) PetscFunctionReturn(0);
1122   if (symmetric) {
1123     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1124     nz   = tia[n];
1125   } else {
1126     tia = a->i; tja = a->j;
1127   }
1128 
1129   if (!blockcompressed && bs > 1) {
1130     (*nn) *= bs;
1131     /* malloc & create the natural set of indices */
1132     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1133     if (n) {
1134       (*ia)[0] = oshift;
1135       for (j=1; j<bs; j++) {
1136         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1137       }
1138     }
1139 
1140     for (i=1; i<n; i++) {
1141       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1142       for (j=1; j<bs; j++) {
1143         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1144       }
1145     }
1146     if (n) {
1147       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1148     }
1149 
1150     if (inja) {
1151       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1152       cnt = 0;
1153       for (i=0; i<n; i++) {
1154         for (j=0; j<bs; j++) {
1155           for (k=tia[i]; k<tia[i+1]; k++) {
1156             for (l=0; l<bs; l++) {
1157               (*ja)[cnt++] = bs*tja[k] + l;
1158             }
1159           }
1160         }
1161       }
1162     }
1163 
1164     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1165       ierr = PetscFree(tia);CHKERRQ(ierr);
1166       ierr = PetscFree(tja);CHKERRQ(ierr);
1167     }
1168   } else if (oshift == 1) {
1169     if (symmetric) {
1170       nz = tia[A->rmap->n/bs];
1171       /*  add 1 to i and j indices */
1172       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1173       *ia = tia;
1174       if (ja) {
1175         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1176         *ja = tja;
1177       }
1178     } else {
1179       nz = a->i[A->rmap->n/bs];
1180       /* malloc space and  add 1 to i and j indices */
1181       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1182       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1183       if (ja) {
1184         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1185         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1186       }
1187     }
1188   } else {
1189     *ia = tia;
1190     if (ja) *ja = tja;
1191   }
1192   PetscFunctionReturn(0);
1193 }
1194 
1195 #undef __FUNCT__
1196 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1197 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1198 {
1199   PetscErrorCode ierr;
1200 
1201   PetscFunctionBegin;
1202   if (!ia) PetscFunctionReturn(0);
1203   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1204     ierr = PetscFree(*ia);CHKERRQ(ierr);
1205     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1206   }
1207   PetscFunctionReturn(0);
1208 }
1209 
1210 #undef __FUNCT__
1211 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1212 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1213 {
1214   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1215   PetscErrorCode ierr;
1216 
1217   PetscFunctionBegin;
1218 #if defined(PETSC_USE_LOG)
1219   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1220 #endif
1221   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1222   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1223   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1224   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1225   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1226   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1227   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1228   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1229   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1230   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1231   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1232   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1233   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1234 
1235   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1236   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1237   ierr = PetscFree(A->data);CHKERRQ(ierr);
1238 
1239   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1240   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1241   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1242   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1243   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1244   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1245   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1246   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1247   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1248   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1249   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 #undef __FUNCT__
1254 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1255 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1256 {
1257   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1258   PetscErrorCode ierr;
1259 
1260   PetscFunctionBegin;
1261   switch (op) {
1262   case MAT_ROW_ORIENTED:
1263     a->roworiented = flg;
1264     break;
1265   case MAT_KEEP_NONZERO_PATTERN:
1266     a->keepnonzeropattern = flg;
1267     break;
1268   case MAT_NEW_NONZERO_LOCATIONS:
1269     a->nonew = (flg ? 0 : 1);
1270     break;
1271   case MAT_NEW_NONZERO_LOCATION_ERR:
1272     a->nonew = (flg ? -1 : 0);
1273     break;
1274   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1275     a->nonew = (flg ? -2 : 0);
1276     break;
1277   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1278     a->nounused = (flg ? -1 : 0);
1279     break;
1280   case MAT_NEW_DIAGONALS:
1281   case MAT_IGNORE_OFF_PROC_ENTRIES:
1282   case MAT_USE_HASH_TABLE:
1283     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1284     break;
1285   case MAT_SPD:
1286   case MAT_SYMMETRIC:
1287   case MAT_STRUCTURALLY_SYMMETRIC:
1288   case MAT_HERMITIAN:
1289   case MAT_SYMMETRY_ETERNAL:
1290     /* These options are handled directly by MatSetOption() */
1291     break;
1292   default:
1293     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1294   }
1295   PetscFunctionReturn(0);
1296 }
1297 
1298 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1299 #undef __FUNCT__
1300 #define __FUNCT__ "MatGetRow_SeqBAIJ_private"
1301 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1302 {
1303   PetscErrorCode ierr;
1304   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1305   MatScalar      *aa_i;
1306   PetscScalar    *v_i;
1307 
1308   PetscFunctionBegin;
1309   bs  = A->rmap->bs;
1310   bs2 = bs*bs;
1311   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1312 
1313   bn  = row/bs;   /* Block number */
1314   bp  = row % bs; /* Block Position */
1315   M   = ai[bn+1] - ai[bn];
1316   *nz = bs*M;
1317 
1318   if (v) {
1319     *v = 0;
1320     if (*nz) {
1321       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1322       for (i=0; i<M; i++) { /* for each block in the block row */
1323         v_i  = *v + i*bs;
1324         aa_i = aa + bs2*(ai[bn] + i);
1325         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1326       }
1327     }
1328   }
1329 
1330   if (idx) {
1331     *idx = 0;
1332     if (*nz) {
1333       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1334       for (i=0; i<M; i++) { /* for each block in the block row */
1335         idx_i = *idx + i*bs;
1336         itmp  = bs*aj[ai[bn] + i];
1337         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1338       }
1339     }
1340   }
1341   PetscFunctionReturn(0);
1342 }
1343 
1344 #undef __FUNCT__
1345 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1346 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1347 {
1348   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1349   PetscErrorCode ierr;
1350 
1351   PetscFunctionBegin;
1352   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1353   PetscFunctionReturn(0);
1354 }
1355 
1356 #undef __FUNCT__
1357 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1358 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1359 {
1360   PetscErrorCode ierr;
1361 
1362   PetscFunctionBegin;
1363   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1364   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1365   PetscFunctionReturn(0);
1366 }
1367 
1368 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1369 
1370 #undef __FUNCT__
1371 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1372 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1373 {
1374   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1375   Mat            C;
1376   PetscErrorCode ierr;
1377   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1378   PetscInt       *rows,*cols,bs2=a->bs2;
1379   MatScalar      *array;
1380 
1381   PetscFunctionBegin;
1382   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1383   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1384     ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr);
1385 
1386     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1387     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1388     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1389     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1390     ierr = MatSeqBAIJSetPreallocation(C,bs,0,col);CHKERRQ(ierr);
1391     ierr = PetscFree(col);CHKERRQ(ierr);
1392   } else {
1393     C = *B;
1394   }
1395 
1396   array = a->a;
1397   ierr  = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr);
1398   for (i=0; i<mbs; i++) {
1399     cols[0] = i*bs;
1400     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1401     len = ai[i+1] - ai[i];
1402     for (j=0; j<len; j++) {
1403       rows[0] = (*aj++)*bs;
1404       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1405       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1406       array += bs2;
1407     }
1408   }
1409   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1410 
1411   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1412   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1413 
1414   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1415     *B = C;
1416   } else {
1417     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1418   }
1419   PetscFunctionReturn(0);
1420 }
1421 
1422 #undef __FUNCT__
1423 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1424 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1425 {
1426   PetscErrorCode ierr;
1427   Mat            Btrans;
1428 
1429   PetscFunctionBegin;
1430   *f   = PETSC_FALSE;
1431   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1432   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1433   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1434   PetscFunctionReturn(0);
1435 }
1436 
1437 #undef __FUNCT__
1438 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1439 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1440 {
1441   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1442   PetscErrorCode ierr;
1443   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1444   int            fd;
1445   PetscScalar    *aa;
1446   FILE           *file;
1447 
1448   PetscFunctionBegin;
1449   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1450   ierr        = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr);
1451   col_lens[0] = MAT_FILE_CLASSID;
1452 
1453   col_lens[1] = A->rmap->N;
1454   col_lens[2] = A->cmap->n;
1455   col_lens[3] = a->nz*bs2;
1456 
1457   /* store lengths of each row and write (including header) to file */
1458   count = 0;
1459   for (i=0; i<a->mbs; i++) {
1460     for (j=0; j<bs; j++) {
1461       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1462     }
1463   }
1464   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1465   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1466 
1467   /* store column indices (zero start index) */
1468   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1469   count = 0;
1470   for (i=0; i<a->mbs; i++) {
1471     for (j=0; j<bs; j++) {
1472       for (k=a->i[i]; k<a->i[i+1]; k++) {
1473         for (l=0; l<bs; l++) {
1474           jj[count++] = bs*a->j[k] + l;
1475         }
1476       }
1477     }
1478   }
1479   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1480   ierr = PetscFree(jj);CHKERRQ(ierr);
1481 
1482   /* store nonzero values */
1483   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1484   count = 0;
1485   for (i=0; i<a->mbs; i++) {
1486     for (j=0; j<bs; j++) {
1487       for (k=a->i[i]; k<a->i[i+1]; k++) {
1488         for (l=0; l<bs; l++) {
1489           aa[count++] = a->a[bs2*k + l*bs + j];
1490         }
1491       }
1492     }
1493   }
1494   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1495   ierr = PetscFree(aa);CHKERRQ(ierr);
1496 
1497   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1498   if (file) {
1499     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1500   }
1501   PetscFunctionReturn(0);
1502 }
1503 
1504 #undef __FUNCT__
1505 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1506 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1507 {
1508   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1509   PetscErrorCode    ierr;
1510   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1511   PetscViewerFormat format;
1512 
1513   PetscFunctionBegin;
1514   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1515   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1516     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1517   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1518     const char *matname;
1519     Mat        aij;
1520     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1521     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1522     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1523     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1524     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1525   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1526       PetscFunctionReturn(0);
1527   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1528     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1529     for (i=0; i<a->mbs; i++) {
1530       for (j=0; j<bs; j++) {
1531         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1532         for (k=a->i[i]; k<a->i[i+1]; k++) {
1533           for (l=0; l<bs; l++) {
1534 #if defined(PETSC_USE_COMPLEX)
1535             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1536               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1537                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1538             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1539               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1540                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1541             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1542               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1543             }
1544 #else
1545             if (a->a[bs2*k + l*bs + j] != 0.0) {
1546               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1547             }
1548 #endif
1549           }
1550         }
1551         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1552       }
1553     }
1554     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1555   } else {
1556     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1557     for (i=0; i<a->mbs; i++) {
1558       for (j=0; j<bs; j++) {
1559         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1560         for (k=a->i[i]; k<a->i[i+1]; k++) {
1561           for (l=0; l<bs; l++) {
1562 #if defined(PETSC_USE_COMPLEX)
1563             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1564               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1565                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1566             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1567               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1568                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1569             } else {
1570               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1571             }
1572 #else
1573             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1574 #endif
1575           }
1576         }
1577         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1578       }
1579     }
1580     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1581   }
1582   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1583   PetscFunctionReturn(0);
1584 }
1585 
1586 #include <petscdraw.h>
1587 #undef __FUNCT__
1588 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1589 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1590 {
1591   Mat               A = (Mat) Aa;
1592   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1593   PetscErrorCode    ierr;
1594   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1595   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1596   MatScalar         *aa;
1597   PetscViewer       viewer;
1598   PetscViewerFormat format;
1599 
1600   PetscFunctionBegin;
1601   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1602   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1603   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1604 
1605   /* loop over matrix elements drawing boxes */
1606 
1607   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1608     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1609     /* Blue for negative, Cyan for zero and  Red for positive */
1610     color = PETSC_DRAW_BLUE;
1611     for (i=0,row=0; i<mbs; i++,row+=bs) {
1612       for (j=a->i[i]; j<a->i[i+1]; j++) {
1613         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1614         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1615         aa  = a->a + j*bs2;
1616         for (k=0; k<bs; k++) {
1617           for (l=0; l<bs; l++) {
1618             if (PetscRealPart(*aa++) >=  0.) continue;
1619             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1620           }
1621         }
1622       }
1623     }
1624     color = PETSC_DRAW_CYAN;
1625     for (i=0,row=0; i<mbs; i++,row+=bs) {
1626       for (j=a->i[i]; j<a->i[i+1]; j++) {
1627         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1628         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1629         aa  = a->a + j*bs2;
1630         for (k=0; k<bs; k++) {
1631           for (l=0; l<bs; l++) {
1632             if (PetscRealPart(*aa++) != 0.) continue;
1633             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1634           }
1635         }
1636       }
1637     }
1638     color = PETSC_DRAW_RED;
1639     for (i=0,row=0; i<mbs; i++,row+=bs) {
1640       for (j=a->i[i]; j<a->i[i+1]; j++) {
1641         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1642         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1643         aa  = a->a + j*bs2;
1644         for (k=0; k<bs; k++) {
1645           for (l=0; l<bs; l++) {
1646             if (PetscRealPart(*aa++) <= 0.) continue;
1647             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1648           }
1649         }
1650       }
1651     }
1652     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1653   } else {
1654     /* use contour shading to indicate magnitude of values */
1655     /* first determine max of all nonzero values */
1656     PetscReal minv = 0.0, maxv = 0.0;
1657     PetscDraw popup;
1658 
1659     for (i=0; i<a->nz*a->bs2; i++) {
1660       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1661     }
1662     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1663     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1664     ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1665 
1666     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1667     for (i=0,row=0; i<mbs; i++,row+=bs) {
1668       for (j=a->i[i]; j<a->i[i+1]; j++) {
1669         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1670         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1671         aa  = a->a + j*bs2;
1672         for (k=0; k<bs; k++) {
1673           for (l=0; l<bs; l++) {
1674             MatScalar v = *aa++;
1675             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1676             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1677           }
1678         }
1679       }
1680     }
1681     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1682   }
1683   PetscFunctionReturn(0);
1684 }
1685 
1686 #undef __FUNCT__
1687 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1688 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1689 {
1690   PetscErrorCode ierr;
1691   PetscReal      xl,yl,xr,yr,w,h;
1692   PetscDraw      draw;
1693   PetscBool      isnull;
1694 
1695   PetscFunctionBegin;
1696   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1697   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1698   if (isnull) PetscFunctionReturn(0);
1699 
1700   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1701   xr  += w;          yr += h;        xl = -w;     yl = -h;
1702   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1703   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1704   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1705   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1706   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1707   PetscFunctionReturn(0);
1708 }
1709 
1710 #undef __FUNCT__
1711 #define __FUNCT__ "MatView_SeqBAIJ"
1712 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1713 {
1714   PetscErrorCode ierr;
1715   PetscBool      iascii,isbinary,isdraw;
1716 
1717   PetscFunctionBegin;
1718   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1719   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1720   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1721   if (iascii) {
1722     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1723   } else if (isbinary) {
1724     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1725   } else if (isdraw) {
1726     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1727   } else {
1728     Mat B;
1729     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1730     ierr = MatView(B,viewer);CHKERRQ(ierr);
1731     ierr = MatDestroy(&B);CHKERRQ(ierr);
1732   }
1733   PetscFunctionReturn(0);
1734 }
1735 
1736 
1737 #undef __FUNCT__
1738 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1739 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1740 {
1741   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1742   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1743   PetscInt    *ai = a->i,*ailen = a->ilen;
1744   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1745   MatScalar   *ap,*aa = a->a;
1746 
1747   PetscFunctionBegin;
1748   for (k=0; k<m; k++) { /* loop over rows */
1749     row = im[k]; brow = row/bs;
1750     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1751     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1752     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1753     nrow = ailen[brow];
1754     for (l=0; l<n; l++) { /* loop over columns */
1755       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1756       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1757       col  = in[l];
1758       bcol = col/bs;
1759       cidx = col%bs;
1760       ridx = row%bs;
1761       high = nrow;
1762       low  = 0; /* assume unsorted */
1763       while (high-low > 5) {
1764         t = (low+high)/2;
1765         if (rp[t] > bcol) high = t;
1766         else             low  = t;
1767       }
1768       for (i=low; i<high; i++) {
1769         if (rp[i] > bcol) break;
1770         if (rp[i] == bcol) {
1771           *v++ = ap[bs2*i+bs*cidx+ridx];
1772           goto finished;
1773         }
1774       }
1775       *v++ = 0.0;
1776 finished:;
1777     }
1778   }
1779   PetscFunctionReturn(0);
1780 }
1781 
1782 #undef __FUNCT__
1783 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1784 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1785 {
1786   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1787   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1788   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1789   PetscErrorCode    ierr;
1790   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1791   PetscBool         roworiented=a->roworiented;
1792   const PetscScalar *value     = v;
1793   MatScalar         *ap,*aa = a->a,*bap;
1794 
1795   PetscFunctionBegin;
1796   if (roworiented) {
1797     stepval = (n-1)*bs;
1798   } else {
1799     stepval = (m-1)*bs;
1800   }
1801   for (k=0; k<m; k++) { /* loop over added rows */
1802     row = im[k];
1803     if (row < 0) continue;
1804 #if defined(PETSC_USE_DEBUG)
1805     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1806 #endif
1807     rp   = aj + ai[row];
1808     ap   = aa + bs2*ai[row];
1809     rmax = imax[row];
1810     nrow = ailen[row];
1811     low  = 0;
1812     high = nrow;
1813     for (l=0; l<n; l++) { /* loop over added columns */
1814       if (in[l] < 0) continue;
1815 #if defined(PETSC_USE_DEBUG)
1816       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1817 #endif
1818       col = in[l];
1819       if (roworiented) {
1820         value = v + (k*(stepval+bs) + l)*bs;
1821       } else {
1822         value = v + (l*(stepval+bs) + k)*bs;
1823       }
1824       if (col <= lastcol) low = 0;
1825       else high = nrow;
1826       lastcol = col;
1827       while (high-low > 7) {
1828         t = (low+high)/2;
1829         if (rp[t] > col) high = t;
1830         else             low  = t;
1831       }
1832       for (i=low; i<high; i++) {
1833         if (rp[i] > col) break;
1834         if (rp[i] == col) {
1835           bap = ap +  bs2*i;
1836           if (roworiented) {
1837             if (is == ADD_VALUES) {
1838               for (ii=0; ii<bs; ii++,value+=stepval) {
1839                 for (jj=ii; jj<bs2; jj+=bs) {
1840                   bap[jj] += *value++;
1841                 }
1842               }
1843             } else {
1844               for (ii=0; ii<bs; ii++,value+=stepval) {
1845                 for (jj=ii; jj<bs2; jj+=bs) {
1846                   bap[jj] = *value++;
1847                 }
1848               }
1849             }
1850           } else {
1851             if (is == ADD_VALUES) {
1852               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1853                 for (jj=0; jj<bs; jj++) {
1854                   bap[jj] += value[jj];
1855                 }
1856                 bap += bs;
1857               }
1858             } else {
1859               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1860                 for (jj=0; jj<bs; jj++) {
1861                   bap[jj]  = value[jj];
1862                 }
1863                 bap += bs;
1864               }
1865             }
1866           }
1867           goto noinsert2;
1868         }
1869       }
1870       if (nonew == 1) goto noinsert2;
1871       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1872       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1873       N = nrow++ - 1; high++;
1874       /* shift up all the later entries in this row */
1875       for (ii=N; ii>=i; ii--) {
1876         rp[ii+1] = rp[ii];
1877         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1878       }
1879       if (N >= i) {
1880         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1881       }
1882       rp[i] = col;
1883       bap   = ap +  bs2*i;
1884       if (roworiented) {
1885         for (ii=0; ii<bs; ii++,value+=stepval) {
1886           for (jj=ii; jj<bs2; jj+=bs) {
1887             bap[jj] = *value++;
1888           }
1889         }
1890       } else {
1891         for (ii=0; ii<bs; ii++,value+=stepval) {
1892           for (jj=0; jj<bs; jj++) {
1893             *bap++ = *value++;
1894           }
1895         }
1896       }
1897 noinsert2:;
1898       low = i;
1899     }
1900     ailen[row] = nrow;
1901   }
1902   PetscFunctionReturn(0);
1903 }
1904 
1905 #undef __FUNCT__
1906 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1907 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1908 {
1909   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1910   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1911   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1912   PetscErrorCode ierr;
1913   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1914   MatScalar      *aa  = a->a,*ap;
1915   PetscReal      ratio=0.6;
1916 
1917   PetscFunctionBegin;
1918   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1919 
1920   if (m) rmax = ailen[0];
1921   for (i=1; i<mbs; i++) {
1922     /* move each row back by the amount of empty slots (fshift) before it*/
1923     fshift += imax[i-1] - ailen[i-1];
1924     rmax    = PetscMax(rmax,ailen[i]);
1925     if (fshift) {
1926       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1927       N  = ailen[i];
1928       for (j=0; j<N; j++) {
1929         ip[j-fshift] = ip[j];
1930 
1931         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1932       }
1933     }
1934     ai[i] = ai[i-1] + ailen[i-1];
1935   }
1936   if (mbs) {
1937     fshift += imax[mbs-1] - ailen[mbs-1];
1938     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1939   }
1940 
1941   /* reset ilen and imax for each row */
1942   a->nonzerorowcnt = 0;
1943   for (i=0; i<mbs; i++) {
1944     ailen[i] = imax[i] = ai[i+1] - ai[i];
1945     a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1946   }
1947   a->nz = ai[mbs];
1948 
1949   /* diagonals may have moved, so kill the diagonal pointers */
1950   a->idiagvalid = PETSC_FALSE;
1951   if (fshift && a->diag) {
1952     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1953     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1954     a->diag = 0;
1955   }
1956   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1957   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1958   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1959   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1960 
1961   A->info.mallocs    += a->reallocs;
1962   a->reallocs         = 0;
1963   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1964   a->rmax             = rmax;
1965 
1966   ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1967   PetscFunctionReturn(0);
1968 }
1969 
1970 /*
1971    This function returns an array of flags which indicate the locations of contiguous
1972    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1973    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1974    Assume: sizes should be long enough to hold all the values.
1975 */
1976 #undef __FUNCT__
1977 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1978 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1979 {
1980   PetscInt  i,j,k,row;
1981   PetscBool flg;
1982 
1983   PetscFunctionBegin;
1984   for (i=0,j=0; i<n; j++) {
1985     row = idx[i];
1986     if (row%bs!=0) { /* Not the begining of a block */
1987       sizes[j] = 1;
1988       i++;
1989     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1990       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1991       i++;
1992     } else { /* Begining of the block, so check if the complete block exists */
1993       flg = PETSC_TRUE;
1994       for (k=1; k<bs; k++) {
1995         if (row+k != idx[i+k]) { /* break in the block */
1996           flg = PETSC_FALSE;
1997           break;
1998         }
1999       }
2000       if (flg) { /* No break in the bs */
2001         sizes[j] = bs;
2002         i       += bs;
2003       } else {
2004         sizes[j] = 1;
2005         i++;
2006       }
2007     }
2008   }
2009   *bs_max = j;
2010   PetscFunctionReturn(0);
2011 }
2012 
2013 #undef __FUNCT__
2014 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2015 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2016 {
2017   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2018   PetscErrorCode    ierr;
2019   PetscInt          i,j,k,count,*rows;
2020   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2021   PetscScalar       zero = 0.0;
2022   MatScalar         *aa;
2023   const PetscScalar *xx;
2024   PetscScalar       *bb;
2025 
2026   PetscFunctionBegin;
2027   /* fix right hand side if needed */
2028   if (x && b) {
2029     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2030     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2031     for (i=0; i<is_n; i++) {
2032       bb[is_idx[i]] = diag*xx[is_idx[i]];
2033     }
2034     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2035     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2036   }
2037 
2038   /* Make a copy of the IS and  sort it */
2039   /* allocate memory for rows,sizes */
2040   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2041 
2042   /* copy IS values to rows, and sort them */
2043   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2044   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2045 
2046   if (baij->keepnonzeropattern) {
2047     for (i=0; i<is_n; i++) sizes[i] = 1;
2048     bs_max          = is_n;
2049   } else {
2050     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2051     A->nonzerostate++;
2052   }
2053 
2054   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2055     row = rows[j];
2056     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2057     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2058     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2059     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2060       if (diag != (PetscScalar)0.0) {
2061         if (baij->ilen[row/bs] > 0) {
2062           baij->ilen[row/bs]       = 1;
2063           baij->j[baij->i[row/bs]] = row/bs;
2064 
2065           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2066         }
2067         /* Now insert all the diagonal values for this bs */
2068         for (k=0; k<bs; k++) {
2069           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2070         }
2071       } else { /* (diag == 0.0) */
2072         baij->ilen[row/bs] = 0;
2073       } /* end (diag == 0.0) */
2074     } else { /* (sizes[i] != bs) */
2075 #if defined(PETSC_USE_DEBUG)
2076       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2077 #endif
2078       for (k=0; k<count; k++) {
2079         aa[0] =  zero;
2080         aa   += bs;
2081       }
2082       if (diag != (PetscScalar)0.0) {
2083         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2084       }
2085     }
2086   }
2087 
2088   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2089   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2090   PetscFunctionReturn(0);
2091 }
2092 
2093 #undef __FUNCT__
2094 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2095 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2096 {
2097   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2098   PetscErrorCode    ierr;
2099   PetscInt          i,j,k,count;
2100   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2101   PetscScalar       zero = 0.0;
2102   MatScalar         *aa;
2103   const PetscScalar *xx;
2104   PetscScalar       *bb;
2105   PetscBool         *zeroed,vecs = PETSC_FALSE;
2106 
2107   PetscFunctionBegin;
2108   /* fix right hand side if needed */
2109   if (x && b) {
2110     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2111     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2112     vecs = PETSC_TRUE;
2113   }
2114 
2115   /* zero the columns */
2116   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2117   for (i=0; i<is_n; i++) {
2118     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2119     zeroed[is_idx[i]] = PETSC_TRUE;
2120   }
2121   for (i=0; i<A->rmap->N; i++) {
2122     if (!zeroed[i]) {
2123       row = i/bs;
2124       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2125         for (k=0; k<bs; k++) {
2126           col = bs*baij->j[j] + k;
2127           if (zeroed[col]) {
2128             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2129             if (vecs) bb[i] -= aa[0]*xx[col];
2130             aa[0] = 0.0;
2131           }
2132         }
2133       }
2134     } else if (vecs) bb[i] = diag*xx[i];
2135   }
2136   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2137   if (vecs) {
2138     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2139     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2140   }
2141 
2142   /* zero the rows */
2143   for (i=0; i<is_n; i++) {
2144     row   = is_idx[i];
2145     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2146     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2147     for (k=0; k<count; k++) {
2148       aa[0] =  zero;
2149       aa   += bs;
2150     }
2151     if (diag != (PetscScalar)0.0) {
2152       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2153     }
2154   }
2155   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2156   PetscFunctionReturn(0);
2157 }
2158 
2159 #undef __FUNCT__
2160 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2161 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2162 {
2163   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2164   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2165   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2166   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2167   PetscErrorCode ierr;
2168   PetscInt       ridx,cidx,bs2=a->bs2;
2169   PetscBool      roworiented=a->roworiented;
2170   MatScalar      *ap,value,*aa=a->a,*bap;
2171 
2172   PetscFunctionBegin;
2173   for (k=0; k<m; k++) { /* loop over added rows */
2174     row  = im[k];
2175     brow = row/bs;
2176     if (row < 0) continue;
2177 #if defined(PETSC_USE_DEBUG)
2178     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2179 #endif
2180     rp   = aj + ai[brow];
2181     ap   = aa + bs2*ai[brow];
2182     rmax = imax[brow];
2183     nrow = ailen[brow];
2184     low  = 0;
2185     high = nrow;
2186     for (l=0; l<n; l++) { /* loop over added columns */
2187       if (in[l] < 0) continue;
2188 #if defined(PETSC_USE_DEBUG)
2189       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2190 #endif
2191       col  = in[l]; bcol = col/bs;
2192       ridx = row % bs; cidx = col % bs;
2193       if (roworiented) {
2194         value = v[l + k*n];
2195       } else {
2196         value = v[k + l*m];
2197       }
2198       if (col <= lastcol) low = 0; else high = nrow;
2199       lastcol = col;
2200       while (high-low > 7) {
2201         t = (low+high)/2;
2202         if (rp[t] > bcol) high = t;
2203         else              low  = t;
2204       }
2205       for (i=low; i<high; i++) {
2206         if (rp[i] > bcol) break;
2207         if (rp[i] == bcol) {
2208           bap = ap +  bs2*i + bs*cidx + ridx;
2209           if (is == ADD_VALUES) *bap += value;
2210           else                  *bap  = value;
2211           goto noinsert1;
2212         }
2213       }
2214       if (nonew == 1) goto noinsert1;
2215       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2216       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2217       N = nrow++ - 1; high++;
2218       /* shift up all the later entries in this row */
2219       for (ii=N; ii>=i; ii--) {
2220         rp[ii+1] = rp[ii];
2221         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2222       }
2223       if (N>=i) {
2224         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2225       }
2226       rp[i]                      = bcol;
2227       ap[bs2*i + bs*cidx + ridx] = value;
2228       a->nz++;
2229       A->nonzerostate++;
2230 noinsert1:;
2231       low = i;
2232     }
2233     ailen[brow] = nrow;
2234   }
2235   PetscFunctionReturn(0);
2236 }
2237 
2238 #undef __FUNCT__
2239 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2240 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2241 {
2242   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2243   Mat            outA;
2244   PetscErrorCode ierr;
2245   PetscBool      row_identity,col_identity;
2246 
2247   PetscFunctionBegin;
2248   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2249   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2250   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2251   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2252 
2253   outA            = inA;
2254   inA->factortype = MAT_FACTOR_LU;
2255   ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2256   ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
2257 
2258   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2259 
2260   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2261   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2262   a->row = row;
2263   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2264   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2265   a->col = col;
2266 
2267   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2268   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2269   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2270   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2271 
2272   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2273   if (!a->solve_work) {
2274     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2275     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2276   }
2277   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2278   PetscFunctionReturn(0);
2279 }
2280 
2281 #undef __FUNCT__
2282 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2283 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2284 {
2285   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2286   PetscInt    i,nz,mbs;
2287 
2288   PetscFunctionBegin;
2289   nz  = baij->maxnz;
2290   mbs = baij->mbs;
2291   for (i=0; i<nz; i++) {
2292     baij->j[i] = indices[i];
2293   }
2294   baij->nz = nz;
2295   for (i=0; i<mbs; i++) {
2296     baij->ilen[i] = baij->imax[i];
2297   }
2298   PetscFunctionReturn(0);
2299 }
2300 
2301 #undef __FUNCT__
2302 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2303 /*@
2304     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2305        in the matrix.
2306 
2307   Input Parameters:
2308 +  mat - the SeqBAIJ matrix
2309 -  indices - the column indices
2310 
2311   Level: advanced
2312 
2313   Notes:
2314     This can be called if you have precomputed the nonzero structure of the
2315   matrix and want to provide it to the matrix object to improve the performance
2316   of the MatSetValues() operation.
2317 
2318     You MUST have set the correct numbers of nonzeros per row in the call to
2319   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2320 
2321     MUST be called before any calls to MatSetValues();
2322 
2323 @*/
2324 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2325 {
2326   PetscErrorCode ierr;
2327 
2328   PetscFunctionBegin;
2329   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2330   PetscValidPointer(indices,2);
2331   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2332   PetscFunctionReturn(0);
2333 }
2334 
2335 #undef __FUNCT__
2336 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2337 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2338 {
2339   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2340   PetscErrorCode ierr;
2341   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2342   PetscReal      atmp;
2343   PetscScalar    *x,zero = 0.0;
2344   MatScalar      *aa;
2345   PetscInt       ncols,brow,krow,kcol;
2346 
2347   PetscFunctionBegin;
2348   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2349   bs  = A->rmap->bs;
2350   aa  = a->a;
2351   ai  = a->i;
2352   aj  = a->j;
2353   mbs = a->mbs;
2354 
2355   ierr = VecSet(v,zero);CHKERRQ(ierr);
2356   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2357   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2358   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2359   for (i=0; i<mbs; i++) {
2360     ncols = ai[1] - ai[0]; ai++;
2361     brow  = bs*i;
2362     for (j=0; j<ncols; j++) {
2363       for (kcol=0; kcol<bs; kcol++) {
2364         for (krow=0; krow<bs; krow++) {
2365           atmp = PetscAbsScalar(*aa);aa++;
2366           row  = brow + krow;   /* row index */
2367           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2368         }
2369       }
2370       aj++;
2371     }
2372   }
2373   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2374   PetscFunctionReturn(0);
2375 }
2376 
2377 #undef __FUNCT__
2378 #define __FUNCT__ "MatCopy_SeqBAIJ"
2379 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2380 {
2381   PetscErrorCode ierr;
2382 
2383   PetscFunctionBegin;
2384   /* If the two matrices have the same copy implementation, use fast copy. */
2385   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2386     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2387     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2388     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2389 
2390     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2391     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2392     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2393   } else {
2394     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2395   }
2396   PetscFunctionReturn(0);
2397 }
2398 
2399 #undef __FUNCT__
2400 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2401 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2402 {
2403   PetscErrorCode ierr;
2404 
2405   PetscFunctionBegin;
2406   ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2407   PetscFunctionReturn(0);
2408 }
2409 
2410 #undef __FUNCT__
2411 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2412 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2413 {
2414   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2415 
2416   PetscFunctionBegin;
2417   *array = a->a;
2418   PetscFunctionReturn(0);
2419 }
2420 
2421 #undef __FUNCT__
2422 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2423 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2424 {
2425   PetscFunctionBegin;
2426   PetscFunctionReturn(0);
2427 }
2428 
2429 #undef __FUNCT__
2430 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ"
2431 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2432 {
2433   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2434   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2435   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2436   PetscErrorCode ierr;
2437 
2438   PetscFunctionBegin;
2439   /* Set the number of nonzeros in the new matrix */
2440   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2441   PetscFunctionReturn(0);
2442 }
2443 
2444 #undef __FUNCT__
2445 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2446 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2447 {
2448   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2449   PetscErrorCode ierr;
2450   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2451   PetscBLASInt   one=1;
2452 
2453   PetscFunctionBegin;
2454   if (str == SAME_NONZERO_PATTERN) {
2455     PetscScalar  alpha = a;
2456     PetscBLASInt bnz;
2457     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2458     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2459     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2460   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2461     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2462   } else {
2463     Mat      B;
2464     PetscInt *nnz;
2465     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2466     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2467     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2468     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2469     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2470     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2471     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2472     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2473     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2474     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2475     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2476     ierr = PetscFree(nnz);CHKERRQ(ierr);
2477   }
2478   PetscFunctionReturn(0);
2479 }
2480 
2481 #undef __FUNCT__
2482 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2483 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2484 {
2485   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2486   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2487   MatScalar   *aa = a->a;
2488 
2489   PetscFunctionBegin;
2490   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2491   PetscFunctionReturn(0);
2492 }
2493 
2494 #undef __FUNCT__
2495 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2496 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2497 {
2498   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2499   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2500   MatScalar   *aa = a->a;
2501 
2502   PetscFunctionBegin;
2503   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2504   PetscFunctionReturn(0);
2505 }
2506 
2507 #undef __FUNCT__
2508 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2509 /*
2510     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2511 */
2512 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2513 {
2514   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2515   PetscErrorCode ierr;
2516   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2517   PetscInt       nz = a->i[m],row,*jj,mr,col;
2518 
2519   PetscFunctionBegin;
2520   *nn = n;
2521   if (!ia) PetscFunctionReturn(0);
2522   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2523   else {
2524     ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2525     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2526     ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2527     jj   = a->j;
2528     for (i=0; i<nz; i++) {
2529       collengths[jj[i]]++;
2530     }
2531     cia[0] = oshift;
2532     for (i=0; i<n; i++) {
2533       cia[i+1] = cia[i] + collengths[i];
2534     }
2535     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2536     jj   = a->j;
2537     for (row=0; row<m; row++) {
2538       mr = a->i[row+1] - a->i[row];
2539       for (i=0; i<mr; i++) {
2540         col = *jj++;
2541 
2542         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2543       }
2544     }
2545     ierr = PetscFree(collengths);CHKERRQ(ierr);
2546     *ia  = cia; *ja = cja;
2547   }
2548   PetscFunctionReturn(0);
2549 }
2550 
2551 #undef __FUNCT__
2552 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2553 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2554 {
2555   PetscErrorCode ierr;
2556 
2557   PetscFunctionBegin;
2558   if (!ia) PetscFunctionReturn(0);
2559   ierr = PetscFree(*ia);CHKERRQ(ierr);
2560   ierr = PetscFree(*ja);CHKERRQ(ierr);
2561   PetscFunctionReturn(0);
2562 }
2563 
2564 /*
2565  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2566  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2567  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2568  */
2569 #undef __FUNCT__
2570 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color"
2571 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2572 {
2573   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2574   PetscErrorCode ierr;
2575   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2576   PetscInt       nz = a->i[m],row,*jj,mr,col;
2577   PetscInt       *cspidx;
2578 
2579   PetscFunctionBegin;
2580   *nn = n;
2581   if (!ia) PetscFunctionReturn(0);
2582 
2583   ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2584   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2585   ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2586   ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr);
2587   jj   = a->j;
2588   for (i=0; i<nz; i++) {
2589     collengths[jj[i]]++;
2590   }
2591   cia[0] = oshift;
2592   for (i=0; i<n; i++) {
2593     cia[i+1] = cia[i] + collengths[i];
2594   }
2595   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2596   jj   = a->j;
2597   for (row=0; row<m; row++) {
2598     mr = a->i[row+1] - a->i[row];
2599     for (i=0; i<mr; i++) {
2600       col = *jj++;
2601       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2602       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2603     }
2604   }
2605   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2606   *ia    = cia; *ja = cja;
2607   *spidx = cspidx;
2608   PetscFunctionReturn(0);
2609 }
2610 
2611 #undef __FUNCT__
2612 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color"
2613 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2614 {
2615   PetscErrorCode ierr;
2616 
2617   PetscFunctionBegin;
2618   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2619   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2620   PetscFunctionReturn(0);
2621 }
2622 
2623 #undef __FUNCT__
2624 #define __FUNCT__ "MatShift_SeqBAIJ"
2625 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2626 {
2627   PetscErrorCode ierr;
2628   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2629 
2630   PetscFunctionBegin;
2631   if (!Y->preallocated || !aij->nz) {
2632     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2633   }
2634   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2635   PetscFunctionReturn(0);
2636 }
2637 
2638 /* -------------------------------------------------------------------*/
2639 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2640                                        MatGetRow_SeqBAIJ,
2641                                        MatRestoreRow_SeqBAIJ,
2642                                        MatMult_SeqBAIJ_N,
2643                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2644                                        MatMultTranspose_SeqBAIJ,
2645                                        MatMultTransposeAdd_SeqBAIJ,
2646                                        0,
2647                                        0,
2648                                        0,
2649                                /* 10*/ 0,
2650                                        MatLUFactor_SeqBAIJ,
2651                                        0,
2652                                        0,
2653                                        MatTranspose_SeqBAIJ,
2654                                /* 15*/ MatGetInfo_SeqBAIJ,
2655                                        MatEqual_SeqBAIJ,
2656                                        MatGetDiagonal_SeqBAIJ,
2657                                        MatDiagonalScale_SeqBAIJ,
2658                                        MatNorm_SeqBAIJ,
2659                                /* 20*/ 0,
2660                                        MatAssemblyEnd_SeqBAIJ,
2661                                        MatSetOption_SeqBAIJ,
2662                                        MatZeroEntries_SeqBAIJ,
2663                                /* 24*/ MatZeroRows_SeqBAIJ,
2664                                        0,
2665                                        0,
2666                                        0,
2667                                        0,
2668                                /* 29*/ MatSetUp_SeqBAIJ,
2669                                        0,
2670                                        0,
2671                                        0,
2672                                        0,
2673                                /* 34*/ MatDuplicate_SeqBAIJ,
2674                                        0,
2675                                        0,
2676                                        MatILUFactor_SeqBAIJ,
2677                                        0,
2678                                /* 39*/ MatAXPY_SeqBAIJ,
2679                                        MatGetSubMatrices_SeqBAIJ,
2680                                        MatIncreaseOverlap_SeqBAIJ,
2681                                        MatGetValues_SeqBAIJ,
2682                                        MatCopy_SeqBAIJ,
2683                                /* 44*/ 0,
2684                                        MatScale_SeqBAIJ,
2685                                        MatShift_SeqBAIJ,
2686                                        0,
2687                                        MatZeroRowsColumns_SeqBAIJ,
2688                                /* 49*/ 0,
2689                                        MatGetRowIJ_SeqBAIJ,
2690                                        MatRestoreRowIJ_SeqBAIJ,
2691                                        MatGetColumnIJ_SeqBAIJ,
2692                                        MatRestoreColumnIJ_SeqBAIJ,
2693                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2694                                        0,
2695                                        0,
2696                                        0,
2697                                        MatSetValuesBlocked_SeqBAIJ,
2698                                /* 59*/ MatGetSubMatrix_SeqBAIJ,
2699                                        MatDestroy_SeqBAIJ,
2700                                        MatView_SeqBAIJ,
2701                                        0,
2702                                        0,
2703                                /* 64*/ 0,
2704                                        0,
2705                                        0,
2706                                        0,
2707                                        0,
2708                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2709                                        0,
2710                                        MatConvert_Basic,
2711                                        0,
2712                                        0,
2713                                /* 74*/ 0,
2714                                        MatFDColoringApply_BAIJ,
2715                                        0,
2716                                        0,
2717                                        0,
2718                                /* 79*/ 0,
2719                                        0,
2720                                        0,
2721                                        0,
2722                                        MatLoad_SeqBAIJ,
2723                                /* 84*/ 0,
2724                                        0,
2725                                        0,
2726                                        0,
2727                                        0,
2728                                /* 89*/ 0,
2729                                        0,
2730                                        0,
2731                                        0,
2732                                        0,
2733                                /* 94*/ 0,
2734                                        0,
2735                                        0,
2736                                        0,
2737                                        0,
2738                                /* 99*/ 0,
2739                                        0,
2740                                        0,
2741                                        0,
2742                                        0,
2743                                /*104*/ 0,
2744                                        MatRealPart_SeqBAIJ,
2745                                        MatImaginaryPart_SeqBAIJ,
2746                                        0,
2747                                        0,
2748                                /*109*/ 0,
2749                                        0,
2750                                        0,
2751                                        0,
2752                                        MatMissingDiagonal_SeqBAIJ,
2753                                /*114*/ 0,
2754                                        0,
2755                                        0,
2756                                        0,
2757                                        0,
2758                                /*119*/ 0,
2759                                        0,
2760                                        MatMultHermitianTranspose_SeqBAIJ,
2761                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2762                                        0,
2763                                /*124*/ 0,
2764                                        0,
2765                                        MatInvertBlockDiagonal_SeqBAIJ,
2766                                        0,
2767                                        0,
2768                                /*129*/ 0,
2769                                        0,
2770                                        0,
2771                                        0,
2772                                        0,
2773                                /*134*/ 0,
2774                                        0,
2775                                        0,
2776                                        0,
2777                                        0,
2778                                /*139*/ 0,
2779                                        0,
2780                                        0,
2781                                        MatFDColoringSetUp_SeqXAIJ,
2782                                        0,
2783                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ
2784 };
2785 
2786 #undef __FUNCT__
2787 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2788 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2789 {
2790   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2791   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2792   PetscErrorCode ierr;
2793 
2794   PetscFunctionBegin;
2795   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2796 
2797   /* allocate space for values if not already there */
2798   if (!aij->saved_values) {
2799     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2800     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2801   }
2802 
2803   /* copy values over */
2804   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2805   PetscFunctionReturn(0);
2806 }
2807 
2808 #undef __FUNCT__
2809 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2810 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2811 {
2812   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2813   PetscErrorCode ierr;
2814   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2815 
2816   PetscFunctionBegin;
2817   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2818   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2819 
2820   /* copy values over */
2821   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2822   PetscFunctionReturn(0);
2823 }
2824 
2825 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2826 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2827 
2828 #undef __FUNCT__
2829 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2830 static PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2831 {
2832   Mat_SeqBAIJ    *b;
2833   PetscErrorCode ierr;
2834   PetscInt       i,mbs,nbs,bs2;
2835   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2836 
2837   PetscFunctionBegin;
2838   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2839   if (nz == MAT_SKIP_ALLOCATION) {
2840     skipallocation = PETSC_TRUE;
2841     nz             = 0;
2842   }
2843 
2844   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2845   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2846   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2847   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2848 
2849   B->preallocated = PETSC_TRUE;
2850 
2851   mbs = B->rmap->n/bs;
2852   nbs = B->cmap->n/bs;
2853   bs2 = bs*bs;
2854 
2855   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2856 
2857   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2858   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2859   if (nnz) {
2860     for (i=0; i<mbs; i++) {
2861       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2862       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2863     }
2864   }
2865 
2866   b    = (Mat_SeqBAIJ*)B->data;
2867   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2868   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2869   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2870 
2871   if (!flg) {
2872     switch (bs) {
2873     case 1:
2874       B->ops->mult    = MatMult_SeqBAIJ_1;
2875       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2876       break;
2877     case 2:
2878       B->ops->mult    = MatMult_SeqBAIJ_2;
2879       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2880       break;
2881     case 3:
2882       B->ops->mult    = MatMult_SeqBAIJ_3;
2883       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2884       break;
2885     case 4:
2886       B->ops->mult    = MatMult_SeqBAIJ_4;
2887       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2888       break;
2889     case 5:
2890       B->ops->mult    = MatMult_SeqBAIJ_5;
2891       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2892       break;
2893     case 6:
2894       B->ops->mult    = MatMult_SeqBAIJ_6;
2895       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2896       break;
2897     case 7:
2898       B->ops->mult    = MatMult_SeqBAIJ_7;
2899       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2900       break;
2901     case 15:
2902       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2903       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2904       break;
2905     default:
2906       B->ops->mult    = MatMult_SeqBAIJ_N;
2907       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2908       break;
2909     }
2910   }
2911   B->ops->sor = MatSOR_SeqBAIJ;
2912   b->mbs = mbs;
2913   b->nbs = nbs;
2914   if (!skipallocation) {
2915     if (!b->imax) {
2916       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2917       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2918 
2919       b->free_imax_ilen = PETSC_TRUE;
2920     }
2921     /* b->ilen will count nonzeros in each block row so far. */
2922     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2923     if (!nnz) {
2924       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2925       else if (nz < 0) nz = 1;
2926       for (i=0; i<mbs; i++) b->imax[i] = nz;
2927       nz = nz*mbs;
2928     } else {
2929       nz = 0;
2930       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2931     }
2932 
2933     /* allocate the matrix space */
2934     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2935     ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2936     ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2937     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2938     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2939 
2940     b->singlemalloc = PETSC_TRUE;
2941     b->i[0]         = 0;
2942     for (i=1; i<mbs+1; i++) {
2943       b->i[i] = b->i[i-1] + b->imax[i-1];
2944     }
2945     b->free_a  = PETSC_TRUE;
2946     b->free_ij = PETSC_TRUE;
2947   } else {
2948     b->free_a  = PETSC_FALSE;
2949     b->free_ij = PETSC_FALSE;
2950   }
2951 
2952   b->bs2              = bs2;
2953   b->mbs              = mbs;
2954   b->nz               = 0;
2955   b->maxnz            = nz;
2956   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2957   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2958   PetscFunctionReturn(0);
2959 }
2960 
2961 #undef __FUNCT__
2962 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2963 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2964 {
2965   PetscInt       i,m,nz,nz_max=0,*nnz;
2966   PetscScalar    *values=0;
2967   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2968   PetscErrorCode ierr;
2969 
2970   PetscFunctionBegin;
2971   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2972   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2973   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2974   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2975   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2976   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2977   m    = B->rmap->n/bs;
2978 
2979   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2980   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2981   for (i=0; i<m; i++) {
2982     nz = ii[i+1]- ii[i];
2983     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2984     nz_max = PetscMax(nz_max, nz);
2985     nnz[i] = nz;
2986   }
2987   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2988   ierr = PetscFree(nnz);CHKERRQ(ierr);
2989 
2990   values = (PetscScalar*)V;
2991   if (!values) {
2992     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2993   }
2994   for (i=0; i<m; i++) {
2995     PetscInt          ncols  = ii[i+1] - ii[i];
2996     const PetscInt    *icols = jj + ii[i];
2997     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2998     if (!roworiented) {
2999       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3000     } else {
3001       PetscInt j;
3002       for (j=0; j<ncols; j++) {
3003         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
3004         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
3005       }
3006     }
3007   }
3008   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3009   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3010   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3011   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3012   PetscFunctionReturn(0);
3013 }
3014 
3015 /*MC
3016    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3017    block sparse compressed row format.
3018 
3019    Options Database Keys:
3020 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3021 
3022   Level: beginner
3023 
3024 .seealso: MatCreateSeqBAIJ()
3025 M*/
3026 
3027 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3028 
3029 #undef __FUNCT__
3030 #define __FUNCT__ "MatCreate_SeqBAIJ"
3031 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3032 {
3033   PetscErrorCode ierr;
3034   PetscMPIInt    size;
3035   Mat_SeqBAIJ    *b;
3036 
3037   PetscFunctionBegin;
3038   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3039   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3040 
3041   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3042   B->data = (void*)b;
3043   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3044 
3045   b->row          = 0;
3046   b->col          = 0;
3047   b->icol         = 0;
3048   b->reallocs     = 0;
3049   b->saved_values = 0;
3050 
3051   b->roworiented        = PETSC_TRUE;
3052   b->nonew              = 0;
3053   b->diag               = 0;
3054   B->spptr              = 0;
3055   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3056   b->keepnonzeropattern = PETSC_FALSE;
3057 
3058   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3059   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3060   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3061   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3062   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3063   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3064   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3065   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3066   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3067   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3068   PetscFunctionReturn(0);
3069 }
3070 
3071 #undef __FUNCT__
3072 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3073 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3074 {
3075   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3076   PetscErrorCode ierr;
3077   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3078 
3079   PetscFunctionBegin;
3080   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3081 
3082   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3083     c->imax           = a->imax;
3084     c->ilen           = a->ilen;
3085     c->free_imax_ilen = PETSC_FALSE;
3086   } else {
3087     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3088     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3089     for (i=0; i<mbs; i++) {
3090       c->imax[i] = a->imax[i];
3091       c->ilen[i] = a->ilen[i];
3092     }
3093     c->free_imax_ilen = PETSC_TRUE;
3094   }
3095 
3096   /* allocate the matrix space */
3097   if (mallocmatspace) {
3098     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3099       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3100       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3101 
3102       c->i            = a->i;
3103       c->j            = a->j;
3104       c->singlemalloc = PETSC_FALSE;
3105       c->free_a       = PETSC_TRUE;
3106       c->free_ij      = PETSC_FALSE;
3107       c->parent       = A;
3108       C->preallocated = PETSC_TRUE;
3109       C->assembled    = PETSC_TRUE;
3110 
3111       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3112       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3113       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3114     } else {
3115       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3116       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3117 
3118       c->singlemalloc = PETSC_TRUE;
3119       c->free_a       = PETSC_TRUE;
3120       c->free_ij      = PETSC_TRUE;
3121 
3122       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3123       if (mbs > 0) {
3124         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3125         if (cpvalues == MAT_COPY_VALUES) {
3126           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3127         } else {
3128           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3129         }
3130       }
3131       C->preallocated = PETSC_TRUE;
3132       C->assembled    = PETSC_TRUE;
3133     }
3134   }
3135 
3136   c->roworiented = a->roworiented;
3137   c->nonew       = a->nonew;
3138 
3139   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3140   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3141 
3142   c->bs2         = a->bs2;
3143   c->mbs         = a->mbs;
3144   c->nbs         = a->nbs;
3145 
3146   if (a->diag) {
3147     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3148       c->diag      = a->diag;
3149       c->free_diag = PETSC_FALSE;
3150     } else {
3151       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3152       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3153       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3154       c->free_diag = PETSC_TRUE;
3155     }
3156   } else c->diag = 0;
3157 
3158   c->nz         = a->nz;
3159   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3160   c->solve_work = NULL;
3161   c->mult_work  = NULL;
3162   c->sor_workt  = NULL;
3163   c->sor_work   = NULL;
3164 
3165   c->compressedrow.use   = a->compressedrow.use;
3166   c->compressedrow.nrows = a->compressedrow.nrows;
3167   if (a->compressedrow.use) {
3168     i    = a->compressedrow.nrows;
3169     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3170     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3171     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3172     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3173   } else {
3174     c->compressedrow.use    = PETSC_FALSE;
3175     c->compressedrow.i      = NULL;
3176     c->compressedrow.rindex = NULL;
3177   }
3178   C->nonzerostate = A->nonzerostate;
3179 
3180   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3181   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3182   PetscFunctionReturn(0);
3183 }
3184 
3185 #undef __FUNCT__
3186 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3187 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3188 {
3189   PetscErrorCode ierr;
3190 
3191   PetscFunctionBegin;
3192   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3193   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3194   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3195   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3196   PetscFunctionReturn(0);
3197 }
3198 
3199 #undef __FUNCT__
3200 #define __FUNCT__ "MatLoad_SeqBAIJ"
3201 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3202 {
3203   Mat_SeqBAIJ    *a;
3204   PetscErrorCode ierr;
3205   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3206   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3207   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3208   PetscInt       *masked,nmask,tmp,bs2,ishift;
3209   PetscMPIInt    size;
3210   int            fd;
3211   PetscScalar    *aa;
3212   MPI_Comm       comm;
3213 
3214   PetscFunctionBegin;
3215   /* force binary viewer to load .info file if it has not yet done so */
3216   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3217   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3218   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3219   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3220   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3221   if (bs < 0) bs = 1;
3222   bs2  = bs*bs;
3223 
3224   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3225   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3226   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3227   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3228   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3229   M = header[1]; N = header[2]; nz = header[3];
3230 
3231   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3232   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3233 
3234   /*
3235      This code adds extra rows to make sure the number of rows is
3236     divisible by the blocksize
3237   */
3238   mbs        = M/bs;
3239   extra_rows = bs - M + bs*(mbs);
3240   if (extra_rows == bs) extra_rows = 0;
3241   else mbs++;
3242   if (extra_rows) {
3243     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3244   }
3245 
3246   /* Set global sizes if not already set */
3247   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3248     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3249   } else { /* Check if the matrix global sizes are correct */
3250     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3251     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3252       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3253     }
3254     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3255   }
3256 
3257   /* read in row lengths */
3258   ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
3259   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3260   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3261 
3262   /* read in column indices */
3263   ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr);
3264   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3265   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3266 
3267   /* loop over row lengths determining block row lengths */
3268   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3269   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3270   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3271   rowcount = 0;
3272   nzcount  = 0;
3273   for (i=0; i<mbs; i++) {
3274     nmask = 0;
3275     for (j=0; j<bs; j++) {
3276       kmax = rowlengths[rowcount];
3277       for (k=0; k<kmax; k++) {
3278         tmp = jj[nzcount++]/bs;
3279         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3280       }
3281       rowcount++;
3282     }
3283     browlengths[i] += nmask;
3284     /* zero out the mask elements we set */
3285     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3286   }
3287 
3288   /* Do preallocation  */
3289   ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr);
3290   a    = (Mat_SeqBAIJ*)newmat->data;
3291 
3292   /* set matrix "i" values */
3293   a->i[0] = 0;
3294   for (i=1; i<= mbs; i++) {
3295     a->i[i]      = a->i[i-1] + browlengths[i-1];
3296     a->ilen[i-1] = browlengths[i-1];
3297   }
3298   a->nz = 0;
3299   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3300 
3301   /* read in nonzero values */
3302   ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr);
3303   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3304   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3305 
3306   /* set "a" and "j" values into matrix */
3307   nzcount = 0; jcount = 0;
3308   for (i=0; i<mbs; i++) {
3309     nzcountb = nzcount;
3310     nmask    = 0;
3311     for (j=0; j<bs; j++) {
3312       kmax = rowlengths[i*bs+j];
3313       for (k=0; k<kmax; k++) {
3314         tmp = jj[nzcount++]/bs;
3315         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3316       }
3317     }
3318     /* sort the masked values */
3319     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3320 
3321     /* set "j" values into matrix */
3322     maskcount = 1;
3323     for (j=0; j<nmask; j++) {
3324       a->j[jcount++]  = masked[j];
3325       mask[masked[j]] = maskcount++;
3326     }
3327     /* set "a" values into matrix */
3328     ishift = bs2*a->i[i];
3329     for (j=0; j<bs; j++) {
3330       kmax = rowlengths[i*bs+j];
3331       for (k=0; k<kmax; k++) {
3332         tmp       = jj[nzcountb]/bs;
3333         block     = mask[tmp] - 1;
3334         point     = jj[nzcountb] - bs*tmp;
3335         idx       = ishift + bs2*block + j + bs*point;
3336         a->a[idx] = (MatScalar)aa[nzcountb++];
3337       }
3338     }
3339     /* zero out the mask elements we set */
3340     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3341   }
3342   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3343 
3344   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3345   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3346   ierr = PetscFree(aa);CHKERRQ(ierr);
3347   ierr = PetscFree(jj);CHKERRQ(ierr);
3348   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3349 
3350   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3351   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3352   PetscFunctionReturn(0);
3353 }
3354 
3355 #undef __FUNCT__
3356 #define __FUNCT__ "MatCreateSeqBAIJ"
3357 /*@C
3358    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3359    compressed row) format.  For good matrix assembly performance the
3360    user should preallocate the matrix storage by setting the parameter nz
3361    (or the array nnz).  By setting these parameters accurately, performance
3362    during matrix assembly can be increased by more than a factor of 50.
3363 
3364    Collective on MPI_Comm
3365 
3366    Input Parameters:
3367 +  comm - MPI communicator, set to PETSC_COMM_SELF
3368 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3369           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3370 .  m - number of rows
3371 .  n - number of columns
3372 .  nz - number of nonzero blocks  per block row (same for all rows)
3373 -  nnz - array containing the number of nonzero blocks in the various block rows
3374          (possibly different for each block row) or NULL
3375 
3376    Output Parameter:
3377 .  A - the matrix
3378 
3379    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3380    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3381    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3382 
3383    Options Database Keys:
3384 .   -mat_no_unroll - uses code that does not unroll the loops in the
3385                      block calculations (much slower)
3386 .    -mat_block_size - size of the blocks to use
3387 
3388    Level: intermediate
3389 
3390    Notes:
3391    The number of rows and columns must be divisible by blocksize.
3392 
3393    If the nnz parameter is given then the nz parameter is ignored
3394 
3395    A nonzero block is any block that as 1 or more nonzeros in it
3396 
3397    The block AIJ format is fully compatible with standard Fortran 77
3398    storage.  That is, the stored row and column indices can begin at
3399    either one (as in Fortran) or zero.  See the users' manual for details.
3400 
3401    Specify the preallocated storage with either nz or nnz (not both).
3402    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3403    allocation.  See Users-Manual: ch_mat for details.
3404    matrices.
3405 
3406 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3407 @*/
3408 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3409 {
3410   PetscErrorCode ierr;
3411 
3412   PetscFunctionBegin;
3413   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3414   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3415   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3416   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3417   PetscFunctionReturn(0);
3418 }
3419 
3420 #undef __FUNCT__
3421 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3422 /*@C
3423    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3424    per row in the matrix. For good matrix assembly performance the
3425    user should preallocate the matrix storage by setting the parameter nz
3426    (or the array nnz).  By setting these parameters accurately, performance
3427    during matrix assembly can be increased by more than a factor of 50.
3428 
3429    Collective on MPI_Comm
3430 
3431    Input Parameters:
3432 +  B - the matrix
3433 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3434           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3435 .  nz - number of block nonzeros per block row (same for all rows)
3436 -  nnz - array containing the number of block nonzeros in the various block rows
3437          (possibly different for each block row) or NULL
3438 
3439    Options Database Keys:
3440 .   -mat_no_unroll - uses code that does not unroll the loops in the
3441                      block calculations (much slower)
3442 .   -mat_block_size - size of the blocks to use
3443 
3444    Level: intermediate
3445 
3446    Notes:
3447    If the nnz parameter is given then the nz parameter is ignored
3448 
3449    You can call MatGetInfo() to get information on how effective the preallocation was;
3450    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3451    You can also run with the option -info and look for messages with the string
3452    malloc in them to see if additional memory allocation was needed.
3453 
3454    The block AIJ format is fully compatible with standard Fortran 77
3455    storage.  That is, the stored row and column indices can begin at
3456    either one (as in Fortran) or zero.  See the users' manual for details.
3457 
3458    Specify the preallocated storage with either nz or nnz (not both).
3459    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3460    allocation.  See Users-Manual: ch_mat for details.
3461 
3462 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3463 @*/
3464 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3465 {
3466   PetscErrorCode ierr;
3467 
3468   PetscFunctionBegin;
3469   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3470   PetscValidType(B,1);
3471   PetscValidLogicalCollectiveInt(B,bs,2);
3472   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3473   PetscFunctionReturn(0);
3474 }
3475 
3476 #undef __FUNCT__
3477 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3478 /*@C
3479    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3480    (the default sequential PETSc format).
3481 
3482    Collective on MPI_Comm
3483 
3484    Input Parameters:
3485 +  B - the matrix
3486 .  i - the indices into j for the start of each local row (starts with zero)
3487 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3488 -  v - optional values in the matrix
3489 
3490    Level: developer
3491 
3492    Notes:
3493    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3494    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3495    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3496    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3497    block column and the second index is over columns within a block.
3498 
3499 .keywords: matrix, aij, compressed row, sparse
3500 
3501 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3502 @*/
3503 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3504 {
3505   PetscErrorCode ierr;
3506 
3507   PetscFunctionBegin;
3508   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3509   PetscValidType(B,1);
3510   PetscValidLogicalCollectiveInt(B,bs,2);
3511   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3512   PetscFunctionReturn(0);
3513 }
3514 
3515 
3516 #undef __FUNCT__
3517 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3518 /*@
3519      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3520 
3521      Collective on MPI_Comm
3522 
3523    Input Parameters:
3524 +  comm - must be an MPI communicator of size 1
3525 .  bs - size of block
3526 .  m - number of rows
3527 .  n - number of columns
3528 .  i - row indices
3529 .  j - column indices
3530 -  a - matrix values
3531 
3532    Output Parameter:
3533 .  mat - the matrix
3534 
3535    Level: advanced
3536 
3537    Notes:
3538        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3539     once the matrix is destroyed
3540 
3541        You cannot set new nonzero locations into this matrix, that will generate an error.
3542 
3543        The i and j indices are 0 based
3544 
3545        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3546 
3547       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3548       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3549       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3550       with column-major ordering within blocks.
3551 
3552 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3553 
3554 @*/
3555 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3556 {
3557   PetscErrorCode ierr;
3558   PetscInt       ii;
3559   Mat_SeqBAIJ    *baij;
3560 
3561   PetscFunctionBegin;
3562   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3563   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3564 
3565   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3566   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3567   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3568   ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3569   baij = (Mat_SeqBAIJ*)(*mat)->data;
3570   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3571   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3572 
3573   baij->i = i;
3574   baij->j = j;
3575   baij->a = a;
3576 
3577   baij->singlemalloc = PETSC_FALSE;
3578   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3579   baij->free_a       = PETSC_FALSE;
3580   baij->free_ij      = PETSC_FALSE;
3581 
3582   for (ii=0; ii<m; ii++) {
3583     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3584 #if defined(PETSC_USE_DEBUG)
3585     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3586 #endif
3587   }
3588 #if defined(PETSC_USE_DEBUG)
3589   for (ii=0; ii<baij->i[m]; ii++) {
3590     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3591     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3592   }
3593 #endif
3594 
3595   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3596   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3597   PetscFunctionReturn(0);
3598 }
3599 
3600 #undef __FUNCT__
3601 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_SeqBAIJ"
3602 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3603 {
3604   PetscErrorCode ierr;
3605 
3606   PetscFunctionBegin;
3607   ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3608   PetscFunctionReturn(0);
3609 }
3610