xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 00d931fe9835bef04c3bcd2a9a1bf118d64cc4c2)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
21 
22   PetscFunctionBegin;
23   allowzeropivot = PetscNot(A->erroriffailure);
24 
25   if (a->idiagvalid) {
26     if (values) *values = a->idiag;
27     PetscFunctionReturn(0);
28   }
29   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
30   diag_offset = a->diag;
31   if (!a->idiag) {
32     ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr);
33     ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
34   }
35   diag  = a->idiag;
36   mdiag = a->idiag+bs2*mbs;
37   if (values) *values = a->idiag;
38   /* factor and invert each block */
39   switch (bs) {
40   case 1:
41     for (i=0; i<mbs; i++) {
42       odiag    = v + 1*diag_offset[i];
43       diag[0]  = odiag[0];
44       mdiag[0] = odiag[0];
45 
46       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
47         if (allowzeropivot) {
48           A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
49           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
50         } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",i);
51       }
52 
53       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
54       diag    += 1;
55       mdiag   += 1;
56     }
57     break;
58   case 2:
59     for (i=0; i<mbs; i++) {
60       odiag    = v + 4*diag_offset[i];
61       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
62       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
63       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
64       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
65       diag    += 4;
66       mdiag   += 4;
67     }
68     break;
69   case 3:
70     for (i=0; i<mbs; i++) {
71       odiag    = v + 9*diag_offset[i];
72       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
73       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
74       diag[8]  = odiag[8];
75       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
76       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
77       mdiag[8] = odiag[8];
78       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
79       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
80       diag    += 9;
81       mdiag   += 9;
82     }
83     break;
84   case 4:
85     for (i=0; i<mbs; i++) {
86       odiag  = v + 16*diag_offset[i];
87       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
88       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
89       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
90       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
91       diag  += 16;
92       mdiag += 16;
93     }
94     break;
95   case 5:
96     for (i=0; i<mbs; i++) {
97       odiag  = v + 25*diag_offset[i];
98       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
99       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
100       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
101       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
102       diag  += 25;
103       mdiag += 25;
104     }
105     break;
106   case 6:
107     for (i=0; i<mbs; i++) {
108       odiag  = v + 36*diag_offset[i];
109       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
110       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
111       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
112       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
113       diag  += 36;
114       mdiag += 36;
115     }
116     break;
117   case 7:
118     for (i=0; i<mbs; i++) {
119       odiag  = v + 49*diag_offset[i];
120       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
121       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
122       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
123       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
124       diag  += 49;
125       mdiag += 49;
126     }
127     break;
128   default:
129     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
130     for (i=0; i<mbs; i++) {
131       odiag  = v + bs2*diag_offset[i];
132       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
133       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
134       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
135       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
136       diag  += bs2;
137       mdiag += bs2;
138     }
139     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
140   }
141   a->idiagvalid = PETSC_TRUE;
142   PetscFunctionReturn(0);
143 }
144 
145 #undef __FUNCT__
146 #define __FUNCT__ "MatSOR_SeqBAIJ"
147 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
148 {
149   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
150   PetscScalar       *x,*work,*w,*workt,*t;
151   const MatScalar   *v,*aa = a->a, *idiag;
152   const PetscScalar *b,*xb;
153   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
154   PetscErrorCode    ierr;
155   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
156   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
157 
158   PetscFunctionBegin;
159   if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */
160   its = its*lits;
161   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
162   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
163   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
164   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
165   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
166 
167   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
168 
169   if (!m) PetscFunctionReturn(0);
170   diag  = a->diag;
171   idiag = a->idiag;
172   k    = PetscMax(A->rmap->n,A->cmap->n);
173   if (!a->mult_work) {
174     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
175   }
176   if (!a->sor_workt) {
177     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
178   }
179   if (!a->sor_work) {
180     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
181   }
182   work = a->mult_work;
183   t    = a->sor_workt;
184   w    = a->sor_work;
185 
186   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
187   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
188 
189   if (flag & SOR_ZERO_INITIAL_GUESS) {
190     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
191       switch (bs) {
192       case 1:
193         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
194         t[0] = b[0];
195         i2     = 1;
196         idiag += 1;
197         for (i=1; i<m; i++) {
198           v  = aa + ai[i];
199           vi = aj + ai[i];
200           nz = diag[i] - ai[i];
201           s[0] = b[i2];
202           for (j=0; j<nz; j++) {
203             xw[0] = x[vi[j]];
204             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
205           }
206           t[i2] = s[0];
207           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
208           x[i2]  = xw[0];
209           idiag += 1;
210           i2    += 1;
211         }
212         break;
213       case 2:
214         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
215         t[0] = b[0]; t[1] = b[1];
216         i2     = 2;
217         idiag += 4;
218         for (i=1; i<m; i++) {
219           v  = aa + 4*ai[i];
220           vi = aj + ai[i];
221           nz = diag[i] - ai[i];
222           s[0] = b[i2]; s[1] = b[i2+1];
223           for (j=0; j<nz; j++) {
224             idx = 2*vi[j];
225             it  = 4*j;
226             xw[0] = x[idx]; xw[1] = x[1+idx];
227             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
228           }
229           t[i2] = s[0]; t[i2+1] = s[1];
230           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
231           x[i2]   = xw[0]; x[i2+1] = xw[1];
232           idiag  += 4;
233           i2     += 2;
234         }
235         break;
236       case 3:
237         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
238         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
239         i2     = 3;
240         idiag += 9;
241         for (i=1; i<m; i++) {
242           v  = aa + 9*ai[i];
243           vi = aj + ai[i];
244           nz = diag[i] - ai[i];
245           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
246           while (nz--) {
247             idx = 3*(*vi++);
248             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
249             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
250             v  += 9;
251           }
252           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
253           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
254           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
255           idiag  += 9;
256           i2     += 3;
257         }
258         break;
259       case 4:
260         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
261         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
262         i2     = 4;
263         idiag += 16;
264         for (i=1; i<m; i++) {
265           v  = aa + 16*ai[i];
266           vi = aj + ai[i];
267           nz = diag[i] - ai[i];
268           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
269           while (nz--) {
270             idx = 4*(*vi++);
271             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
272             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
273             v  += 16;
274           }
275           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
276           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
277           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
278           idiag  += 16;
279           i2     += 4;
280         }
281         break;
282       case 5:
283         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
284         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
285         i2     = 5;
286         idiag += 25;
287         for (i=1; i<m; i++) {
288           v  = aa + 25*ai[i];
289           vi = aj + ai[i];
290           nz = diag[i] - ai[i];
291           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
292           while (nz--) {
293             idx = 5*(*vi++);
294             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
295             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
296             v  += 25;
297           }
298           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
299           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
300           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
301           idiag  += 25;
302           i2     += 5;
303         }
304         break;
305       case 6:
306         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
307         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
308         i2     = 6;
309         idiag += 36;
310         for (i=1; i<m; i++) {
311           v  = aa + 36*ai[i];
312           vi = aj + ai[i];
313           nz = diag[i] - ai[i];
314           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
315           while (nz--) {
316             idx = 6*(*vi++);
317             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
318             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
319             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
320             v  += 36;
321           }
322           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
323           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
324           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
325           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
326           idiag  += 36;
327           i2     += 6;
328         }
329         break;
330       case 7:
331         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
332         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
333         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
334         i2     = 7;
335         idiag += 49;
336         for (i=1; i<m; i++) {
337           v  = aa + 49*ai[i];
338           vi = aj + ai[i];
339           nz = diag[i] - ai[i];
340           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
341           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
342           while (nz--) {
343             idx = 7*(*vi++);
344             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
345             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
346             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
347             v  += 49;
348           }
349           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
350           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
351           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
352           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
353           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
354           idiag  += 49;
355           i2     += 7;
356         }
357         break;
358       default:
359         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
360         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
361         i2     = bs;
362         idiag += bs2;
363         for (i=1; i<m; i++) {
364           v  = aa + bs2*ai[i];
365           vi = aj + ai[i];
366           nz = diag[i] - ai[i];
367 
368           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
369           /* copy all rows of x that are needed into contiguous space */
370           workt = work;
371           for (j=0; j<nz; j++) {
372             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
373             workt += bs;
374           }
375           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
376           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
377           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
378 
379           idiag += bs2;
380           i2    += bs;
381         }
382         break;
383       }
384       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
385       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
386       xb = t;
387     }
388     else xb = b;
389     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
390       idiag = a->idiag+bs2*(a->mbs-1);
391       i2 = bs * (m-1);
392       switch (bs) {
393       case 1:
394         s[0]  = xb[i2];
395         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
396         x[i2] = xw[0];
397         i2   -= 1;
398         for (i=m-2; i>=0; i--) {
399           v  = aa + (diag[i]+1);
400           vi = aj + diag[i] + 1;
401           nz = ai[i+1] - diag[i] - 1;
402           s[0] = xb[i2];
403           for (j=0; j<nz; j++) {
404             xw[0] = x[vi[j]];
405             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
406           }
407           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
408           x[i2]  = xw[0];
409           idiag -= 1;
410           i2    -= 1;
411         }
412         break;
413       case 2:
414         s[0]  = xb[i2]; s[1] = xb[i2+1];
415         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
416         x[i2] = xw[0]; x[i2+1] = xw[1];
417         i2    -= 2;
418         idiag -= 4;
419         for (i=m-2; i>=0; i--) {
420           v  = aa + 4*(diag[i] + 1);
421           vi = aj + diag[i] + 1;
422           nz = ai[i+1] - diag[i] - 1;
423           s[0] = xb[i2]; s[1] = xb[i2+1];
424           for (j=0; j<nz; j++) {
425             idx = 2*vi[j];
426             it  = 4*j;
427             xw[0] = x[idx]; xw[1] = x[1+idx];
428             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
429           }
430           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
431           x[i2]   = xw[0]; x[i2+1] = xw[1];
432           idiag  -= 4;
433           i2     -= 2;
434         }
435         break;
436       case 3:
437         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
438         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
439         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
440         i2    -= 3;
441         idiag -= 9;
442         for (i=m-2; i>=0; i--) {
443           v  = aa + 9*(diag[i]+1);
444           vi = aj + diag[i] + 1;
445           nz = ai[i+1] - diag[i] - 1;
446           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
447           while (nz--) {
448             idx = 3*(*vi++);
449             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
450             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
451             v  += 9;
452           }
453           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
454           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
455           idiag  -= 9;
456           i2     -= 3;
457         }
458         break;
459       case 4:
460         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
461         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
462         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
463         i2    -= 4;
464         idiag -= 16;
465         for (i=m-2; i>=0; i--) {
466           v  = aa + 16*(diag[i]+1);
467           vi = aj + diag[i] + 1;
468           nz = ai[i+1] - diag[i] - 1;
469           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
470           while (nz--) {
471             idx = 4*(*vi++);
472             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
473             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
474             v  += 16;
475           }
476           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
477           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
478           idiag  -= 16;
479           i2     -= 4;
480         }
481         break;
482       case 5:
483         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
484         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
485         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
486         i2    -= 5;
487         idiag -= 25;
488         for (i=m-2; i>=0; i--) {
489           v  = aa + 25*(diag[i]+1);
490           vi = aj + diag[i] + 1;
491           nz = ai[i+1] - diag[i] - 1;
492           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
493           while (nz--) {
494             idx = 5*(*vi++);
495             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
496             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
497             v  += 25;
498           }
499           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
500           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
501           idiag  -= 25;
502           i2     -= 5;
503         }
504         break;
505       case 6:
506         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
507         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
508         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
509         i2    -= 6;
510         idiag -= 36;
511         for (i=m-2; i>=0; i--) {
512           v  = aa + 36*(diag[i]+1);
513           vi = aj + diag[i] + 1;
514           nz = ai[i+1] - diag[i] - 1;
515           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
516           while (nz--) {
517             idx = 6*(*vi++);
518             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
519             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
520             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
521             v  += 36;
522           }
523           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
524           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
525           idiag  -= 36;
526           i2     -= 6;
527         }
528         break;
529       case 7:
530         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
531         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
532         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
533         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
534         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
535         i2    -= 7;
536         idiag -= 49;
537         for (i=m-2; i>=0; i--) {
538           v  = aa + 49*(diag[i]+1);
539           vi = aj + diag[i] + 1;
540           nz = ai[i+1] - diag[i] - 1;
541           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
542           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
543           while (nz--) {
544             idx = 7*(*vi++);
545             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
546             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
547             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
548             v  += 49;
549           }
550           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
551           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
552           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
553           idiag  -= 49;
554           i2     -= 7;
555         }
556         break;
557       default:
558         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
559         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
560         i2    -= bs;
561         idiag -= bs2;
562         for (i=m-2; i>=0; i--) {
563           v  = aa + bs2*(diag[i]+1);
564           vi = aj + diag[i] + 1;
565           nz = ai[i+1] - diag[i] - 1;
566 
567           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
568           /* copy all rows of x that are needed into contiguous space */
569           workt = work;
570           for (j=0; j<nz; j++) {
571             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
572             workt += bs;
573           }
574           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
575           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
576 
577           idiag -= bs2;
578           i2    -= bs;
579         }
580         break;
581       }
582       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
583     }
584     its--;
585   }
586   while (its--) {
587     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
588       idiag = a->idiag;
589       i2 = 0;
590       switch (bs) {
591       case 1:
592         for (i=0; i<m; i++) {
593           v  = aa + ai[i];
594           vi = aj + ai[i];
595           nz = ai[i+1] - ai[i];
596           s[0] = b[i2];
597           for (j=0; j<nz; j++) {
598             xw[0] = x[vi[j]];
599             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
600           }
601           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
602           x[i2] += xw[0];
603           idiag += 1;
604           i2    += 1;
605         }
606         break;
607       case 2:
608         for (i=0; i<m; i++) {
609           v  = aa + 4*ai[i];
610           vi = aj + ai[i];
611           nz = ai[i+1] - ai[i];
612           s[0] = b[i2]; s[1] = b[i2+1];
613           for (j=0; j<nz; j++) {
614             idx = 2*vi[j];
615             it  = 4*j;
616             xw[0] = x[idx]; xw[1] = x[1+idx];
617             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
618           }
619           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
620           x[i2]  += xw[0]; x[i2+1] += xw[1];
621           idiag  += 4;
622           i2     += 2;
623         }
624         break;
625       case 3:
626         for (i=0; i<m; i++) {
627           v  = aa + 9*ai[i];
628           vi = aj + ai[i];
629           nz = ai[i+1] - ai[i];
630           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
631           while (nz--) {
632             idx = 3*(*vi++);
633             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
634             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
635             v  += 9;
636           }
637           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
638           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
639           idiag  += 9;
640           i2     += 3;
641         }
642         break;
643       case 4:
644         for (i=0; i<m; i++) {
645           v  = aa + 16*ai[i];
646           vi = aj + ai[i];
647           nz = ai[i+1] - ai[i];
648           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
649           while (nz--) {
650             idx = 4*(*vi++);
651             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
652             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
653             v  += 16;
654           }
655           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
656           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
657           idiag  += 16;
658           i2     += 4;
659         }
660         break;
661       case 5:
662         for (i=0; i<m; i++) {
663           v  = aa + 25*ai[i];
664           vi = aj + ai[i];
665           nz = ai[i+1] - ai[i];
666           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
667           while (nz--) {
668             idx = 5*(*vi++);
669             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
670             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
671             v  += 25;
672           }
673           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
674           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
675           idiag  += 25;
676           i2     += 5;
677         }
678         break;
679       case 6:
680         for (i=0; i<m; i++) {
681           v  = aa + 36*ai[i];
682           vi = aj + ai[i];
683           nz = ai[i+1] - ai[i];
684           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
685           while (nz--) {
686             idx = 6*(*vi++);
687             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
688             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
689             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
690             v  += 36;
691           }
692           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
693           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
694           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
695           idiag  += 36;
696           i2     += 6;
697         }
698         break;
699       case 7:
700         for (i=0; i<m; i++) {
701           v  = aa + 49*ai[i];
702           vi = aj + ai[i];
703           nz = ai[i+1] - ai[i];
704           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
705           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
706           while (nz--) {
707             idx = 7*(*vi++);
708             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
709             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
710             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
711             v  += 49;
712           }
713           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
714           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
715           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
716           idiag  += 49;
717           i2     += 7;
718         }
719         break;
720       default:
721         for (i=0; i<m; i++) {
722           v  = aa + bs2*ai[i];
723           vi = aj + ai[i];
724           nz = ai[i+1] - ai[i];
725 
726           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
727           /* copy all rows of x that are needed into contiguous space */
728           workt = work;
729           for (j=0; j<nz; j++) {
730             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
731             workt += bs;
732           }
733           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
734           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
735 
736           idiag += bs2;
737           i2    += bs;
738         }
739         break;
740       }
741       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
742     }
743     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
744       idiag = a->idiag+bs2*(a->mbs-1);
745       i2 = bs * (m-1);
746       switch (bs) {
747       case 1:
748         for (i=m-1; i>=0; i--) {
749           v  = aa + ai[i];
750           vi = aj + ai[i];
751           nz = ai[i+1] - ai[i];
752           s[0] = b[i2];
753           for (j=0; j<nz; j++) {
754             xw[0] = x[vi[j]];
755             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
756           }
757           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
758           x[i2] += xw[0];
759           idiag -= 1;
760           i2    -= 1;
761         }
762         break;
763       case 2:
764         for (i=m-1; i>=0; i--) {
765           v  = aa + 4*ai[i];
766           vi = aj + ai[i];
767           nz = ai[i+1] - ai[i];
768           s[0] = b[i2]; s[1] = b[i2+1];
769           for (j=0; j<nz; j++) {
770             idx = 2*vi[j];
771             it  = 4*j;
772             xw[0] = x[idx]; xw[1] = x[1+idx];
773             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
774           }
775           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
776           x[i2]  += xw[0]; x[i2+1] += xw[1];
777           idiag  -= 4;
778           i2     -= 2;
779         }
780         break;
781       case 3:
782         for (i=m-1; i>=0; i--) {
783           v  = aa + 9*ai[i];
784           vi = aj + ai[i];
785           nz = ai[i+1] - ai[i];
786           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
787           while (nz--) {
788             idx = 3*(*vi++);
789             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
790             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
791             v  += 9;
792           }
793           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
794           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
795           idiag  -= 9;
796           i2     -= 3;
797         }
798         break;
799       case 4:
800         for (i=m-1; i>=0; i--) {
801           v  = aa + 16*ai[i];
802           vi = aj + ai[i];
803           nz = ai[i+1] - ai[i];
804           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
805           while (nz--) {
806             idx = 4*(*vi++);
807             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
808             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
809             v  += 16;
810           }
811           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
812           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
813           idiag  -= 16;
814           i2     -= 4;
815         }
816         break;
817       case 5:
818         for (i=m-1; i>=0; i--) {
819           v  = aa + 25*ai[i];
820           vi = aj + ai[i];
821           nz = ai[i+1] - ai[i];
822           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
823           while (nz--) {
824             idx = 5*(*vi++);
825             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
826             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
827             v  += 25;
828           }
829           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
830           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
831           idiag  -= 25;
832           i2     -= 5;
833         }
834         break;
835       case 6:
836         for (i=m-1; i>=0; i--) {
837           v  = aa + 36*ai[i];
838           vi = aj + ai[i];
839           nz = ai[i+1] - ai[i];
840           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
841           while (nz--) {
842             idx = 6*(*vi++);
843             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
844             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
845             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
846             v  += 36;
847           }
848           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
849           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
850           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
851           idiag  -= 36;
852           i2     -= 6;
853         }
854         break;
855       case 7:
856         for (i=m-1; i>=0; i--) {
857           v  = aa + 49*ai[i];
858           vi = aj + ai[i];
859           nz = ai[i+1] - ai[i];
860           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
861           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
862           while (nz--) {
863             idx = 7*(*vi++);
864             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
865             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
866             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
867             v  += 49;
868           }
869           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
870           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
871           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
872           idiag  -= 49;
873           i2     -= 7;
874         }
875         break;
876       default:
877         for (i=m-1; i>=0; i--) {
878           v  = aa + bs2*ai[i];
879           vi = aj + ai[i];
880           nz = ai[i+1] - ai[i];
881 
882           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
883           /* copy all rows of x that are needed into contiguous space */
884           workt = work;
885           for (j=0; j<nz; j++) {
886             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
887             workt += bs;
888           }
889           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
890           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
891 
892           idiag -= bs2;
893           i2    -= bs;
894         }
895         break;
896       }
897       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
898     }
899   }
900   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
901   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
902   PetscFunctionReturn(0);
903 }
904 
905 
906 /*
907     Special version for direct calls from Fortran (Used in PETSc-fun3d)
908 */
909 #if defined(PETSC_HAVE_FORTRAN_CAPS)
910 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
911 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
912 #define matsetvaluesblocked4_ matsetvaluesblocked4
913 #endif
914 
915 #undef __FUNCT__
916 #define __FUNCT__ "matsetvaluesblocked4_"
917 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
918 {
919   Mat               A  = *AA;
920   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
921   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
922   PetscInt          *ai    =a->i,*ailen=a->ilen;
923   PetscInt          *aj    =a->j,stepval,lastcol = -1;
924   const PetscScalar *value = v;
925   MatScalar         *ap,*aa = a->a,*bap;
926 
927   PetscFunctionBegin;
928   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
929   stepval = (n-1)*4;
930   for (k=0; k<m; k++) { /* loop over added rows */
931     row  = im[k];
932     rp   = aj + ai[row];
933     ap   = aa + 16*ai[row];
934     nrow = ailen[row];
935     low  = 0;
936     high = nrow;
937     for (l=0; l<n; l++) { /* loop over added columns */
938       col = in[l];
939       if (col <= lastcol)  low = 0;
940       else                high = nrow;
941       lastcol = col;
942       value   = v + k*(stepval+4 + l)*4;
943       while (high-low > 7) {
944         t = (low+high)/2;
945         if (rp[t] > col) high = t;
946         else             low  = t;
947       }
948       for (i=low; i<high; i++) {
949         if (rp[i] > col) break;
950         if (rp[i] == col) {
951           bap = ap +  16*i;
952           for (ii=0; ii<4; ii++,value+=stepval) {
953             for (jj=ii; jj<16; jj+=4) {
954               bap[jj] += *value++;
955             }
956           }
957           goto noinsert2;
958         }
959       }
960       N = nrow++ - 1;
961       high++; /* added new column index thus must search to one higher than before */
962       /* shift up all the later entries in this row */
963       for (ii=N; ii>=i; ii--) {
964         rp[ii+1] = rp[ii];
965         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
966       }
967       if (N >= i) {
968         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
969       }
970       rp[i] = col;
971       bap   = ap +  16*i;
972       for (ii=0; ii<4; ii++,value+=stepval) {
973         for (jj=ii; jj<16; jj+=4) {
974           bap[jj] = *value++;
975         }
976       }
977       noinsert2:;
978       low = i;
979     }
980     ailen[row] = nrow;
981   }
982   PetscFunctionReturnVoid();
983 }
984 
985 #if defined(PETSC_HAVE_FORTRAN_CAPS)
986 #define matsetvalues4_ MATSETVALUES4
987 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
988 #define matsetvalues4_ matsetvalues4
989 #endif
990 
991 #undef __FUNCT__
992 #define __FUNCT__ "MatSetValues4_"
993 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
994 {
995   Mat         A  = *AA;
996   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
997   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
998   PetscInt    *ai=a->i,*ailen=a->ilen;
999   PetscInt    *aj=a->j,brow,bcol;
1000   PetscInt    ridx,cidx,lastcol = -1;
1001   MatScalar   *ap,value,*aa=a->a,*bap;
1002 
1003   PetscFunctionBegin;
1004   for (k=0; k<m; k++) { /* loop over added rows */
1005     row  = im[k]; brow = row/4;
1006     rp   = aj + ai[brow];
1007     ap   = aa + 16*ai[brow];
1008     nrow = ailen[brow];
1009     low  = 0;
1010     high = nrow;
1011     for (l=0; l<n; l++) { /* loop over added columns */
1012       col   = in[l]; bcol = col/4;
1013       ridx  = row % 4; cidx = col % 4;
1014       value = v[l + k*n];
1015       if (col <= lastcol)  low = 0;
1016       else                high = nrow;
1017       lastcol = col;
1018       while (high-low > 7) {
1019         t = (low+high)/2;
1020         if (rp[t] > bcol) high = t;
1021         else              low  = t;
1022       }
1023       for (i=low; i<high; i++) {
1024         if (rp[i] > bcol) break;
1025         if (rp[i] == bcol) {
1026           bap   = ap +  16*i + 4*cidx + ridx;
1027           *bap += value;
1028           goto noinsert1;
1029         }
1030       }
1031       N = nrow++ - 1;
1032       high++; /* added new column thus must search to one higher than before */
1033       /* shift up all the later entries in this row */
1034       for (ii=N; ii>=i; ii--) {
1035         rp[ii+1] = rp[ii];
1036         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1037       }
1038       if (N>=i) {
1039         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1040       }
1041       rp[i]                    = bcol;
1042       ap[16*i + 4*cidx + ridx] = value;
1043 noinsert1:;
1044       low = i;
1045     }
1046     ailen[brow] = nrow;
1047   }
1048   PetscFunctionReturnVoid();
1049 }
1050 
1051 /*
1052      Checks for missing diagonals
1053 */
1054 #undef __FUNCT__
1055 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1056 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1057 {
1058   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1059   PetscErrorCode ierr;
1060   PetscInt       *diag,*ii = a->i,i;
1061 
1062   PetscFunctionBegin;
1063   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1064   *missing = PETSC_FALSE;
1065   if (A->rmap->n > 0 && !ii) {
1066     *missing = PETSC_TRUE;
1067     if (d) *d = 0;
1068     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1069   } else {
1070     diag = a->diag;
1071     for (i=0; i<a->mbs; i++) {
1072       if (diag[i] >= ii[i+1]) {
1073         *missing = PETSC_TRUE;
1074         if (d) *d = i;
1075         PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);
1076         break;
1077       }
1078     }
1079   }
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 #undef __FUNCT__
1084 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1085 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1086 {
1087   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1088   PetscErrorCode ierr;
1089   PetscInt       i,j,m = a->mbs;
1090 
1091   PetscFunctionBegin;
1092   if (!a->diag) {
1093     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1094     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1095     a->free_diag = PETSC_TRUE;
1096   }
1097   for (i=0; i<m; i++) {
1098     a->diag[i] = a->i[i+1];
1099     for (j=a->i[i]; j<a->i[i+1]; j++) {
1100       if (a->j[j] == i) {
1101         a->diag[i] = j;
1102         break;
1103       }
1104     }
1105   }
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 
1110 #undef __FUNCT__
1111 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1112 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1113 {
1114   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1115   PetscErrorCode ierr;
1116   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1117   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1118 
1119   PetscFunctionBegin;
1120   *nn = n;
1121   if (!ia) PetscFunctionReturn(0);
1122   if (symmetric) {
1123     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1124     nz   = tia[n];
1125   } else {
1126     tia = a->i; tja = a->j;
1127   }
1128 
1129   if (!blockcompressed && bs > 1) {
1130     (*nn) *= bs;
1131     /* malloc & create the natural set of indices */
1132     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1133     if (n) {
1134       (*ia)[0] = 0;
1135       for (j=1; j<bs; j++) {
1136         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1137       }
1138     }
1139 
1140     for (i=1; i<n; i++) {
1141       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1142       for (j=1; j<bs; j++) {
1143         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1144       }
1145     }
1146     if (n) {
1147       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1148     }
1149 
1150     if (inja) {
1151       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1152       cnt = 0;
1153       for (i=0; i<n; i++) {
1154         for (j=0; j<bs; j++) {
1155           for (k=tia[i]; k<tia[i+1]; k++) {
1156             for (l=0; l<bs; l++) {
1157               (*ja)[cnt++] = bs*tja[k] + l;
1158             }
1159           }
1160         }
1161       }
1162     }
1163 
1164     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1165       ierr = PetscFree(tia);CHKERRQ(ierr);
1166       ierr = PetscFree(tja);CHKERRQ(ierr);
1167     }
1168   } else if (oshift == 1) {
1169     if (symmetric) {
1170       nz = tia[A->rmap->n/bs];
1171       /*  add 1 to i and j indices */
1172       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1173       *ia = tia;
1174       if (ja) {
1175         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1176         *ja = tja;
1177       }
1178     } else {
1179       nz = a->i[A->rmap->n/bs];
1180       /* malloc space and  add 1 to i and j indices */
1181       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1182       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1183       if (ja) {
1184         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1185         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1186       }
1187     }
1188   } else {
1189     *ia = tia;
1190     if (ja) *ja = tja;
1191   }
1192   PetscFunctionReturn(0);
1193 }
1194 
1195 #undef __FUNCT__
1196 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1197 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1198 {
1199   PetscErrorCode ierr;
1200 
1201   PetscFunctionBegin;
1202   if (!ia) PetscFunctionReturn(0);
1203   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1204     ierr = PetscFree(*ia);CHKERRQ(ierr);
1205     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1206   }
1207   PetscFunctionReturn(0);
1208 }
1209 
1210 #undef __FUNCT__
1211 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1212 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1213 {
1214   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1215   PetscErrorCode ierr;
1216 
1217   PetscFunctionBegin;
1218 #if defined(PETSC_USE_LOG)
1219   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1220 #endif
1221   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1222   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1223   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1224   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1225   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1226   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1227   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1228   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1229   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1230   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1231   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1232   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1233   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1234 
1235   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1236   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1237   ierr = PetscFree(A->data);CHKERRQ(ierr);
1238 
1239   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1240   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1241   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1242   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1243   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1244   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1245   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1246   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1247   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1248   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1249   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 #undef __FUNCT__
1254 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1255 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1256 {
1257   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1258   PetscErrorCode ierr;
1259 
1260   PetscFunctionBegin;
1261   switch (op) {
1262   case MAT_ROW_ORIENTED:
1263     a->roworiented = flg;
1264     break;
1265   case MAT_KEEP_NONZERO_PATTERN:
1266     a->keepnonzeropattern = flg;
1267     break;
1268   case MAT_NEW_NONZERO_LOCATIONS:
1269     a->nonew = (flg ? 0 : 1);
1270     break;
1271   case MAT_NEW_NONZERO_LOCATION_ERR:
1272     a->nonew = (flg ? -1 : 0);
1273     break;
1274   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1275     a->nonew = (flg ? -2 : 0);
1276     break;
1277   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1278     a->nounused = (flg ? -1 : 0);
1279     break;
1280   case MAT_NEW_DIAGONALS:
1281   case MAT_IGNORE_OFF_PROC_ENTRIES:
1282   case MAT_USE_HASH_TABLE:
1283     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1284     break;
1285   case MAT_SPD:
1286   case MAT_SYMMETRIC:
1287   case MAT_STRUCTURALLY_SYMMETRIC:
1288   case MAT_HERMITIAN:
1289   case MAT_SYMMETRY_ETERNAL:
1290     /* These options are handled directly by MatSetOption() */
1291     break;
1292   default:
1293     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1294   }
1295   PetscFunctionReturn(0);
1296 }
1297 
1298 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1299 #undef __FUNCT__
1300 #define __FUNCT__ "MatGetRow_SeqBAIJ_private"
1301 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1302 {
1303   PetscErrorCode ierr;
1304   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1305   MatScalar      *aa_i;
1306   PetscScalar    *v_i;
1307 
1308   PetscFunctionBegin;
1309   bs  = A->rmap->bs;
1310   bs2 = bs*bs;
1311   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1312 
1313   bn  = row/bs;   /* Block number */
1314   bp  = row % bs; /* Block Position */
1315   M   = ai[bn+1] - ai[bn];
1316   *nz = bs*M;
1317 
1318   if (v) {
1319     *v = 0;
1320     if (*nz) {
1321       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1322       for (i=0; i<M; i++) { /* for each block in the block row */
1323         v_i  = *v + i*bs;
1324         aa_i = aa + bs2*(ai[bn] + i);
1325         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1326       }
1327     }
1328   }
1329 
1330   if (idx) {
1331     *idx = 0;
1332     if (*nz) {
1333       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1334       for (i=0; i<M; i++) { /* for each block in the block row */
1335         idx_i = *idx + i*bs;
1336         itmp  = bs*aj[ai[bn] + i];
1337         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1338       }
1339     }
1340   }
1341   PetscFunctionReturn(0);
1342 }
1343 
1344 #undef __FUNCT__
1345 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1346 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1347 {
1348   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1349   PetscErrorCode ierr;
1350 
1351   PetscFunctionBegin;
1352   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1353   PetscFunctionReturn(0);
1354 }
1355 
1356 #undef __FUNCT__
1357 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1358 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1359 {
1360   PetscErrorCode ierr;
1361 
1362   PetscFunctionBegin;
1363   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1364   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1365   PetscFunctionReturn(0);
1366 }
1367 
1368 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1369 
1370 #undef __FUNCT__
1371 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1372 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1373 {
1374   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1375   Mat            C;
1376   PetscErrorCode ierr;
1377   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1378   PetscInt       *rows,*cols,bs2=a->bs2;
1379   MatScalar      *array;
1380 
1381   PetscFunctionBegin;
1382   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1383   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1384     ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr);
1385 
1386     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1387     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1388     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1389     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1390     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1391     ierr = PetscFree(col);CHKERRQ(ierr);
1392   } else {
1393     C = *B;
1394   }
1395 
1396   array = a->a;
1397   ierr  = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr);
1398   for (i=0; i<mbs; i++) {
1399     cols[0] = i*bs;
1400     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1401     len = ai[i+1] - ai[i];
1402     for (j=0; j<len; j++) {
1403       rows[0] = (*aj++)*bs;
1404       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1405       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1406       array += bs2;
1407     }
1408   }
1409   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1410 
1411   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1412   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1413 
1414   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1415     *B = C;
1416   } else {
1417     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1418   }
1419   PetscFunctionReturn(0);
1420 }
1421 
1422 #undef __FUNCT__
1423 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1424 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1425 {
1426   PetscErrorCode ierr;
1427   Mat            Btrans;
1428 
1429   PetscFunctionBegin;
1430   *f   = PETSC_FALSE;
1431   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1432   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1433   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1434   PetscFunctionReturn(0);
1435 }
1436 
1437 #undef __FUNCT__
1438 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1439 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1440 {
1441   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1442   PetscErrorCode ierr;
1443   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1444   int            fd;
1445   PetscScalar    *aa;
1446   FILE           *file;
1447 
1448   PetscFunctionBegin;
1449   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1450   ierr        = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr);
1451   col_lens[0] = MAT_FILE_CLASSID;
1452 
1453   col_lens[1] = A->rmap->N;
1454   col_lens[2] = A->cmap->n;
1455   col_lens[3] = a->nz*bs2;
1456 
1457   /* store lengths of each row and write (including header) to file */
1458   count = 0;
1459   for (i=0; i<a->mbs; i++) {
1460     for (j=0; j<bs; j++) {
1461       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1462     }
1463   }
1464   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1465   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1466 
1467   /* store column indices (zero start index) */
1468   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1469   count = 0;
1470   for (i=0; i<a->mbs; i++) {
1471     for (j=0; j<bs; j++) {
1472       for (k=a->i[i]; k<a->i[i+1]; k++) {
1473         for (l=0; l<bs; l++) {
1474           jj[count++] = bs*a->j[k] + l;
1475         }
1476       }
1477     }
1478   }
1479   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1480   ierr = PetscFree(jj);CHKERRQ(ierr);
1481 
1482   /* store nonzero values */
1483   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1484   count = 0;
1485   for (i=0; i<a->mbs; i++) {
1486     for (j=0; j<bs; j++) {
1487       for (k=a->i[i]; k<a->i[i+1]; k++) {
1488         for (l=0; l<bs; l++) {
1489           aa[count++] = a->a[bs2*k + l*bs + j];
1490         }
1491       }
1492     }
1493   }
1494   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1495   ierr = PetscFree(aa);CHKERRQ(ierr);
1496 
1497   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1498   if (file) {
1499     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1500   }
1501   PetscFunctionReturn(0);
1502 }
1503 
1504 #undef __FUNCT__
1505 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1506 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1507 {
1508   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1509   PetscErrorCode    ierr;
1510   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1511   PetscViewerFormat format;
1512 
1513   PetscFunctionBegin;
1514   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1515   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1516     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1517   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1518     const char *matname;
1519     Mat        aij;
1520     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1521     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1522     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1523     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1524     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1525   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1526       PetscFunctionReturn(0);
1527   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1528     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1529     for (i=0; i<a->mbs; i++) {
1530       for (j=0; j<bs; j++) {
1531         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1532         for (k=a->i[i]; k<a->i[i+1]; k++) {
1533           for (l=0; l<bs; l++) {
1534 #if defined(PETSC_USE_COMPLEX)
1535             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1536               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1537                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1538             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1539               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1540                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1541             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1542               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1543             }
1544 #else
1545             if (a->a[bs2*k + l*bs + j] != 0.0) {
1546               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1547             }
1548 #endif
1549           }
1550         }
1551         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1552       }
1553     }
1554     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1555   } else {
1556     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1557     for (i=0; i<a->mbs; i++) {
1558       for (j=0; j<bs; j++) {
1559         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1560         for (k=a->i[i]; k<a->i[i+1]; k++) {
1561           for (l=0; l<bs; l++) {
1562 #if defined(PETSC_USE_COMPLEX)
1563             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1564               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1565                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1566             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1567               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1568                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1569             } else {
1570               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1571             }
1572 #else
1573             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1574 #endif
1575           }
1576         }
1577         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1578       }
1579     }
1580     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1581   }
1582   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1583   PetscFunctionReturn(0);
1584 }
1585 
1586 #include <petscdraw.h>
1587 #undef __FUNCT__
1588 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1589 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1590 {
1591   Mat               A = (Mat) Aa;
1592   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1593   PetscErrorCode    ierr;
1594   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1595   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1596   MatScalar         *aa;
1597   PetscViewer       viewer;
1598   PetscViewerFormat format;
1599 
1600   PetscFunctionBegin;
1601   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1602   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1603 
1604   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1605 
1606   /* loop over matrix elements drawing boxes */
1607 
1608   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1609     color = PETSC_DRAW_BLUE;
1610     for (i=0,row=0; i<mbs; i++,row+=bs) {
1611       for (j=a->i[i]; j<a->i[i+1]; j++) {
1612         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1613         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1614         aa  = a->a + j*bs2;
1615         for (k=0; k<bs; k++) {
1616           for (l=0; l<bs; l++) {
1617             if (PetscRealPart(*aa++) >=  0.) continue;
1618             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1619           }
1620         }
1621       }
1622     }
1623     color = PETSC_DRAW_CYAN;
1624     for (i=0,row=0; i<mbs; i++,row+=bs) {
1625       for (j=a->i[i]; j<a->i[i+1]; j++) {
1626         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1627         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1628         aa  = a->a + j*bs2;
1629         for (k=0; k<bs; k++) {
1630           for (l=0; l<bs; l++) {
1631             if (PetscRealPart(*aa++) != 0.) continue;
1632             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1633           }
1634         }
1635       }
1636     }
1637     color = PETSC_DRAW_RED;
1638     for (i=0,row=0; i<mbs; i++,row+=bs) {
1639       for (j=a->i[i]; j<a->i[i+1]; j++) {
1640         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1641         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1642         aa  = a->a + j*bs2;
1643         for (k=0; k<bs; k++) {
1644           for (l=0; l<bs; l++) {
1645             if (PetscRealPart(*aa++) <= 0.) continue;
1646             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1647           }
1648         }
1649       }
1650     }
1651   } else {
1652     /* use contour shading to indicate magnitude of values */
1653     /* first determine max of all nonzero values */
1654     PetscDraw popup;
1655     PetscReal scale,maxv = 0.0;
1656 
1657     for (i=0; i<a->nz*a->bs2; i++) {
1658       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1659     }
1660     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1661     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1662     if (popup) {
1663       ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1664     }
1665     for (i=0,row=0; i<mbs; i++,row+=bs) {
1666       for (j=a->i[i]; j<a->i[i+1]; j++) {
1667         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1668         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1669         aa  = a->a + j*bs2;
1670         for (k=0; k<bs; k++) {
1671           for (l=0; l<bs; l++) {
1672             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1673             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1674           }
1675         }
1676       }
1677     }
1678   }
1679   PetscFunctionReturn(0);
1680 }
1681 
1682 #undef __FUNCT__
1683 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1684 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1685 {
1686   PetscErrorCode ierr;
1687   PetscReal      xl,yl,xr,yr,w,h;
1688   PetscDraw      draw;
1689   PetscBool      isnull;
1690 
1691   PetscFunctionBegin;
1692   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1693   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1694 
1695   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1696   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1697   xr  += w;    yr += h;  xl = -w;     yl = -h;
1698   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1699   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1700   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1701   PetscFunctionReturn(0);
1702 }
1703 
1704 #undef __FUNCT__
1705 #define __FUNCT__ "MatView_SeqBAIJ"
1706 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1707 {
1708   PetscErrorCode ierr;
1709   PetscBool      iascii,isbinary,isdraw;
1710 
1711   PetscFunctionBegin;
1712   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1713   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1714   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1715   if (iascii) {
1716     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1717   } else if (isbinary) {
1718     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1719   } else if (isdraw) {
1720     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1721   } else {
1722     Mat B;
1723     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1724     ierr = MatView(B,viewer);CHKERRQ(ierr);
1725     ierr = MatDestroy(&B);CHKERRQ(ierr);
1726   }
1727   PetscFunctionReturn(0);
1728 }
1729 
1730 
1731 #undef __FUNCT__
1732 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1733 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1734 {
1735   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1736   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1737   PetscInt    *ai = a->i,*ailen = a->ilen;
1738   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1739   MatScalar   *ap,*aa = a->a;
1740 
1741   PetscFunctionBegin;
1742   for (k=0; k<m; k++) { /* loop over rows */
1743     row = im[k]; brow = row/bs;
1744     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1745     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1746     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1747     nrow = ailen[brow];
1748     for (l=0; l<n; l++) { /* loop over columns */
1749       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1750       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1751       col  = in[l];
1752       bcol = col/bs;
1753       cidx = col%bs;
1754       ridx = row%bs;
1755       high = nrow;
1756       low  = 0; /* assume unsorted */
1757       while (high-low > 5) {
1758         t = (low+high)/2;
1759         if (rp[t] > bcol) high = t;
1760         else             low  = t;
1761       }
1762       for (i=low; i<high; i++) {
1763         if (rp[i] > bcol) break;
1764         if (rp[i] == bcol) {
1765           *v++ = ap[bs2*i+bs*cidx+ridx];
1766           goto finished;
1767         }
1768       }
1769       *v++ = 0.0;
1770 finished:;
1771     }
1772   }
1773   PetscFunctionReturn(0);
1774 }
1775 
1776 #undef __FUNCT__
1777 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1778 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1779 {
1780   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1781   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1782   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1783   PetscErrorCode    ierr;
1784   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1785   PetscBool         roworiented=a->roworiented;
1786   const PetscScalar *value     = v;
1787   MatScalar         *ap,*aa = a->a,*bap;
1788 
1789   PetscFunctionBegin;
1790   if (roworiented) {
1791     stepval = (n-1)*bs;
1792   } else {
1793     stepval = (m-1)*bs;
1794   }
1795   for (k=0; k<m; k++) { /* loop over added rows */
1796     row = im[k];
1797     if (row < 0) continue;
1798 #if defined(PETSC_USE_DEBUG)
1799     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1800 #endif
1801     rp   = aj + ai[row];
1802     ap   = aa + bs2*ai[row];
1803     rmax = imax[row];
1804     nrow = ailen[row];
1805     low  = 0;
1806     high = nrow;
1807     for (l=0; l<n; l++) { /* loop over added columns */
1808       if (in[l] < 0) continue;
1809 #if defined(PETSC_USE_DEBUG)
1810       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1811 #endif
1812       col = in[l];
1813       if (roworiented) {
1814         value = v + (k*(stepval+bs) + l)*bs;
1815       } else {
1816         value = v + (l*(stepval+bs) + k)*bs;
1817       }
1818       if (col <= lastcol) low = 0;
1819       else high = nrow;
1820       lastcol = col;
1821       while (high-low > 7) {
1822         t = (low+high)/2;
1823         if (rp[t] > col) high = t;
1824         else             low  = t;
1825       }
1826       for (i=low; i<high; i++) {
1827         if (rp[i] > col) break;
1828         if (rp[i] == col) {
1829           bap = ap +  bs2*i;
1830           if (roworiented) {
1831             if (is == ADD_VALUES) {
1832               for (ii=0; ii<bs; ii++,value+=stepval) {
1833                 for (jj=ii; jj<bs2; jj+=bs) {
1834                   bap[jj] += *value++;
1835                 }
1836               }
1837             } else {
1838               for (ii=0; ii<bs; ii++,value+=stepval) {
1839                 for (jj=ii; jj<bs2; jj+=bs) {
1840                   bap[jj] = *value++;
1841                 }
1842               }
1843             }
1844           } else {
1845             if (is == ADD_VALUES) {
1846               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1847                 for (jj=0; jj<bs; jj++) {
1848                   bap[jj] += value[jj];
1849                 }
1850                 bap += bs;
1851               }
1852             } else {
1853               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1854                 for (jj=0; jj<bs; jj++) {
1855                   bap[jj]  = value[jj];
1856                 }
1857                 bap += bs;
1858               }
1859             }
1860           }
1861           goto noinsert2;
1862         }
1863       }
1864       if (nonew == 1) goto noinsert2;
1865       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1866       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1867       N = nrow++ - 1; high++;
1868       /* shift up all the later entries in this row */
1869       for (ii=N; ii>=i; ii--) {
1870         rp[ii+1] = rp[ii];
1871         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1872       }
1873       if (N >= i) {
1874         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1875       }
1876       rp[i] = col;
1877       bap   = ap +  bs2*i;
1878       if (roworiented) {
1879         for (ii=0; ii<bs; ii++,value+=stepval) {
1880           for (jj=ii; jj<bs2; jj+=bs) {
1881             bap[jj] = *value++;
1882           }
1883         }
1884       } else {
1885         for (ii=0; ii<bs; ii++,value+=stepval) {
1886           for (jj=0; jj<bs; jj++) {
1887             *bap++ = *value++;
1888           }
1889         }
1890       }
1891 noinsert2:;
1892       low = i;
1893     }
1894     ailen[row] = nrow;
1895   }
1896   PetscFunctionReturn(0);
1897 }
1898 
1899 #undef __FUNCT__
1900 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1901 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1902 {
1903   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1904   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1905   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1906   PetscErrorCode ierr;
1907   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1908   MatScalar      *aa  = a->a,*ap;
1909   PetscReal      ratio=0.6;
1910 
1911   PetscFunctionBegin;
1912   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1913 
1914   if (m) rmax = ailen[0];
1915   for (i=1; i<mbs; i++) {
1916     /* move each row back by the amount of empty slots (fshift) before it*/
1917     fshift += imax[i-1] - ailen[i-1];
1918     rmax    = PetscMax(rmax,ailen[i]);
1919     if (fshift) {
1920       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1921       N  = ailen[i];
1922       for (j=0; j<N; j++) {
1923         ip[j-fshift] = ip[j];
1924 
1925         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1926       }
1927     }
1928     ai[i] = ai[i-1] + ailen[i-1];
1929   }
1930   if (mbs) {
1931     fshift += imax[mbs-1] - ailen[mbs-1];
1932     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1933   }
1934 
1935   /* reset ilen and imax for each row */
1936   a->nonzerorowcnt = 0;
1937   for (i=0; i<mbs; i++) {
1938     ailen[i] = imax[i] = ai[i+1] - ai[i];
1939     a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1940   }
1941   a->nz = ai[mbs];
1942 
1943   /* diagonals may have moved, so kill the diagonal pointers */
1944   a->idiagvalid = PETSC_FALSE;
1945   if (fshift && a->diag) {
1946     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1947     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1948     a->diag = 0;
1949   }
1950   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1951   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1952   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1953   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1954 
1955   A->info.mallocs    += a->reallocs;
1956   a->reallocs         = 0;
1957   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1958   a->rmax             = rmax;
1959 
1960   ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1961   PetscFunctionReturn(0);
1962 }
1963 
1964 /*
1965    This function returns an array of flags which indicate the locations of contiguous
1966    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1967    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1968    Assume: sizes should be long enough to hold all the values.
1969 */
1970 #undef __FUNCT__
1971 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1972 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1973 {
1974   PetscInt  i,j,k,row;
1975   PetscBool flg;
1976 
1977   PetscFunctionBegin;
1978   for (i=0,j=0; i<n; j++) {
1979     row = idx[i];
1980     if (row%bs!=0) { /* Not the begining of a block */
1981       sizes[j] = 1;
1982       i++;
1983     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1984       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1985       i++;
1986     } else { /* Begining of the block, so check if the complete block exists */
1987       flg = PETSC_TRUE;
1988       for (k=1; k<bs; k++) {
1989         if (row+k != idx[i+k]) { /* break in the block */
1990           flg = PETSC_FALSE;
1991           break;
1992         }
1993       }
1994       if (flg) { /* No break in the bs */
1995         sizes[j] = bs;
1996         i       += bs;
1997       } else {
1998         sizes[j] = 1;
1999         i++;
2000       }
2001     }
2002   }
2003   *bs_max = j;
2004   PetscFunctionReturn(0);
2005 }
2006 
2007 #undef __FUNCT__
2008 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2009 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2010 {
2011   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2012   PetscErrorCode    ierr;
2013   PetscInt          i,j,k,count,*rows;
2014   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2015   PetscScalar       zero = 0.0;
2016   MatScalar         *aa;
2017   const PetscScalar *xx;
2018   PetscScalar       *bb;
2019 
2020   PetscFunctionBegin;
2021   /* fix right hand side if needed */
2022   if (x && b) {
2023     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2024     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2025     for (i=0; i<is_n; i++) {
2026       bb[is_idx[i]] = diag*xx[is_idx[i]];
2027     }
2028     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2029     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2030   }
2031 
2032   /* Make a copy of the IS and  sort it */
2033   /* allocate memory for rows,sizes */
2034   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2035 
2036   /* copy IS values to rows, and sort them */
2037   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2038   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2039 
2040   if (baij->keepnonzeropattern) {
2041     for (i=0; i<is_n; i++) sizes[i] = 1;
2042     bs_max          = is_n;
2043   } else {
2044     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2045     A->nonzerostate++;
2046   }
2047 
2048   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2049     row = rows[j];
2050     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2051     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2052     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2053     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2054       if (diag != (PetscScalar)0.0) {
2055         if (baij->ilen[row/bs] > 0) {
2056           baij->ilen[row/bs]       = 1;
2057           baij->j[baij->i[row/bs]] = row/bs;
2058 
2059           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2060         }
2061         /* Now insert all the diagonal values for this bs */
2062         for (k=0; k<bs; k++) {
2063           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2064         }
2065       } else { /* (diag == 0.0) */
2066         baij->ilen[row/bs] = 0;
2067       } /* end (diag == 0.0) */
2068     } else { /* (sizes[i] != bs) */
2069 #if defined(PETSC_USE_DEBUG)
2070       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2071 #endif
2072       for (k=0; k<count; k++) {
2073         aa[0] =  zero;
2074         aa   += bs;
2075       }
2076       if (diag != (PetscScalar)0.0) {
2077         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2078       }
2079     }
2080   }
2081 
2082   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2083   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2084   PetscFunctionReturn(0);
2085 }
2086 
2087 #undef __FUNCT__
2088 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2089 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2090 {
2091   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2092   PetscErrorCode    ierr;
2093   PetscInt          i,j,k,count;
2094   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2095   PetscScalar       zero = 0.0;
2096   MatScalar         *aa;
2097   const PetscScalar *xx;
2098   PetscScalar       *bb;
2099   PetscBool         *zeroed,vecs = PETSC_FALSE;
2100 
2101   PetscFunctionBegin;
2102   /* fix right hand side if needed */
2103   if (x && b) {
2104     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2105     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2106     vecs = PETSC_TRUE;
2107   }
2108 
2109   /* zero the columns */
2110   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2111   for (i=0; i<is_n; i++) {
2112     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2113     zeroed[is_idx[i]] = PETSC_TRUE;
2114   }
2115   for (i=0; i<A->rmap->N; i++) {
2116     if (!zeroed[i]) {
2117       row = i/bs;
2118       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2119         for (k=0; k<bs; k++) {
2120           col = bs*baij->j[j] + k;
2121           if (zeroed[col]) {
2122             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2123             if (vecs) bb[i] -= aa[0]*xx[col];
2124             aa[0] = 0.0;
2125           }
2126         }
2127       }
2128     } else if (vecs) bb[i] = diag*xx[i];
2129   }
2130   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2131   if (vecs) {
2132     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2133     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2134   }
2135 
2136   /* zero the rows */
2137   for (i=0; i<is_n; i++) {
2138     row   = is_idx[i];
2139     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2140     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2141     for (k=0; k<count; k++) {
2142       aa[0] =  zero;
2143       aa   += bs;
2144     }
2145     if (diag != (PetscScalar)0.0) {
2146       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2147     }
2148   }
2149   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2150   PetscFunctionReturn(0);
2151 }
2152 
2153 #undef __FUNCT__
2154 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2155 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2156 {
2157   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2158   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2159   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2160   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2161   PetscErrorCode ierr;
2162   PetscInt       ridx,cidx,bs2=a->bs2;
2163   PetscBool      roworiented=a->roworiented;
2164   MatScalar      *ap,value,*aa=a->a,*bap;
2165 
2166   PetscFunctionBegin;
2167   for (k=0; k<m; k++) { /* loop over added rows */
2168     row  = im[k];
2169     brow = row/bs;
2170     if (row < 0) continue;
2171 #if defined(PETSC_USE_DEBUG)
2172     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2173 #endif
2174     rp   = aj + ai[brow];
2175     ap   = aa + bs2*ai[brow];
2176     rmax = imax[brow];
2177     nrow = ailen[brow];
2178     low  = 0;
2179     high = nrow;
2180     for (l=0; l<n; l++) { /* loop over added columns */
2181       if (in[l] < 0) continue;
2182 #if defined(PETSC_USE_DEBUG)
2183       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2184 #endif
2185       col  = in[l]; bcol = col/bs;
2186       ridx = row % bs; cidx = col % bs;
2187       if (roworiented) {
2188         value = v[l + k*n];
2189       } else {
2190         value = v[k + l*m];
2191       }
2192       if (col <= lastcol) low = 0; else high = nrow;
2193       lastcol = col;
2194       while (high-low > 7) {
2195         t = (low+high)/2;
2196         if (rp[t] > bcol) high = t;
2197         else              low  = t;
2198       }
2199       for (i=low; i<high; i++) {
2200         if (rp[i] > bcol) break;
2201         if (rp[i] == bcol) {
2202           bap = ap +  bs2*i + bs*cidx + ridx;
2203           if (is == ADD_VALUES) *bap += value;
2204           else                  *bap  = value;
2205           goto noinsert1;
2206         }
2207       }
2208       if (nonew == 1) goto noinsert1;
2209       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2210       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2211       N = nrow++ - 1; high++;
2212       /* shift up all the later entries in this row */
2213       for (ii=N; ii>=i; ii--) {
2214         rp[ii+1] = rp[ii];
2215         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2216       }
2217       if (N>=i) {
2218         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2219       }
2220       rp[i]                      = bcol;
2221       ap[bs2*i + bs*cidx + ridx] = value;
2222       a->nz++;
2223       A->nonzerostate++;
2224 noinsert1:;
2225       low = i;
2226     }
2227     ailen[brow] = nrow;
2228   }
2229   PetscFunctionReturn(0);
2230 }
2231 
2232 #undef __FUNCT__
2233 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2234 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2235 {
2236   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2237   Mat            outA;
2238   PetscErrorCode ierr;
2239   PetscBool      row_identity,col_identity;
2240 
2241   PetscFunctionBegin;
2242   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2243   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2244   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2245   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2246 
2247   outA            = inA;
2248   inA->factortype = MAT_FACTOR_LU;
2249 
2250   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2251 
2252   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2253   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2254   a->row = row;
2255   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2256   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2257   a->col = col;
2258 
2259   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2260   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2261   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2262   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2263 
2264   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2265   if (!a->solve_work) {
2266     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2267     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2268   }
2269   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2270   PetscFunctionReturn(0);
2271 }
2272 
2273 #undef __FUNCT__
2274 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2275 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2276 {
2277   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2278   PetscInt    i,nz,mbs;
2279 
2280   PetscFunctionBegin;
2281   nz  = baij->maxnz;
2282   mbs = baij->mbs;
2283   for (i=0; i<nz; i++) {
2284     baij->j[i] = indices[i];
2285   }
2286   baij->nz = nz;
2287   for (i=0; i<mbs; i++) {
2288     baij->ilen[i] = baij->imax[i];
2289   }
2290   PetscFunctionReturn(0);
2291 }
2292 
2293 #undef __FUNCT__
2294 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2295 /*@
2296     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2297        in the matrix.
2298 
2299   Input Parameters:
2300 +  mat - the SeqBAIJ matrix
2301 -  indices - the column indices
2302 
2303   Level: advanced
2304 
2305   Notes:
2306     This can be called if you have precomputed the nonzero structure of the
2307   matrix and want to provide it to the matrix object to improve the performance
2308   of the MatSetValues() operation.
2309 
2310     You MUST have set the correct numbers of nonzeros per row in the call to
2311   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2312 
2313     MUST be called before any calls to MatSetValues();
2314 
2315 @*/
2316 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2317 {
2318   PetscErrorCode ierr;
2319 
2320   PetscFunctionBegin;
2321   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2322   PetscValidPointer(indices,2);
2323   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2324   PetscFunctionReturn(0);
2325 }
2326 
2327 #undef __FUNCT__
2328 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2329 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2330 {
2331   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2332   PetscErrorCode ierr;
2333   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2334   PetscReal      atmp;
2335   PetscScalar    *x,zero = 0.0;
2336   MatScalar      *aa;
2337   PetscInt       ncols,brow,krow,kcol;
2338 
2339   PetscFunctionBegin;
2340   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2341   bs  = A->rmap->bs;
2342   aa  = a->a;
2343   ai  = a->i;
2344   aj  = a->j;
2345   mbs = a->mbs;
2346 
2347   ierr = VecSet(v,zero);CHKERRQ(ierr);
2348   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2349   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2350   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2351   for (i=0; i<mbs; i++) {
2352     ncols = ai[1] - ai[0]; ai++;
2353     brow  = bs*i;
2354     for (j=0; j<ncols; j++) {
2355       for (kcol=0; kcol<bs; kcol++) {
2356         for (krow=0; krow<bs; krow++) {
2357           atmp = PetscAbsScalar(*aa);aa++;
2358           row  = brow + krow;   /* row index */
2359           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2360         }
2361       }
2362       aj++;
2363     }
2364   }
2365   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2366   PetscFunctionReturn(0);
2367 }
2368 
2369 #undef __FUNCT__
2370 #define __FUNCT__ "MatCopy_SeqBAIJ"
2371 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2372 {
2373   PetscErrorCode ierr;
2374 
2375   PetscFunctionBegin;
2376   /* If the two matrices have the same copy implementation, use fast copy. */
2377   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2378     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2379     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2380     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2381 
2382     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2383     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2384     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2385   } else {
2386     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2387   }
2388   PetscFunctionReturn(0);
2389 }
2390 
2391 #undef __FUNCT__
2392 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2393 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2394 {
2395   PetscErrorCode ierr;
2396 
2397   PetscFunctionBegin;
2398   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2399   PetscFunctionReturn(0);
2400 }
2401 
2402 #undef __FUNCT__
2403 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2404 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2405 {
2406   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2407 
2408   PetscFunctionBegin;
2409   *array = a->a;
2410   PetscFunctionReturn(0);
2411 }
2412 
2413 #undef __FUNCT__
2414 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2415 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2416 {
2417   PetscFunctionBegin;
2418   PetscFunctionReturn(0);
2419 }
2420 
2421 #undef __FUNCT__
2422 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ"
2423 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2424 {
2425   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2426   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2427   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2428   PetscErrorCode ierr;
2429 
2430   PetscFunctionBegin;
2431   /* Set the number of nonzeros in the new matrix */
2432   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2433   PetscFunctionReturn(0);
2434 }
2435 
2436 #undef __FUNCT__
2437 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2438 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2439 {
2440   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2441   PetscErrorCode ierr;
2442   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2443   PetscBLASInt   one=1;
2444 
2445   PetscFunctionBegin;
2446   if (str == SAME_NONZERO_PATTERN) {
2447     PetscScalar  alpha = a;
2448     PetscBLASInt bnz;
2449     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2450     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2451     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2452   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2453     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2454   } else {
2455     Mat      B;
2456     PetscInt *nnz;
2457     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2458     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2459     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2460     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2461     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2462     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2463     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2464     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2465     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2466     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2467     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2468     ierr = PetscFree(nnz);CHKERRQ(ierr);
2469   }
2470   PetscFunctionReturn(0);
2471 }
2472 
2473 #undef __FUNCT__
2474 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2475 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2476 {
2477   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2478   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2479   MatScalar   *aa = a->a;
2480 
2481   PetscFunctionBegin;
2482   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2483   PetscFunctionReturn(0);
2484 }
2485 
2486 #undef __FUNCT__
2487 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2488 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2489 {
2490   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2491   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2492   MatScalar   *aa = a->a;
2493 
2494   PetscFunctionBegin;
2495   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2496   PetscFunctionReturn(0);
2497 }
2498 
2499 #undef __FUNCT__
2500 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2501 /*
2502     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2503 */
2504 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2505 {
2506   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2507   PetscErrorCode ierr;
2508   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2509   PetscInt       nz = a->i[m],row,*jj,mr,col;
2510 
2511   PetscFunctionBegin;
2512   *nn = n;
2513   if (!ia) PetscFunctionReturn(0);
2514   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2515   else {
2516     ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2517     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2518     ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2519     jj   = a->j;
2520     for (i=0; i<nz; i++) {
2521       collengths[jj[i]]++;
2522     }
2523     cia[0] = oshift;
2524     for (i=0; i<n; i++) {
2525       cia[i+1] = cia[i] + collengths[i];
2526     }
2527     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2528     jj   = a->j;
2529     for (row=0; row<m; row++) {
2530       mr = a->i[row+1] - a->i[row];
2531       for (i=0; i<mr; i++) {
2532         col = *jj++;
2533 
2534         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2535       }
2536     }
2537     ierr = PetscFree(collengths);CHKERRQ(ierr);
2538     *ia  = cia; *ja = cja;
2539   }
2540   PetscFunctionReturn(0);
2541 }
2542 
2543 #undef __FUNCT__
2544 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2545 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2546 {
2547   PetscErrorCode ierr;
2548 
2549   PetscFunctionBegin;
2550   if (!ia) PetscFunctionReturn(0);
2551   ierr = PetscFree(*ia);CHKERRQ(ierr);
2552   ierr = PetscFree(*ja);CHKERRQ(ierr);
2553   PetscFunctionReturn(0);
2554 }
2555 
2556 /*
2557  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2558  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2559  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2560  */
2561 #undef __FUNCT__
2562 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color"
2563 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2564 {
2565   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2566   PetscErrorCode ierr;
2567   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2568   PetscInt       nz = a->i[m],row,*jj,mr,col;
2569   PetscInt       *cspidx;
2570 
2571   PetscFunctionBegin;
2572   *nn = n;
2573   if (!ia) PetscFunctionReturn(0);
2574 
2575   ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2576   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2577   ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2578   ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr);
2579   jj   = a->j;
2580   for (i=0; i<nz; i++) {
2581     collengths[jj[i]]++;
2582   }
2583   cia[0] = oshift;
2584   for (i=0; i<n; i++) {
2585     cia[i+1] = cia[i] + collengths[i];
2586   }
2587   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2588   jj   = a->j;
2589   for (row=0; row<m; row++) {
2590     mr = a->i[row+1] - a->i[row];
2591     for (i=0; i<mr; i++) {
2592       col = *jj++;
2593       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2594       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2595     }
2596   }
2597   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2598   *ia    = cia; *ja = cja;
2599   *spidx = cspidx;
2600   PetscFunctionReturn(0);
2601 }
2602 
2603 #undef __FUNCT__
2604 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color"
2605 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2606 {
2607   PetscErrorCode ierr;
2608 
2609   PetscFunctionBegin;
2610   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2611   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2612   PetscFunctionReturn(0);
2613 }
2614 
2615 #undef __FUNCT__
2616 #define __FUNCT__ "MatShift_SeqBAIJ"
2617 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2618 {
2619   PetscErrorCode ierr;
2620   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2621 
2622   PetscFunctionBegin;
2623   if (!Y->preallocated || !aij->nz) {
2624     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2625   }
2626   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2627   PetscFunctionReturn(0);
2628 }
2629 
2630 /* -------------------------------------------------------------------*/
2631 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2632                                        MatGetRow_SeqBAIJ,
2633                                        MatRestoreRow_SeqBAIJ,
2634                                        MatMult_SeqBAIJ_N,
2635                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2636                                        MatMultTranspose_SeqBAIJ,
2637                                        MatMultTransposeAdd_SeqBAIJ,
2638                                        0,
2639                                        0,
2640                                        0,
2641                                /* 10*/ 0,
2642                                        MatLUFactor_SeqBAIJ,
2643                                        0,
2644                                        0,
2645                                        MatTranspose_SeqBAIJ,
2646                                /* 15*/ MatGetInfo_SeqBAIJ,
2647                                        MatEqual_SeqBAIJ,
2648                                        MatGetDiagonal_SeqBAIJ,
2649                                        MatDiagonalScale_SeqBAIJ,
2650                                        MatNorm_SeqBAIJ,
2651                                /* 20*/ 0,
2652                                        MatAssemblyEnd_SeqBAIJ,
2653                                        MatSetOption_SeqBAIJ,
2654                                        MatZeroEntries_SeqBAIJ,
2655                                /* 24*/ MatZeroRows_SeqBAIJ,
2656                                        0,
2657                                        0,
2658                                        0,
2659                                        0,
2660                                /* 29*/ MatSetUp_SeqBAIJ,
2661                                        0,
2662                                        0,
2663                                        0,
2664                                        0,
2665                                /* 34*/ MatDuplicate_SeqBAIJ,
2666                                        0,
2667                                        0,
2668                                        MatILUFactor_SeqBAIJ,
2669                                        0,
2670                                /* 39*/ MatAXPY_SeqBAIJ,
2671                                        MatGetSubMatrices_SeqBAIJ,
2672                                        MatIncreaseOverlap_SeqBAIJ,
2673                                        MatGetValues_SeqBAIJ,
2674                                        MatCopy_SeqBAIJ,
2675                                /* 44*/ 0,
2676                                        MatScale_SeqBAIJ,
2677                                        MatShift_SeqBAIJ,
2678                                        0,
2679                                        MatZeroRowsColumns_SeqBAIJ,
2680                                /* 49*/ 0,
2681                                        MatGetRowIJ_SeqBAIJ,
2682                                        MatRestoreRowIJ_SeqBAIJ,
2683                                        MatGetColumnIJ_SeqBAIJ,
2684                                        MatRestoreColumnIJ_SeqBAIJ,
2685                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2686                                        0,
2687                                        0,
2688                                        0,
2689                                        MatSetValuesBlocked_SeqBAIJ,
2690                                /* 59*/ MatGetSubMatrix_SeqBAIJ,
2691                                        MatDestroy_SeqBAIJ,
2692                                        MatView_SeqBAIJ,
2693                                        0,
2694                                        0,
2695                                /* 64*/ 0,
2696                                        0,
2697                                        0,
2698                                        0,
2699                                        0,
2700                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2701                                        0,
2702                                        MatConvert_Basic,
2703                                        0,
2704                                        0,
2705                                /* 74*/ 0,
2706                                        MatFDColoringApply_BAIJ,
2707                                        0,
2708                                        0,
2709                                        0,
2710                                /* 79*/ 0,
2711                                        0,
2712                                        0,
2713                                        0,
2714                                        MatLoad_SeqBAIJ,
2715                                /* 84*/ 0,
2716                                        0,
2717                                        0,
2718                                        0,
2719                                        0,
2720                                /* 89*/ 0,
2721                                        0,
2722                                        0,
2723                                        0,
2724                                        0,
2725                                /* 94*/ 0,
2726                                        0,
2727                                        0,
2728                                        0,
2729                                        0,
2730                                /* 99*/ 0,
2731                                        0,
2732                                        0,
2733                                        0,
2734                                        0,
2735                                /*104*/ 0,
2736                                        MatRealPart_SeqBAIJ,
2737                                        MatImaginaryPart_SeqBAIJ,
2738                                        0,
2739                                        0,
2740                                /*109*/ 0,
2741                                        0,
2742                                        0,
2743                                        0,
2744                                        MatMissingDiagonal_SeqBAIJ,
2745                                /*114*/ 0,
2746                                        0,
2747                                        0,
2748                                        0,
2749                                        0,
2750                                /*119*/ 0,
2751                                        0,
2752                                        MatMultHermitianTranspose_SeqBAIJ,
2753                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2754                                        0,
2755                                /*124*/ 0,
2756                                        0,
2757                                        MatInvertBlockDiagonal_SeqBAIJ,
2758                                        0,
2759                                        0,
2760                                /*129*/ 0,
2761                                        0,
2762                                        0,
2763                                        0,
2764                                        0,
2765                                /*134*/ 0,
2766                                        0,
2767                                        0,
2768                                        0,
2769                                        0,
2770                                /*139*/ 0,
2771                                        0,
2772                                        0,
2773                                        MatFDColoringSetUp_SeqXAIJ,
2774                                        0,
2775                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ
2776 };
2777 
2778 #undef __FUNCT__
2779 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2780 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2781 {
2782   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2783   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2784   PetscErrorCode ierr;
2785 
2786   PetscFunctionBegin;
2787   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2788 
2789   /* allocate space for values if not already there */
2790   if (!aij->saved_values) {
2791     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2792     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2793   }
2794 
2795   /* copy values over */
2796   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2797   PetscFunctionReturn(0);
2798 }
2799 
2800 #undef __FUNCT__
2801 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2802 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2803 {
2804   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2805   PetscErrorCode ierr;
2806   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2807 
2808   PetscFunctionBegin;
2809   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2810   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2811 
2812   /* copy values over */
2813   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2814   PetscFunctionReturn(0);
2815 }
2816 
2817 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2818 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2819 
2820 #undef __FUNCT__
2821 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2822 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2823 {
2824   Mat_SeqBAIJ    *b;
2825   PetscErrorCode ierr;
2826   PetscInt       i,mbs,nbs,bs2;
2827   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2828 
2829   PetscFunctionBegin;
2830   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2831   if (nz == MAT_SKIP_ALLOCATION) {
2832     skipallocation = PETSC_TRUE;
2833     nz             = 0;
2834   }
2835 
2836   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2837   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2838   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2839   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2840 
2841   B->preallocated = PETSC_TRUE;
2842 
2843   mbs = B->rmap->n/bs;
2844   nbs = B->cmap->n/bs;
2845   bs2 = bs*bs;
2846 
2847   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2848 
2849   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2850   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2851   if (nnz) {
2852     for (i=0; i<mbs; i++) {
2853       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2854       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2855     }
2856   }
2857 
2858   b    = (Mat_SeqBAIJ*)B->data;
2859   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2860   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2861   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2862 
2863   if (!flg) {
2864     switch (bs) {
2865     case 1:
2866       B->ops->mult    = MatMult_SeqBAIJ_1;
2867       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2868       break;
2869     case 2:
2870       B->ops->mult    = MatMult_SeqBAIJ_2;
2871       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2872       break;
2873     case 3:
2874       B->ops->mult    = MatMult_SeqBAIJ_3;
2875       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2876       break;
2877     case 4:
2878       B->ops->mult    = MatMult_SeqBAIJ_4;
2879       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2880       break;
2881     case 5:
2882       B->ops->mult    = MatMult_SeqBAIJ_5;
2883       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2884       break;
2885     case 6:
2886       B->ops->mult    = MatMult_SeqBAIJ_6;
2887       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2888       break;
2889     case 7:
2890       B->ops->mult    = MatMult_SeqBAIJ_7;
2891       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2892       break;
2893     case 15:
2894       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2895       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2896       break;
2897     default:
2898       B->ops->mult    = MatMult_SeqBAIJ_N;
2899       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2900       break;
2901     }
2902   }
2903   B->ops->sor = MatSOR_SeqBAIJ;
2904   b->mbs = mbs;
2905   b->nbs = nbs;
2906   if (!skipallocation) {
2907     if (!b->imax) {
2908       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2909       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2910 
2911       b->free_imax_ilen = PETSC_TRUE;
2912     }
2913     /* b->ilen will count nonzeros in each block row so far. */
2914     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2915     if (!nnz) {
2916       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2917       else if (nz < 0) nz = 1;
2918       for (i=0; i<mbs; i++) b->imax[i] = nz;
2919       nz = nz*mbs;
2920     } else {
2921       nz = 0;
2922       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2923     }
2924 
2925     /* allocate the matrix space */
2926     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2927     ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2928     ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2929     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2930     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2931 
2932     b->singlemalloc = PETSC_TRUE;
2933     b->i[0]         = 0;
2934     for (i=1; i<mbs+1; i++) {
2935       b->i[i] = b->i[i-1] + b->imax[i-1];
2936     }
2937     b->free_a  = PETSC_TRUE;
2938     b->free_ij = PETSC_TRUE;
2939   } else {
2940     b->free_a  = PETSC_FALSE;
2941     b->free_ij = PETSC_FALSE;
2942   }
2943 
2944   b->bs2              = bs2;
2945   b->mbs              = mbs;
2946   b->nz               = 0;
2947   b->maxnz            = nz;
2948   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2949   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2950   PetscFunctionReturn(0);
2951 }
2952 
2953 #undef __FUNCT__
2954 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2955 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2956 {
2957   PetscInt       i,m,nz,nz_max=0,*nnz;
2958   PetscScalar    *values=0;
2959   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2960   PetscErrorCode ierr;
2961 
2962   PetscFunctionBegin;
2963   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2964   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2965   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2966   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2967   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2968   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2969   m    = B->rmap->n/bs;
2970 
2971   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2972   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2973   for (i=0; i<m; i++) {
2974     nz = ii[i+1]- ii[i];
2975     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2976     nz_max = PetscMax(nz_max, nz);
2977     nnz[i] = nz;
2978   }
2979   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2980   ierr = PetscFree(nnz);CHKERRQ(ierr);
2981 
2982   values = (PetscScalar*)V;
2983   if (!values) {
2984     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2985   }
2986   for (i=0; i<m; i++) {
2987     PetscInt          ncols  = ii[i+1] - ii[i];
2988     const PetscInt    *icols = jj + ii[i];
2989     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2990     if (!roworiented) {
2991       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2992     } else {
2993       PetscInt j;
2994       for (j=0; j<ncols; j++) {
2995         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2996         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2997       }
2998     }
2999   }
3000   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3001   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3002   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3003   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3004   PetscFunctionReturn(0);
3005 }
3006 
3007 /*MC
3008    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3009    block sparse compressed row format.
3010 
3011    Options Database Keys:
3012 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3013 
3014   Level: beginner
3015 
3016 .seealso: MatCreateSeqBAIJ()
3017 M*/
3018 
3019 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3020 
3021 #undef __FUNCT__
3022 #define __FUNCT__ "MatCreate_SeqBAIJ"
3023 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3024 {
3025   PetscErrorCode ierr;
3026   PetscMPIInt    size;
3027   Mat_SeqBAIJ    *b;
3028 
3029   PetscFunctionBegin;
3030   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3031   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3032 
3033   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3034   B->data = (void*)b;
3035   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3036 
3037   b->row          = 0;
3038   b->col          = 0;
3039   b->icol         = 0;
3040   b->reallocs     = 0;
3041   b->saved_values = 0;
3042 
3043   b->roworiented        = PETSC_TRUE;
3044   b->nonew              = 0;
3045   b->diag               = 0;
3046   B->spptr              = 0;
3047   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3048   b->keepnonzeropattern = PETSC_FALSE;
3049 
3050   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3051   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3052   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3053   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3054   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3055   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3056   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3057   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3058   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3059   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3060   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3061   PetscFunctionReturn(0);
3062 }
3063 
3064 #undef __FUNCT__
3065 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3066 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3067 {
3068   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3069   PetscErrorCode ierr;
3070   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3071 
3072   PetscFunctionBegin;
3073   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3074 
3075   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3076     c->imax           = a->imax;
3077     c->ilen           = a->ilen;
3078     c->free_imax_ilen = PETSC_FALSE;
3079   } else {
3080     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3081     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3082     for (i=0; i<mbs; i++) {
3083       c->imax[i] = a->imax[i];
3084       c->ilen[i] = a->ilen[i];
3085     }
3086     c->free_imax_ilen = PETSC_TRUE;
3087   }
3088 
3089   /* allocate the matrix space */
3090   if (mallocmatspace) {
3091     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3092       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3093       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3094 
3095       c->i            = a->i;
3096       c->j            = a->j;
3097       c->singlemalloc = PETSC_FALSE;
3098       c->free_a       = PETSC_TRUE;
3099       c->free_ij      = PETSC_FALSE;
3100       c->parent       = A;
3101       C->preallocated = PETSC_TRUE;
3102       C->assembled    = PETSC_TRUE;
3103 
3104       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3105       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3106       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3107     } else {
3108       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3109       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3110 
3111       c->singlemalloc = PETSC_TRUE;
3112       c->free_a       = PETSC_TRUE;
3113       c->free_ij      = PETSC_TRUE;
3114 
3115       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3116       if (mbs > 0) {
3117         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3118         if (cpvalues == MAT_COPY_VALUES) {
3119           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3120         } else {
3121           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3122         }
3123       }
3124       C->preallocated = PETSC_TRUE;
3125       C->assembled    = PETSC_TRUE;
3126     }
3127   }
3128 
3129   c->roworiented = a->roworiented;
3130   c->nonew       = a->nonew;
3131 
3132   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3133   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3134 
3135   c->bs2         = a->bs2;
3136   c->mbs         = a->mbs;
3137   c->nbs         = a->nbs;
3138 
3139   if (a->diag) {
3140     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3141       c->diag      = a->diag;
3142       c->free_diag = PETSC_FALSE;
3143     } else {
3144       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3145       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3146       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3147       c->free_diag = PETSC_TRUE;
3148     }
3149   } else c->diag = 0;
3150 
3151   c->nz         = a->nz;
3152   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3153   c->solve_work = NULL;
3154   c->mult_work  = NULL;
3155   c->sor_workt  = NULL;
3156   c->sor_work   = NULL;
3157 
3158   c->compressedrow.use   = a->compressedrow.use;
3159   c->compressedrow.nrows = a->compressedrow.nrows;
3160   if (a->compressedrow.use) {
3161     i    = a->compressedrow.nrows;
3162     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3163     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3164     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3165     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3166   } else {
3167     c->compressedrow.use    = PETSC_FALSE;
3168     c->compressedrow.i      = NULL;
3169     c->compressedrow.rindex = NULL;
3170   }
3171   C->nonzerostate = A->nonzerostate;
3172 
3173   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3174   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3175   PetscFunctionReturn(0);
3176 }
3177 
3178 #undef __FUNCT__
3179 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3180 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3181 {
3182   PetscErrorCode ierr;
3183 
3184   PetscFunctionBegin;
3185   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3186   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3187   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3188   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3189   PetscFunctionReturn(0);
3190 }
3191 
3192 #undef __FUNCT__
3193 #define __FUNCT__ "MatLoad_SeqBAIJ"
3194 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3195 {
3196   Mat_SeqBAIJ    *a;
3197   PetscErrorCode ierr;
3198   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3199   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3200   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3201   PetscInt       *masked,nmask,tmp,bs2,ishift;
3202   PetscMPIInt    size;
3203   int            fd;
3204   PetscScalar    *aa;
3205   MPI_Comm       comm;
3206 
3207   PetscFunctionBegin;
3208   /* force binary viewer to load .info file if it has not yet done so */
3209   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3210   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3211   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3212   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3213   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3214   if (bs < 0) bs = 1;
3215   bs2  = bs*bs;
3216 
3217   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3218   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3219   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3220   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3221   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3222   M = header[1]; N = header[2]; nz = header[3];
3223 
3224   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3225   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3226 
3227   /*
3228      This code adds extra rows to make sure the number of rows is
3229     divisible by the blocksize
3230   */
3231   mbs        = M/bs;
3232   extra_rows = bs - M + bs*(mbs);
3233   if (extra_rows == bs) extra_rows = 0;
3234   else mbs++;
3235   if (extra_rows) {
3236     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3237   }
3238 
3239   /* Set global sizes if not already set */
3240   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3241     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3242   } else { /* Check if the matrix global sizes are correct */
3243     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3244     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3245       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3246     }
3247     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3248   }
3249 
3250   /* read in row lengths */
3251   ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
3252   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3253   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3254 
3255   /* read in column indices */
3256   ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr);
3257   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3258   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3259 
3260   /* loop over row lengths determining block row lengths */
3261   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3262   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3263   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3264   rowcount = 0;
3265   nzcount  = 0;
3266   for (i=0; i<mbs; i++) {
3267     nmask = 0;
3268     for (j=0; j<bs; j++) {
3269       kmax = rowlengths[rowcount];
3270       for (k=0; k<kmax; k++) {
3271         tmp = jj[nzcount++]/bs;
3272         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3273       }
3274       rowcount++;
3275     }
3276     browlengths[i] += nmask;
3277     /* zero out the mask elements we set */
3278     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3279   }
3280 
3281   /* Do preallocation  */
3282   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3283   a    = (Mat_SeqBAIJ*)newmat->data;
3284 
3285   /* set matrix "i" values */
3286   a->i[0] = 0;
3287   for (i=1; i<= mbs; i++) {
3288     a->i[i]      = a->i[i-1] + browlengths[i-1];
3289     a->ilen[i-1] = browlengths[i-1];
3290   }
3291   a->nz = 0;
3292   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3293 
3294   /* read in nonzero values */
3295   ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr);
3296   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3297   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3298 
3299   /* set "a" and "j" values into matrix */
3300   nzcount = 0; jcount = 0;
3301   for (i=0; i<mbs; i++) {
3302     nzcountb = nzcount;
3303     nmask    = 0;
3304     for (j=0; j<bs; j++) {
3305       kmax = rowlengths[i*bs+j];
3306       for (k=0; k<kmax; k++) {
3307         tmp = jj[nzcount++]/bs;
3308         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3309       }
3310     }
3311     /* sort the masked values */
3312     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3313 
3314     /* set "j" values into matrix */
3315     maskcount = 1;
3316     for (j=0; j<nmask; j++) {
3317       a->j[jcount++]  = masked[j];
3318       mask[masked[j]] = maskcount++;
3319     }
3320     /* set "a" values into matrix */
3321     ishift = bs2*a->i[i];
3322     for (j=0; j<bs; j++) {
3323       kmax = rowlengths[i*bs+j];
3324       for (k=0; k<kmax; k++) {
3325         tmp       = jj[nzcountb]/bs;
3326         block     = mask[tmp] - 1;
3327         point     = jj[nzcountb] - bs*tmp;
3328         idx       = ishift + bs2*block + j + bs*point;
3329         a->a[idx] = (MatScalar)aa[nzcountb++];
3330       }
3331     }
3332     /* zero out the mask elements we set */
3333     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3334   }
3335   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3336 
3337   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3338   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3339   ierr = PetscFree(aa);CHKERRQ(ierr);
3340   ierr = PetscFree(jj);CHKERRQ(ierr);
3341   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3342 
3343   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3344   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3345   PetscFunctionReturn(0);
3346 }
3347 
3348 #undef __FUNCT__
3349 #define __FUNCT__ "MatCreateSeqBAIJ"
3350 /*@C
3351    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3352    compressed row) format.  For good matrix assembly performance the
3353    user should preallocate the matrix storage by setting the parameter nz
3354    (or the array nnz).  By setting these parameters accurately, performance
3355    during matrix assembly can be increased by more than a factor of 50.
3356 
3357    Collective on MPI_Comm
3358 
3359    Input Parameters:
3360 +  comm - MPI communicator, set to PETSC_COMM_SELF
3361 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3362           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3363 .  m - number of rows
3364 .  n - number of columns
3365 .  nz - number of nonzero blocks  per block row (same for all rows)
3366 -  nnz - array containing the number of nonzero blocks in the various block rows
3367          (possibly different for each block row) or NULL
3368 
3369    Output Parameter:
3370 .  A - the matrix
3371 
3372    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3373    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3374    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3375 
3376    Options Database Keys:
3377 .   -mat_no_unroll - uses code that does not unroll the loops in the
3378                      block calculations (much slower)
3379 .    -mat_block_size - size of the blocks to use
3380 
3381    Level: intermediate
3382 
3383    Notes:
3384    The number of rows and columns must be divisible by blocksize.
3385 
3386    If the nnz parameter is given then the nz parameter is ignored
3387 
3388    A nonzero block is any block that as 1 or more nonzeros in it
3389 
3390    The block AIJ format is fully compatible with standard Fortran 77
3391    storage.  That is, the stored row and column indices can begin at
3392    either one (as in Fortran) or zero.  See the users' manual for details.
3393 
3394    Specify the preallocated storage with either nz or nnz (not both).
3395    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3396    allocation.  See Users-Manual: ch_mat for details.
3397    matrices.
3398 
3399 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3400 @*/
3401 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3402 {
3403   PetscErrorCode ierr;
3404 
3405   PetscFunctionBegin;
3406   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3407   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3408   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3409   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3410   PetscFunctionReturn(0);
3411 }
3412 
3413 #undef __FUNCT__
3414 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3415 /*@C
3416    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3417    per row in the matrix. For good matrix assembly performance the
3418    user should preallocate the matrix storage by setting the parameter nz
3419    (or the array nnz).  By setting these parameters accurately, performance
3420    during matrix assembly can be increased by more than a factor of 50.
3421 
3422    Collective on MPI_Comm
3423 
3424    Input Parameters:
3425 +  B - the matrix
3426 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3427           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3428 .  nz - number of block nonzeros per block row (same for all rows)
3429 -  nnz - array containing the number of block nonzeros in the various block rows
3430          (possibly different for each block row) or NULL
3431 
3432    Options Database Keys:
3433 .   -mat_no_unroll - uses code that does not unroll the loops in the
3434                      block calculations (much slower)
3435 .   -mat_block_size - size of the blocks to use
3436 
3437    Level: intermediate
3438 
3439    Notes:
3440    If the nnz parameter is given then the nz parameter is ignored
3441 
3442    You can call MatGetInfo() to get information on how effective the preallocation was;
3443    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3444    You can also run with the option -info and look for messages with the string
3445    malloc in them to see if additional memory allocation was needed.
3446 
3447    The block AIJ format is fully compatible with standard Fortran 77
3448    storage.  That is, the stored row and column indices can begin at
3449    either one (as in Fortran) or zero.  See the users' manual for details.
3450 
3451    Specify the preallocated storage with either nz or nnz (not both).
3452    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3453    allocation.  See Users-Manual: ch_mat for details.
3454 
3455 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3456 @*/
3457 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3458 {
3459   PetscErrorCode ierr;
3460 
3461   PetscFunctionBegin;
3462   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3463   PetscValidType(B,1);
3464   PetscValidLogicalCollectiveInt(B,bs,2);
3465   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3466   PetscFunctionReturn(0);
3467 }
3468 
3469 #undef __FUNCT__
3470 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3471 /*@C
3472    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3473    (the default sequential PETSc format).
3474 
3475    Collective on MPI_Comm
3476 
3477    Input Parameters:
3478 +  B - the matrix
3479 .  i - the indices into j for the start of each local row (starts with zero)
3480 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3481 -  v - optional values in the matrix
3482 
3483    Level: developer
3484 
3485    Notes:
3486    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3487    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3488    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3489    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3490    block column and the second index is over columns within a block.
3491 
3492 .keywords: matrix, aij, compressed row, sparse
3493 
3494 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3495 @*/
3496 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3497 {
3498   PetscErrorCode ierr;
3499 
3500   PetscFunctionBegin;
3501   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3502   PetscValidType(B,1);
3503   PetscValidLogicalCollectiveInt(B,bs,2);
3504   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3505   PetscFunctionReturn(0);
3506 }
3507 
3508 
3509 #undef __FUNCT__
3510 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3511 /*@
3512      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3513 
3514      Collective on MPI_Comm
3515 
3516    Input Parameters:
3517 +  comm - must be an MPI communicator of size 1
3518 .  bs - size of block
3519 .  m - number of rows
3520 .  n - number of columns
3521 .  i - row indices
3522 .  j - column indices
3523 -  a - matrix values
3524 
3525    Output Parameter:
3526 .  mat - the matrix
3527 
3528    Level: advanced
3529 
3530    Notes:
3531        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3532     once the matrix is destroyed
3533 
3534        You cannot set new nonzero locations into this matrix, that will generate an error.
3535 
3536        The i and j indices are 0 based
3537 
3538        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3539 
3540       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3541       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3542       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3543       with column-major ordering within blocks.
3544 
3545 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3546 
3547 @*/
3548 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3549 {
3550   PetscErrorCode ierr;
3551   PetscInt       ii;
3552   Mat_SeqBAIJ    *baij;
3553 
3554   PetscFunctionBegin;
3555   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3556   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3557 
3558   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3559   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3560   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3561   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3562   baij = (Mat_SeqBAIJ*)(*mat)->data;
3563   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3564   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3565 
3566   baij->i = i;
3567   baij->j = j;
3568   baij->a = a;
3569 
3570   baij->singlemalloc = PETSC_FALSE;
3571   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3572   baij->free_a       = PETSC_FALSE;
3573   baij->free_ij      = PETSC_FALSE;
3574 
3575   for (ii=0; ii<m; ii++) {
3576     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3577 #if defined(PETSC_USE_DEBUG)
3578     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3579 #endif
3580   }
3581 #if defined(PETSC_USE_DEBUG)
3582   for (ii=0; ii<baij->i[m]; ii++) {
3583     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3584     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3585   }
3586 #endif
3587 
3588   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3589   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3590   PetscFunctionReturn(0);
3591 }
3592 
3593 #undef __FUNCT__
3594 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_SeqBAIJ"
3595 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3596 {
3597   PetscErrorCode ierr;
3598 
3599   PetscFunctionBegin;
3600   ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3601   PetscFunctionReturn(0);
3602 }
3603