xref: /petsc/src/mat/impls/baij/seq/baij.c (revision efd4aadf157bf1ba2d80c2be092fcf4247860003)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13 #endif
14 
15 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
16 {
17   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
18   PetscErrorCode ierr;
19   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
20   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
21   PetscReal      shift = 0.0;
22   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
23 
24   PetscFunctionBegin;
25   allowzeropivot = PetscNot(A->erroriffailure);
26 
27   if (a->idiagvalid) {
28     if (values) *values = a->idiag;
29     PetscFunctionReturn(0);
30   }
31   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
32   diag_offset = a->diag;
33   if (!a->idiag) {
34     ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr);
35     ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
36   }
37   diag  = a->idiag;
38   mdiag = a->idiag+bs2*mbs;
39   if (values) *values = a->idiag;
40   /* factor and invert each block */
41   switch (bs) {
42   case 1:
43     for (i=0; i<mbs; i++) {
44       odiag    = v + 1*diag_offset[i];
45       diag[0]  = odiag[0];
46       mdiag[0] = odiag[0];
47 
48       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
49         if (allowzeropivot) {
50           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
51           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
52           A->factorerror_zeropivot_row   = i;
53           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
54         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
55       }
56 
57       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
58       diag    += 1;
59       mdiag   += 1;
60     }
61     break;
62   case 2:
63     for (i=0; i<mbs; i++) {
64       odiag    = v + 4*diag_offset[i];
65       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
66       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
67       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
68       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
69       diag    += 4;
70       mdiag   += 4;
71     }
72     break;
73   case 3:
74     for (i=0; i<mbs; i++) {
75       odiag    = v + 9*diag_offset[i];
76       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
77       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
78       diag[8]  = odiag[8];
79       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
80       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
81       mdiag[8] = odiag[8];
82       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
83       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
84       diag    += 9;
85       mdiag   += 9;
86     }
87     break;
88   case 4:
89     for (i=0; i<mbs; i++) {
90       odiag  = v + 16*diag_offset[i];
91       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
92       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
94       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
95       diag  += 16;
96       mdiag += 16;
97     }
98     break;
99   case 5:
100     for (i=0; i<mbs; i++) {
101       odiag  = v + 25*diag_offset[i];
102       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
104       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
105       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
106       diag  += 25;
107       mdiag += 25;
108     }
109     break;
110   case 6:
111     for (i=0; i<mbs; i++) {
112       odiag  = v + 36*diag_offset[i];
113       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
114       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
115       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
116       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
117       diag  += 36;
118       mdiag += 36;
119     }
120     break;
121   case 7:
122     for (i=0; i<mbs; i++) {
123       odiag  = v + 49*diag_offset[i];
124       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
125       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
126       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
127       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
128       diag  += 49;
129       mdiag += 49;
130     }
131     break;
132   default:
133     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
134     for (i=0; i<mbs; i++) {
135       odiag  = v + bs2*diag_offset[i];
136       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
137       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
138       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
139       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
140       diag  += bs2;
141       mdiag += bs2;
142     }
143     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
144   }
145   a->idiagvalid = PETSC_TRUE;
146   PetscFunctionReturn(0);
147 }
148 
149 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
150 {
151   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
152   PetscScalar       *x,*work,*w,*workt,*t;
153   const MatScalar   *v,*aa = a->a, *idiag;
154   const PetscScalar *b,*xb;
155   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
156   PetscErrorCode    ierr;
157   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
158   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
159 
160   PetscFunctionBegin;
161   if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */
162   its = its*lits;
163   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
164   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
165   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
166   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
167   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
168 
169   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
170 
171   if (!m) PetscFunctionReturn(0);
172   diag  = a->diag;
173   idiag = a->idiag;
174   k    = PetscMax(A->rmap->n,A->cmap->n);
175   if (!a->mult_work) {
176     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
177   }
178   if (!a->sor_workt) {
179     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
180   }
181   if (!a->sor_work) {
182     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
183   }
184   work = a->mult_work;
185   t    = a->sor_workt;
186   w    = a->sor_work;
187 
188   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
189   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
190 
191   if (flag & SOR_ZERO_INITIAL_GUESS) {
192     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
193       switch (bs) {
194       case 1:
195         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
196         t[0] = b[0];
197         i2     = 1;
198         idiag += 1;
199         for (i=1; i<m; i++) {
200           v  = aa + ai[i];
201           vi = aj + ai[i];
202           nz = diag[i] - ai[i];
203           s[0] = b[i2];
204           for (j=0; j<nz; j++) {
205             xw[0] = x[vi[j]];
206             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
207           }
208           t[i2] = s[0];
209           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
210           x[i2]  = xw[0];
211           idiag += 1;
212           i2    += 1;
213         }
214         break;
215       case 2:
216         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
217         t[0] = b[0]; t[1] = b[1];
218         i2     = 2;
219         idiag += 4;
220         for (i=1; i<m; i++) {
221           v  = aa + 4*ai[i];
222           vi = aj + ai[i];
223           nz = diag[i] - ai[i];
224           s[0] = b[i2]; s[1] = b[i2+1];
225           for (j=0; j<nz; j++) {
226             idx = 2*vi[j];
227             it  = 4*j;
228             xw[0] = x[idx]; xw[1] = x[1+idx];
229             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
230           }
231           t[i2] = s[0]; t[i2+1] = s[1];
232           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
233           x[i2]   = xw[0]; x[i2+1] = xw[1];
234           idiag  += 4;
235           i2     += 2;
236         }
237         break;
238       case 3:
239         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
240         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
241         i2     = 3;
242         idiag += 9;
243         for (i=1; i<m; i++) {
244           v  = aa + 9*ai[i];
245           vi = aj + ai[i];
246           nz = diag[i] - ai[i];
247           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
248           while (nz--) {
249             idx = 3*(*vi++);
250             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
251             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
252             v  += 9;
253           }
254           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
255           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
256           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
257           idiag  += 9;
258           i2     += 3;
259         }
260         break;
261       case 4:
262         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
263         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
264         i2     = 4;
265         idiag += 16;
266         for (i=1; i<m; i++) {
267           v  = aa + 16*ai[i];
268           vi = aj + ai[i];
269           nz = diag[i] - ai[i];
270           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
271           while (nz--) {
272             idx = 4*(*vi++);
273             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
274             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
275             v  += 16;
276           }
277           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
278           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
279           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
280           idiag  += 16;
281           i2     += 4;
282         }
283         break;
284       case 5:
285         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
286         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
287         i2     = 5;
288         idiag += 25;
289         for (i=1; i<m; i++) {
290           v  = aa + 25*ai[i];
291           vi = aj + ai[i];
292           nz = diag[i] - ai[i];
293           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
294           while (nz--) {
295             idx = 5*(*vi++);
296             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
297             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
298             v  += 25;
299           }
300           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
301           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
302           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
303           idiag  += 25;
304           i2     += 5;
305         }
306         break;
307       case 6:
308         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
309         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
310         i2     = 6;
311         idiag += 36;
312         for (i=1; i<m; i++) {
313           v  = aa + 36*ai[i];
314           vi = aj + ai[i];
315           nz = diag[i] - ai[i];
316           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
317           while (nz--) {
318             idx = 6*(*vi++);
319             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
320             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
321             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
322             v  += 36;
323           }
324           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
325           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
326           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
327           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
328           idiag  += 36;
329           i2     += 6;
330         }
331         break;
332       case 7:
333         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
334         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
335         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
336         i2     = 7;
337         idiag += 49;
338         for (i=1; i<m; i++) {
339           v  = aa + 49*ai[i];
340           vi = aj + ai[i];
341           nz = diag[i] - ai[i];
342           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
343           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
344           while (nz--) {
345             idx = 7*(*vi++);
346             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
347             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
348             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
349             v  += 49;
350           }
351           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
352           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
353           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
354           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
355           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
356           idiag  += 49;
357           i2     += 7;
358         }
359         break;
360       default:
361         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
362         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
363         i2     = bs;
364         idiag += bs2;
365         for (i=1; i<m; i++) {
366           v  = aa + bs2*ai[i];
367           vi = aj + ai[i];
368           nz = diag[i] - ai[i];
369 
370           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
371           /* copy all rows of x that are needed into contiguous space */
372           workt = work;
373           for (j=0; j<nz; j++) {
374             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
375             workt += bs;
376           }
377           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
378           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
379           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
380 
381           idiag += bs2;
382           i2    += bs;
383         }
384         break;
385       }
386       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
387       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
388       xb = t;
389     }
390     else xb = b;
391     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
392       idiag = a->idiag+bs2*(a->mbs-1);
393       i2 = bs * (m-1);
394       switch (bs) {
395       case 1:
396         s[0]  = xb[i2];
397         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
398         x[i2] = xw[0];
399         i2   -= 1;
400         for (i=m-2; i>=0; i--) {
401           v  = aa + (diag[i]+1);
402           vi = aj + diag[i] + 1;
403           nz = ai[i+1] - diag[i] - 1;
404           s[0] = xb[i2];
405           for (j=0; j<nz; j++) {
406             xw[0] = x[vi[j]];
407             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
408           }
409           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
410           x[i2]  = xw[0];
411           idiag -= 1;
412           i2    -= 1;
413         }
414         break;
415       case 2:
416         s[0]  = xb[i2]; s[1] = xb[i2+1];
417         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
418         x[i2] = xw[0]; x[i2+1] = xw[1];
419         i2    -= 2;
420         idiag -= 4;
421         for (i=m-2; i>=0; i--) {
422           v  = aa + 4*(diag[i] + 1);
423           vi = aj + diag[i] + 1;
424           nz = ai[i+1] - diag[i] - 1;
425           s[0] = xb[i2]; s[1] = xb[i2+1];
426           for (j=0; j<nz; j++) {
427             idx = 2*vi[j];
428             it  = 4*j;
429             xw[0] = x[idx]; xw[1] = x[1+idx];
430             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
431           }
432           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
433           x[i2]   = xw[0]; x[i2+1] = xw[1];
434           idiag  -= 4;
435           i2     -= 2;
436         }
437         break;
438       case 3:
439         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
440         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
441         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
442         i2    -= 3;
443         idiag -= 9;
444         for (i=m-2; i>=0; i--) {
445           v  = aa + 9*(diag[i]+1);
446           vi = aj + diag[i] + 1;
447           nz = ai[i+1] - diag[i] - 1;
448           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
449           while (nz--) {
450             idx = 3*(*vi++);
451             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
452             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
453             v  += 9;
454           }
455           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
456           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
457           idiag  -= 9;
458           i2     -= 3;
459         }
460         break;
461       case 4:
462         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
463         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
464         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
465         i2    -= 4;
466         idiag -= 16;
467         for (i=m-2; i>=0; i--) {
468           v  = aa + 16*(diag[i]+1);
469           vi = aj + diag[i] + 1;
470           nz = ai[i+1] - diag[i] - 1;
471           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
472           while (nz--) {
473             idx = 4*(*vi++);
474             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
475             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
476             v  += 16;
477           }
478           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
479           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
480           idiag  -= 16;
481           i2     -= 4;
482         }
483         break;
484       case 5:
485         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
486         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
487         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
488         i2    -= 5;
489         idiag -= 25;
490         for (i=m-2; i>=0; i--) {
491           v  = aa + 25*(diag[i]+1);
492           vi = aj + diag[i] + 1;
493           nz = ai[i+1] - diag[i] - 1;
494           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
495           while (nz--) {
496             idx = 5*(*vi++);
497             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
498             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
499             v  += 25;
500           }
501           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
502           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
503           idiag  -= 25;
504           i2     -= 5;
505         }
506         break;
507       case 6:
508         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
509         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
510         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
511         i2    -= 6;
512         idiag -= 36;
513         for (i=m-2; i>=0; i--) {
514           v  = aa + 36*(diag[i]+1);
515           vi = aj + diag[i] + 1;
516           nz = ai[i+1] - diag[i] - 1;
517           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
518           while (nz--) {
519             idx = 6*(*vi++);
520             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
521             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
522             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
523             v  += 36;
524           }
525           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
526           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
527           idiag  -= 36;
528           i2     -= 6;
529         }
530         break;
531       case 7:
532         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
533         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
534         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
535         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
536         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
537         i2    -= 7;
538         idiag -= 49;
539         for (i=m-2; i>=0; i--) {
540           v  = aa + 49*(diag[i]+1);
541           vi = aj + diag[i] + 1;
542           nz = ai[i+1] - diag[i] - 1;
543           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
544           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
545           while (nz--) {
546             idx = 7*(*vi++);
547             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
548             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
549             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
550             v  += 49;
551           }
552           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
553           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
554           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
555           idiag  -= 49;
556           i2     -= 7;
557         }
558         break;
559       default:
560         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
561         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
562         i2    -= bs;
563         idiag -= bs2;
564         for (i=m-2; i>=0; i--) {
565           v  = aa + bs2*(diag[i]+1);
566           vi = aj + diag[i] + 1;
567           nz = ai[i+1] - diag[i] - 1;
568 
569           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
570           /* copy all rows of x that are needed into contiguous space */
571           workt = work;
572           for (j=0; j<nz; j++) {
573             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
574             workt += bs;
575           }
576           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
577           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
578 
579           idiag -= bs2;
580           i2    -= bs;
581         }
582         break;
583       }
584       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
585     }
586     its--;
587   }
588   while (its--) {
589     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
590       idiag = a->idiag;
591       i2 = 0;
592       switch (bs) {
593       case 1:
594         for (i=0; i<m; i++) {
595           v  = aa + ai[i];
596           vi = aj + ai[i];
597           nz = ai[i+1] - ai[i];
598           s[0] = b[i2];
599           for (j=0; j<nz; j++) {
600             xw[0] = x[vi[j]];
601             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
602           }
603           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
604           x[i2] += xw[0];
605           idiag += 1;
606           i2    += 1;
607         }
608         break;
609       case 2:
610         for (i=0; i<m; i++) {
611           v  = aa + 4*ai[i];
612           vi = aj + ai[i];
613           nz = ai[i+1] - ai[i];
614           s[0] = b[i2]; s[1] = b[i2+1];
615           for (j=0; j<nz; j++) {
616             idx = 2*vi[j];
617             it  = 4*j;
618             xw[0] = x[idx]; xw[1] = x[1+idx];
619             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
620           }
621           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
622           x[i2]  += xw[0]; x[i2+1] += xw[1];
623           idiag  += 4;
624           i2     += 2;
625         }
626         break;
627       case 3:
628         for (i=0; i<m; i++) {
629           v  = aa + 9*ai[i];
630           vi = aj + ai[i];
631           nz = ai[i+1] - ai[i];
632           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
633           while (nz--) {
634             idx = 3*(*vi++);
635             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
636             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
637             v  += 9;
638           }
639           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
640           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
641           idiag  += 9;
642           i2     += 3;
643         }
644         break;
645       case 4:
646         for (i=0; i<m; i++) {
647           v  = aa + 16*ai[i];
648           vi = aj + ai[i];
649           nz = ai[i+1] - ai[i];
650           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
651           while (nz--) {
652             idx = 4*(*vi++);
653             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
654             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
655             v  += 16;
656           }
657           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
658           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
659           idiag  += 16;
660           i2     += 4;
661         }
662         break;
663       case 5:
664         for (i=0; i<m; i++) {
665           v  = aa + 25*ai[i];
666           vi = aj + ai[i];
667           nz = ai[i+1] - ai[i];
668           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
669           while (nz--) {
670             idx = 5*(*vi++);
671             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
672             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
673             v  += 25;
674           }
675           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
676           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
677           idiag  += 25;
678           i2     += 5;
679         }
680         break;
681       case 6:
682         for (i=0; i<m; i++) {
683           v  = aa + 36*ai[i];
684           vi = aj + ai[i];
685           nz = ai[i+1] - ai[i];
686           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
687           while (nz--) {
688             idx = 6*(*vi++);
689             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
690             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
691             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
692             v  += 36;
693           }
694           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
695           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
696           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
697           idiag  += 36;
698           i2     += 6;
699         }
700         break;
701       case 7:
702         for (i=0; i<m; i++) {
703           v  = aa + 49*ai[i];
704           vi = aj + ai[i];
705           nz = ai[i+1] - ai[i];
706           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
707           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
708           while (nz--) {
709             idx = 7*(*vi++);
710             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
711             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
712             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
713             v  += 49;
714           }
715           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
716           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
717           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
718           idiag  += 49;
719           i2     += 7;
720         }
721         break;
722       default:
723         for (i=0; i<m; i++) {
724           v  = aa + bs2*ai[i];
725           vi = aj + ai[i];
726           nz = ai[i+1] - ai[i];
727 
728           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
729           /* copy all rows of x that are needed into contiguous space */
730           workt = work;
731           for (j=0; j<nz; j++) {
732             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
733             workt += bs;
734           }
735           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
736           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
737 
738           idiag += bs2;
739           i2    += bs;
740         }
741         break;
742       }
743       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
744     }
745     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
746       idiag = a->idiag+bs2*(a->mbs-1);
747       i2 = bs * (m-1);
748       switch (bs) {
749       case 1:
750         for (i=m-1; i>=0; i--) {
751           v  = aa + ai[i];
752           vi = aj + ai[i];
753           nz = ai[i+1] - ai[i];
754           s[0] = b[i2];
755           for (j=0; j<nz; j++) {
756             xw[0] = x[vi[j]];
757             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
758           }
759           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
760           x[i2] += xw[0];
761           idiag -= 1;
762           i2    -= 1;
763         }
764         break;
765       case 2:
766         for (i=m-1; i>=0; i--) {
767           v  = aa + 4*ai[i];
768           vi = aj + ai[i];
769           nz = ai[i+1] - ai[i];
770           s[0] = b[i2]; s[1] = b[i2+1];
771           for (j=0; j<nz; j++) {
772             idx = 2*vi[j];
773             it  = 4*j;
774             xw[0] = x[idx]; xw[1] = x[1+idx];
775             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
776           }
777           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
778           x[i2]  += xw[0]; x[i2+1] += xw[1];
779           idiag  -= 4;
780           i2     -= 2;
781         }
782         break;
783       case 3:
784         for (i=m-1; i>=0; i--) {
785           v  = aa + 9*ai[i];
786           vi = aj + ai[i];
787           nz = ai[i+1] - ai[i];
788           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
789           while (nz--) {
790             idx = 3*(*vi++);
791             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
792             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
793             v  += 9;
794           }
795           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
796           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
797           idiag  -= 9;
798           i2     -= 3;
799         }
800         break;
801       case 4:
802         for (i=m-1; i>=0; i--) {
803           v  = aa + 16*ai[i];
804           vi = aj + ai[i];
805           nz = ai[i+1] - ai[i];
806           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
807           while (nz--) {
808             idx = 4*(*vi++);
809             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
810             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
811             v  += 16;
812           }
813           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
814           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
815           idiag  -= 16;
816           i2     -= 4;
817         }
818         break;
819       case 5:
820         for (i=m-1; i>=0; i--) {
821           v  = aa + 25*ai[i];
822           vi = aj + ai[i];
823           nz = ai[i+1] - ai[i];
824           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
825           while (nz--) {
826             idx = 5*(*vi++);
827             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
828             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
829             v  += 25;
830           }
831           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
832           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
833           idiag  -= 25;
834           i2     -= 5;
835         }
836         break;
837       case 6:
838         for (i=m-1; i>=0; i--) {
839           v  = aa + 36*ai[i];
840           vi = aj + ai[i];
841           nz = ai[i+1] - ai[i];
842           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
843           while (nz--) {
844             idx = 6*(*vi++);
845             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
846             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
847             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
848             v  += 36;
849           }
850           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
851           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
852           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
853           idiag  -= 36;
854           i2     -= 6;
855         }
856         break;
857       case 7:
858         for (i=m-1; i>=0; i--) {
859           v  = aa + 49*ai[i];
860           vi = aj + ai[i];
861           nz = ai[i+1] - ai[i];
862           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
863           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
864           while (nz--) {
865             idx = 7*(*vi++);
866             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
867             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
868             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
869             v  += 49;
870           }
871           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
872           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
873           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
874           idiag  -= 49;
875           i2     -= 7;
876         }
877         break;
878       default:
879         for (i=m-1; i>=0; i--) {
880           v  = aa + bs2*ai[i];
881           vi = aj + ai[i];
882           nz = ai[i+1] - ai[i];
883 
884           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
885           /* copy all rows of x that are needed into contiguous space */
886           workt = work;
887           for (j=0; j<nz; j++) {
888             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
889             workt += bs;
890           }
891           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
892           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
893 
894           idiag -= bs2;
895           i2    -= bs;
896         }
897         break;
898       }
899       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
900     }
901   }
902   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
903   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
904   PetscFunctionReturn(0);
905 }
906 
907 
908 /*
909     Special version for direct calls from Fortran (Used in PETSc-fun3d)
910 */
911 #if defined(PETSC_HAVE_FORTRAN_CAPS)
912 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
913 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
914 #define matsetvaluesblocked4_ matsetvaluesblocked4
915 #endif
916 
917 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
918 {
919   Mat               A  = *AA;
920   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
921   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
922   PetscInt          *ai    =a->i,*ailen=a->ilen;
923   PetscInt          *aj    =a->j,stepval,lastcol = -1;
924   const PetscScalar *value = v;
925   MatScalar         *ap,*aa = a->a,*bap;
926 
927   PetscFunctionBegin;
928   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
929   stepval = (n-1)*4;
930   for (k=0; k<m; k++) { /* loop over added rows */
931     row  = im[k];
932     rp   = aj + ai[row];
933     ap   = aa + 16*ai[row];
934     nrow = ailen[row];
935     low  = 0;
936     high = nrow;
937     for (l=0; l<n; l++) { /* loop over added columns */
938       col = in[l];
939       if (col <= lastcol)  low = 0;
940       else                high = nrow;
941       lastcol = col;
942       value   = v + k*(stepval+4 + l)*4;
943       while (high-low > 7) {
944         t = (low+high)/2;
945         if (rp[t] > col) high = t;
946         else             low  = t;
947       }
948       for (i=low; i<high; i++) {
949         if (rp[i] > col) break;
950         if (rp[i] == col) {
951           bap = ap +  16*i;
952           for (ii=0; ii<4; ii++,value+=stepval) {
953             for (jj=ii; jj<16; jj+=4) {
954               bap[jj] += *value++;
955             }
956           }
957           goto noinsert2;
958         }
959       }
960       N = nrow++ - 1;
961       high++; /* added new column index thus must search to one higher than before */
962       /* shift up all the later entries in this row */
963       for (ii=N; ii>=i; ii--) {
964         rp[ii+1] = rp[ii];
965         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
966       }
967       if (N >= i) {
968         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
969       }
970       rp[i] = col;
971       bap   = ap +  16*i;
972       for (ii=0; ii<4; ii++,value+=stepval) {
973         for (jj=ii; jj<16; jj+=4) {
974           bap[jj] = *value++;
975         }
976       }
977       noinsert2:;
978       low = i;
979     }
980     ailen[row] = nrow;
981   }
982   PetscFunctionReturnVoid();
983 }
984 
985 #if defined(PETSC_HAVE_FORTRAN_CAPS)
986 #define matsetvalues4_ MATSETVALUES4
987 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
988 #define matsetvalues4_ matsetvalues4
989 #endif
990 
991 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
992 {
993   Mat         A  = *AA;
994   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
995   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
996   PetscInt    *ai=a->i,*ailen=a->ilen;
997   PetscInt    *aj=a->j,brow,bcol;
998   PetscInt    ridx,cidx,lastcol = -1;
999   MatScalar   *ap,value,*aa=a->a,*bap;
1000 
1001   PetscFunctionBegin;
1002   for (k=0; k<m; k++) { /* loop over added rows */
1003     row  = im[k]; brow = row/4;
1004     rp   = aj + ai[brow];
1005     ap   = aa + 16*ai[brow];
1006     nrow = ailen[brow];
1007     low  = 0;
1008     high = nrow;
1009     for (l=0; l<n; l++) { /* loop over added columns */
1010       col   = in[l]; bcol = col/4;
1011       ridx  = row % 4; cidx = col % 4;
1012       value = v[l + k*n];
1013       if (col <= lastcol)  low = 0;
1014       else                high = nrow;
1015       lastcol = col;
1016       while (high-low > 7) {
1017         t = (low+high)/2;
1018         if (rp[t] > bcol) high = t;
1019         else              low  = t;
1020       }
1021       for (i=low; i<high; i++) {
1022         if (rp[i] > bcol) break;
1023         if (rp[i] == bcol) {
1024           bap   = ap +  16*i + 4*cidx + ridx;
1025           *bap += value;
1026           goto noinsert1;
1027         }
1028       }
1029       N = nrow++ - 1;
1030       high++; /* added new column thus must search to one higher than before */
1031       /* shift up all the later entries in this row */
1032       for (ii=N; ii>=i; ii--) {
1033         rp[ii+1] = rp[ii];
1034         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1035       }
1036       if (N>=i) {
1037         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1038       }
1039       rp[i]                    = bcol;
1040       ap[16*i + 4*cidx + ridx] = value;
1041 noinsert1:;
1042       low = i;
1043     }
1044     ailen[brow] = nrow;
1045   }
1046   PetscFunctionReturnVoid();
1047 }
1048 
1049 /*
1050      Checks for missing diagonals
1051 */
1052 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1053 {
1054   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1055   PetscErrorCode ierr;
1056   PetscInt       *diag,*ii = a->i,i;
1057 
1058   PetscFunctionBegin;
1059   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1060   *missing = PETSC_FALSE;
1061   if (A->rmap->n > 0 && !ii) {
1062     *missing = PETSC_TRUE;
1063     if (d) *d = 0;
1064     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1065   } else {
1066     diag = a->diag;
1067     for (i=0; i<a->mbs; i++) {
1068       if (diag[i] >= ii[i+1]) {
1069         *missing = PETSC_TRUE;
1070         if (d) *d = i;
1071         PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);
1072         break;
1073       }
1074     }
1075   }
1076   PetscFunctionReturn(0);
1077 }
1078 
1079 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1080 {
1081   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1082   PetscErrorCode ierr;
1083   PetscInt       i,j,m = a->mbs;
1084 
1085   PetscFunctionBegin;
1086   if (!a->diag) {
1087     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1088     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1089     a->free_diag = PETSC_TRUE;
1090   }
1091   for (i=0; i<m; i++) {
1092     a->diag[i] = a->i[i+1];
1093     for (j=a->i[i]; j<a->i[i+1]; j++) {
1094       if (a->j[j] == i) {
1095         a->diag[i] = j;
1096         break;
1097       }
1098     }
1099   }
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 
1104 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1105 {
1106   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1107   PetscErrorCode ierr;
1108   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1109   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1110 
1111   PetscFunctionBegin;
1112   *nn = n;
1113   if (!ia) PetscFunctionReturn(0);
1114   if (symmetric) {
1115     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1116     nz   = tia[n];
1117   } else {
1118     tia = a->i; tja = a->j;
1119   }
1120 
1121   if (!blockcompressed && bs > 1) {
1122     (*nn) *= bs;
1123     /* malloc & create the natural set of indices */
1124     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1125     if (n) {
1126       (*ia)[0] = oshift;
1127       for (j=1; j<bs; j++) {
1128         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1129       }
1130     }
1131 
1132     for (i=1; i<n; i++) {
1133       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1134       for (j=1; j<bs; j++) {
1135         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1136       }
1137     }
1138     if (n) {
1139       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1140     }
1141 
1142     if (inja) {
1143       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1144       cnt = 0;
1145       for (i=0; i<n; i++) {
1146         for (j=0; j<bs; j++) {
1147           for (k=tia[i]; k<tia[i+1]; k++) {
1148             for (l=0; l<bs; l++) {
1149               (*ja)[cnt++] = bs*tja[k] + l;
1150             }
1151           }
1152         }
1153       }
1154     }
1155 
1156     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1157       ierr = PetscFree(tia);CHKERRQ(ierr);
1158       ierr = PetscFree(tja);CHKERRQ(ierr);
1159     }
1160   } else if (oshift == 1) {
1161     if (symmetric) {
1162       nz = tia[A->rmap->n/bs];
1163       /*  add 1 to i and j indices */
1164       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1165       *ia = tia;
1166       if (ja) {
1167         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1168         *ja = tja;
1169       }
1170     } else {
1171       nz = a->i[A->rmap->n/bs];
1172       /* malloc space and  add 1 to i and j indices */
1173       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1174       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1175       if (ja) {
1176         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1177         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1178       }
1179     }
1180   } else {
1181     *ia = tia;
1182     if (ja) *ja = tja;
1183   }
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1188 {
1189   PetscErrorCode ierr;
1190 
1191   PetscFunctionBegin;
1192   if (!ia) PetscFunctionReturn(0);
1193   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1194     ierr = PetscFree(*ia);CHKERRQ(ierr);
1195     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1196   }
1197   PetscFunctionReturn(0);
1198 }
1199 
1200 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1201 {
1202   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1203   PetscErrorCode ierr;
1204 
1205   PetscFunctionBegin;
1206 #if defined(PETSC_USE_LOG)
1207   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1208 #endif
1209   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1210   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1211   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1212   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1213   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1214   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1215   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1216   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1217   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1218   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1219   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1220   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1221   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1222 
1223   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1224   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1225   ierr = PetscFree(A->data);CHKERRQ(ierr);
1226 
1227   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1228   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1229   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1230   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1231   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1232   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1233   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1234   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1235   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1236   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1237   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1238 #if defined(PETSC_HAVE_HYPRE)
1239   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1240 #endif
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1245 {
1246   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1247   PetscErrorCode ierr;
1248 
1249   PetscFunctionBegin;
1250   switch (op) {
1251   case MAT_ROW_ORIENTED:
1252     a->roworiented = flg;
1253     break;
1254   case MAT_KEEP_NONZERO_PATTERN:
1255     a->keepnonzeropattern = flg;
1256     break;
1257   case MAT_NEW_NONZERO_LOCATIONS:
1258     a->nonew = (flg ? 0 : 1);
1259     break;
1260   case MAT_NEW_NONZERO_LOCATION_ERR:
1261     a->nonew = (flg ? -1 : 0);
1262     break;
1263   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1264     a->nonew = (flg ? -2 : 0);
1265     break;
1266   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1267     a->nounused = (flg ? -1 : 0);
1268     break;
1269   case MAT_NEW_DIAGONALS:
1270   case MAT_IGNORE_OFF_PROC_ENTRIES:
1271   case MAT_USE_HASH_TABLE:
1272     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1273     break;
1274   case MAT_SPD:
1275   case MAT_SYMMETRIC:
1276   case MAT_STRUCTURALLY_SYMMETRIC:
1277   case MAT_HERMITIAN:
1278   case MAT_SYMMETRY_ETERNAL:
1279   case MAT_SUBMAT_SINGLEIS:
1280   case MAT_STRUCTURE_ONLY:
1281     /* These options are handled directly by MatSetOption() */
1282     break;
1283   default:
1284     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1285   }
1286   PetscFunctionReturn(0);
1287 }
1288 
1289 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1290 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1291 {
1292   PetscErrorCode ierr;
1293   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1294   MatScalar      *aa_i;
1295   PetscScalar    *v_i;
1296 
1297   PetscFunctionBegin;
1298   bs  = A->rmap->bs;
1299   bs2 = bs*bs;
1300   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1301 
1302   bn  = row/bs;   /* Block number */
1303   bp  = row % bs; /* Block Position */
1304   M   = ai[bn+1] - ai[bn];
1305   *nz = bs*M;
1306 
1307   if (v) {
1308     *v = 0;
1309     if (*nz) {
1310       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1311       for (i=0; i<M; i++) { /* for each block in the block row */
1312         v_i  = *v + i*bs;
1313         aa_i = aa + bs2*(ai[bn] + i);
1314         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1315       }
1316     }
1317   }
1318 
1319   if (idx) {
1320     *idx = 0;
1321     if (*nz) {
1322       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1323       for (i=0; i<M; i++) { /* for each block in the block row */
1324         idx_i = *idx + i*bs;
1325         itmp  = bs*aj[ai[bn] + i];
1326         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1327       }
1328     }
1329   }
1330   PetscFunctionReturn(0);
1331 }
1332 
1333 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1334 {
1335   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1336   PetscErrorCode ierr;
1337 
1338   PetscFunctionBegin;
1339   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1340   PetscFunctionReturn(0);
1341 }
1342 
1343 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1344 {
1345   PetscErrorCode ierr;
1346 
1347   PetscFunctionBegin;
1348   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1349   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1350   PetscFunctionReturn(0);
1351 }
1352 
1353 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1354 {
1355   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1356   Mat            C;
1357   PetscErrorCode ierr;
1358   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1359   PetscInt       *rows,*cols,bs2=a->bs2;
1360   MatScalar      *array;
1361 
1362   PetscFunctionBegin;
1363   if (reuse == MAT_INPLACE_MATRIX && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1364   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1365     ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr);
1366 
1367     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1368     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1369     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1370     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1371     ierr = MatSeqBAIJSetPreallocation(C,bs,0,col);CHKERRQ(ierr);
1372     ierr = PetscFree(col);CHKERRQ(ierr);
1373   } else {
1374     C = *B;
1375   }
1376 
1377   array = a->a;
1378   ierr  = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr);
1379   for (i=0; i<mbs; i++) {
1380     cols[0] = i*bs;
1381     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1382     len = ai[i+1] - ai[i];
1383     for (j=0; j<len; j++) {
1384       rows[0] = (*aj++)*bs;
1385       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1386       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1387       array += bs2;
1388     }
1389   }
1390   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1391 
1392   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1393   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1394 
1395   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1396     *B = C;
1397   } else {
1398     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1399   }
1400   PetscFunctionReturn(0);
1401 }
1402 
1403 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1404 {
1405   PetscErrorCode ierr;
1406   Mat            Btrans;
1407 
1408   PetscFunctionBegin;
1409   *f   = PETSC_FALSE;
1410   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1411   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1412   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1417 {
1418   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1419   PetscErrorCode ierr;
1420   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1421   int            fd;
1422   PetscScalar    *aa;
1423   FILE           *file;
1424 
1425   PetscFunctionBegin;
1426   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1427   ierr        = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr);
1428   col_lens[0] = MAT_FILE_CLASSID;
1429 
1430   col_lens[1] = A->rmap->N;
1431   col_lens[2] = A->cmap->n;
1432   col_lens[3] = a->nz*bs2;
1433 
1434   /* store lengths of each row and write (including header) to file */
1435   count = 0;
1436   for (i=0; i<a->mbs; i++) {
1437     for (j=0; j<bs; j++) {
1438       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1439     }
1440   }
1441   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1442   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1443 
1444   /* store column indices (zero start index) */
1445   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1446   count = 0;
1447   for (i=0; i<a->mbs; i++) {
1448     for (j=0; j<bs; j++) {
1449       for (k=a->i[i]; k<a->i[i+1]; k++) {
1450         for (l=0; l<bs; l++) {
1451           jj[count++] = bs*a->j[k] + l;
1452         }
1453       }
1454     }
1455   }
1456   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1457   ierr = PetscFree(jj);CHKERRQ(ierr);
1458 
1459   /* store nonzero values */
1460   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1461   count = 0;
1462   for (i=0; i<a->mbs; i++) {
1463     for (j=0; j<bs; j++) {
1464       for (k=a->i[i]; k<a->i[i+1]; k++) {
1465         for (l=0; l<bs; l++) {
1466           aa[count++] = a->a[bs2*k + l*bs + j];
1467         }
1468       }
1469     }
1470   }
1471   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1472   ierr = PetscFree(aa);CHKERRQ(ierr);
1473 
1474   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1475   if (file) {
1476     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1477   }
1478   PetscFunctionReturn(0);
1479 }
1480 
1481 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1482 {
1483   PetscErrorCode ierr;
1484   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1485   PetscInt       i,bs = A->rmap->bs,k;
1486 
1487   PetscFunctionBegin;
1488   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1489   for (i=0; i<a->mbs; i++) {
1490     ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr);
1491     for (k=a->i[i]; k<a->i[i+1]; k++) {
1492       ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr);
1493     }
1494     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1495   }
1496   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1497   PetscFunctionReturn(0);
1498 }
1499 
1500 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1501 {
1502   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1503   PetscErrorCode    ierr;
1504   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1505   PetscViewerFormat format;
1506 
1507   PetscFunctionBegin;
1508   if (A->structure_only) {
1509     ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr);
1510     PetscFunctionReturn(0);
1511   }
1512 
1513   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1514   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1515     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1516   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1517     const char *matname;
1518     Mat        aij;
1519     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1520     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1521     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1522     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1523     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1524   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1525       PetscFunctionReturn(0);
1526   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1527     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1528     for (i=0; i<a->mbs; i++) {
1529       for (j=0; j<bs; j++) {
1530         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1531         for (k=a->i[i]; k<a->i[i+1]; k++) {
1532           for (l=0; l<bs; l++) {
1533 #if defined(PETSC_USE_COMPLEX)
1534             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1535               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1536                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1537             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1538               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1539                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1540             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1541               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1542             }
1543 #else
1544             if (a->a[bs2*k + l*bs + j] != 0.0) {
1545               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1546             }
1547 #endif
1548           }
1549         }
1550         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1551       }
1552     }
1553     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1554   } else {
1555     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1556     for (i=0; i<a->mbs; i++) {
1557       for (j=0; j<bs; j++) {
1558         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1559         for (k=a->i[i]; k<a->i[i+1]; k++) {
1560           for (l=0; l<bs; l++) {
1561 #if defined(PETSC_USE_COMPLEX)
1562             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1563               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1564                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1565             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1566               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1567                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1568             } else {
1569               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1570             }
1571 #else
1572             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1573 #endif
1574           }
1575         }
1576         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1577       }
1578     }
1579     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1580   }
1581   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1582   PetscFunctionReturn(0);
1583 }
1584 
1585 #include <petscdraw.h>
1586 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1587 {
1588   Mat               A = (Mat) Aa;
1589   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1590   PetscErrorCode    ierr;
1591   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1592   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1593   MatScalar         *aa;
1594   PetscViewer       viewer;
1595   PetscViewerFormat format;
1596 
1597   PetscFunctionBegin;
1598   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1599   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1600   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1601 
1602   /* loop over matrix elements drawing boxes */
1603 
1604   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1605     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1606     /* Blue for negative, Cyan for zero and  Red for positive */
1607     color = PETSC_DRAW_BLUE;
1608     for (i=0,row=0; i<mbs; i++,row+=bs) {
1609       for (j=a->i[i]; j<a->i[i+1]; j++) {
1610         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1611         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1612         aa  = a->a + j*bs2;
1613         for (k=0; k<bs; k++) {
1614           for (l=0; l<bs; l++) {
1615             if (PetscRealPart(*aa++) >=  0.) continue;
1616             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1617           }
1618         }
1619       }
1620     }
1621     color = PETSC_DRAW_CYAN;
1622     for (i=0,row=0; i<mbs; i++,row+=bs) {
1623       for (j=a->i[i]; j<a->i[i+1]; j++) {
1624         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1625         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1626         aa  = a->a + j*bs2;
1627         for (k=0; k<bs; k++) {
1628           for (l=0; l<bs; l++) {
1629             if (PetscRealPart(*aa++) != 0.) continue;
1630             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1631           }
1632         }
1633       }
1634     }
1635     color = PETSC_DRAW_RED;
1636     for (i=0,row=0; i<mbs; i++,row+=bs) {
1637       for (j=a->i[i]; j<a->i[i+1]; j++) {
1638         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1639         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1640         aa  = a->a + j*bs2;
1641         for (k=0; k<bs; k++) {
1642           for (l=0; l<bs; l++) {
1643             if (PetscRealPart(*aa++) <= 0.) continue;
1644             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1645           }
1646         }
1647       }
1648     }
1649     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1650   } else {
1651     /* use contour shading to indicate magnitude of values */
1652     /* first determine max of all nonzero values */
1653     PetscReal minv = 0.0, maxv = 0.0;
1654     PetscDraw popup;
1655 
1656     for (i=0; i<a->nz*a->bs2; i++) {
1657       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1658     }
1659     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1660     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1661     ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1662 
1663     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1664     for (i=0,row=0; i<mbs; i++,row+=bs) {
1665       for (j=a->i[i]; j<a->i[i+1]; j++) {
1666         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1667         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1668         aa  = a->a + j*bs2;
1669         for (k=0; k<bs; k++) {
1670           for (l=0; l<bs; l++) {
1671             MatScalar v = *aa++;
1672             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1673             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1674           }
1675         }
1676       }
1677     }
1678     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1679   }
1680   PetscFunctionReturn(0);
1681 }
1682 
1683 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1684 {
1685   PetscErrorCode ierr;
1686   PetscReal      xl,yl,xr,yr,w,h;
1687   PetscDraw      draw;
1688   PetscBool      isnull;
1689 
1690   PetscFunctionBegin;
1691   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1692   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1693   if (isnull) PetscFunctionReturn(0);
1694 
1695   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1696   xr  += w;          yr += h;        xl = -w;     yl = -h;
1697   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1698   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1699   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1700   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1701   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1702   PetscFunctionReturn(0);
1703 }
1704 
1705 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1706 {
1707   PetscErrorCode ierr;
1708   PetscBool      iascii,isbinary,isdraw;
1709 
1710   PetscFunctionBegin;
1711   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1712   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1713   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1714   if (iascii) {
1715     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1716   } else if (isbinary) {
1717     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1718   } else if (isdraw) {
1719     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1720   } else {
1721     Mat B;
1722     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1723     ierr = MatView(B,viewer);CHKERRQ(ierr);
1724     ierr = MatDestroy(&B);CHKERRQ(ierr);
1725   }
1726   PetscFunctionReturn(0);
1727 }
1728 
1729 
1730 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1731 {
1732   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1733   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1734   PetscInt    *ai = a->i,*ailen = a->ilen;
1735   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1736   MatScalar   *ap,*aa = a->a;
1737 
1738   PetscFunctionBegin;
1739   for (k=0; k<m; k++) { /* loop over rows */
1740     row = im[k]; brow = row/bs;
1741     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1742     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1743     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1744     nrow = ailen[brow];
1745     for (l=0; l<n; l++) { /* loop over columns */
1746       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1747       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1748       col  = in[l];
1749       bcol = col/bs;
1750       cidx = col%bs;
1751       ridx = row%bs;
1752       high = nrow;
1753       low  = 0; /* assume unsorted */
1754       while (high-low > 5) {
1755         t = (low+high)/2;
1756         if (rp[t] > bcol) high = t;
1757         else             low  = t;
1758       }
1759       for (i=low; i<high; i++) {
1760         if (rp[i] > bcol) break;
1761         if (rp[i] == bcol) {
1762           *v++ = ap[bs2*i+bs*cidx+ridx];
1763           goto finished;
1764         }
1765       }
1766       *v++ = 0.0;
1767 finished:;
1768     }
1769   }
1770   PetscFunctionReturn(0);
1771 }
1772 
1773 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1774 {
1775   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1776   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1777   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1778   PetscErrorCode    ierr;
1779   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1780   PetscBool         roworiented=a->roworiented;
1781   const PetscScalar *value     = v;
1782   MatScalar         *ap=NULL,*aa = a->a,*bap;
1783 
1784   PetscFunctionBegin;
1785   if (roworiented) {
1786     stepval = (n-1)*bs;
1787   } else {
1788     stepval = (m-1)*bs;
1789   }
1790   for (k=0; k<m; k++) { /* loop over added rows */
1791     row = im[k];
1792     if (row < 0) continue;
1793 #if defined(PETSC_USE_DEBUG)
1794     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1795 #endif
1796     rp   = aj + ai[row];
1797     if (!A->structure_only) ap = aa + bs2*ai[row];
1798     rmax = imax[row];
1799     nrow = ailen[row];
1800     low  = 0;
1801     high = nrow;
1802     for (l=0; l<n; l++) { /* loop over added columns */
1803       if (in[l] < 0) continue;
1804 #if defined(PETSC_USE_DEBUG)
1805       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1806 #endif
1807       col = in[l];
1808       if (!A->structure_only) {
1809         if (roworiented) {
1810           value = v + (k*(stepval+bs) + l)*bs;
1811         } else {
1812           value = v + (l*(stepval+bs) + k)*bs;
1813         }
1814       }
1815       if (col <= lastcol) low = 0;
1816       else high = nrow;
1817       lastcol = col;
1818       while (high-low > 7) {
1819         t = (low+high)/2;
1820         if (rp[t] > col) high = t;
1821         else             low  = t;
1822       }
1823       for (i=low; i<high; i++) {
1824         if (rp[i] > col) break;
1825         if (rp[i] == col) {
1826           if (A->structure_only) goto noinsert2;
1827           bap = ap +  bs2*i;
1828           if (roworiented) {
1829             if (is == ADD_VALUES) {
1830               for (ii=0; ii<bs; ii++,value+=stepval) {
1831                 for (jj=ii; jj<bs2; jj+=bs) {
1832                   bap[jj] += *value++;
1833                 }
1834               }
1835             } else {
1836               for (ii=0; ii<bs; ii++,value+=stepval) {
1837                 for (jj=ii; jj<bs2; jj+=bs) {
1838                   bap[jj] = *value++;
1839                 }
1840               }
1841             }
1842           } else {
1843             if (is == ADD_VALUES) {
1844               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1845                 for (jj=0; jj<bs; jj++) {
1846                   bap[jj] += value[jj];
1847                 }
1848                 bap += bs;
1849               }
1850             } else {
1851               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1852                 for (jj=0; jj<bs; jj++) {
1853                   bap[jj]  = value[jj];
1854                 }
1855                 bap += bs;
1856               }
1857             }
1858           }
1859           goto noinsert2;
1860         }
1861       }
1862       if (nonew == 1) goto noinsert2;
1863       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1864       if (A->structure_only) {
1865         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1866       } else {
1867         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1868       }
1869       N = nrow++ - 1; high++;
1870       /* shift up all the later entries in this row */
1871       for (ii=N; ii>=i; ii--) {
1872         rp[ii+1] = rp[ii];
1873         if (!A->structure_only) {
1874           ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1875         }
1876       }
1877       if (N >= i && !A->structure_only) {
1878         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1879       }
1880 
1881       rp[i] = col;
1882       if (!A->structure_only) {
1883         bap   = ap +  bs2*i;
1884         if (roworiented) {
1885           for (ii=0; ii<bs; ii++,value+=stepval) {
1886             for (jj=ii; jj<bs2; jj+=bs) {
1887               bap[jj] = *value++;
1888             }
1889           }
1890         } else {
1891           for (ii=0; ii<bs; ii++,value+=stepval) {
1892             for (jj=0; jj<bs; jj++) {
1893               *bap++ = *value++;
1894             }
1895           }
1896         }
1897       }
1898 noinsert2:;
1899       low = i;
1900     }
1901     ailen[row] = nrow;
1902   }
1903   PetscFunctionReturn(0);
1904 }
1905 
1906 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1907 {
1908   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1909   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1910   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1911   PetscErrorCode ierr;
1912   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1913   MatScalar      *aa  = a->a,*ap;
1914   PetscReal      ratio=0.6;
1915 
1916   PetscFunctionBegin;
1917   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1918 
1919   if (m) rmax = ailen[0];
1920   for (i=1; i<mbs; i++) {
1921     /* move each row back by the amount of empty slots (fshift) before it*/
1922     fshift += imax[i-1] - ailen[i-1];
1923     rmax    = PetscMax(rmax,ailen[i]);
1924     if (fshift) {
1925       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1926       N  = ailen[i];
1927       for (j=0; j<N; j++) {
1928         ip[j-fshift] = ip[j];
1929         if (!A->structure_only) {
1930           ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1931         }
1932       }
1933     }
1934     ai[i] = ai[i-1] + ailen[i-1];
1935   }
1936   if (mbs) {
1937     fshift += imax[mbs-1] - ailen[mbs-1];
1938     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1939   }
1940 
1941   /* reset ilen and imax for each row */
1942   a->nonzerorowcnt = 0;
1943   if (A->structure_only) {
1944     ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1945   } else { /* !A->structure_only */
1946     for (i=0; i<mbs; i++) {
1947       ailen[i] = imax[i] = ai[i+1] - ai[i];
1948       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1949     }
1950   }
1951   a->nz = ai[mbs];
1952 
1953   /* diagonals may have moved, so kill the diagonal pointers */
1954   a->idiagvalid = PETSC_FALSE;
1955   if (fshift && a->diag) {
1956     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1957     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1958     a->diag = 0;
1959   }
1960   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1961   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1962   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1963   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1964 
1965   A->info.mallocs    += a->reallocs;
1966   a->reallocs         = 0;
1967   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1968   a->rmax             = rmax;
1969 
1970   if (!A->structure_only) {
1971     ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1972   }
1973   PetscFunctionReturn(0);
1974 }
1975 
1976 /*
1977    This function returns an array of flags which indicate the locations of contiguous
1978    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1979    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1980    Assume: sizes should be long enough to hold all the values.
1981 */
1982 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1983 {
1984   PetscInt  i,j,k,row;
1985   PetscBool flg;
1986 
1987   PetscFunctionBegin;
1988   for (i=0,j=0; i<n; j++) {
1989     row = idx[i];
1990     if (row%bs!=0) { /* Not the begining of a block */
1991       sizes[j] = 1;
1992       i++;
1993     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1994       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1995       i++;
1996     } else { /* Begining of the block, so check if the complete block exists */
1997       flg = PETSC_TRUE;
1998       for (k=1; k<bs; k++) {
1999         if (row+k != idx[i+k]) { /* break in the block */
2000           flg = PETSC_FALSE;
2001           break;
2002         }
2003       }
2004       if (flg) { /* No break in the bs */
2005         sizes[j] = bs;
2006         i       += bs;
2007       } else {
2008         sizes[j] = 1;
2009         i++;
2010       }
2011     }
2012   }
2013   *bs_max = j;
2014   PetscFunctionReturn(0);
2015 }
2016 
2017 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2018 {
2019   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2020   PetscErrorCode    ierr;
2021   PetscInt          i,j,k,count,*rows;
2022   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2023   PetscScalar       zero = 0.0;
2024   MatScalar         *aa;
2025   const PetscScalar *xx;
2026   PetscScalar       *bb;
2027 
2028   PetscFunctionBegin;
2029   /* fix right hand side if needed */
2030   if (x && b) {
2031     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2032     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2033     for (i=0; i<is_n; i++) {
2034       bb[is_idx[i]] = diag*xx[is_idx[i]];
2035     }
2036     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2037     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2038   }
2039 
2040   /* Make a copy of the IS and  sort it */
2041   /* allocate memory for rows,sizes */
2042   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2043 
2044   /* copy IS values to rows, and sort them */
2045   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2046   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2047 
2048   if (baij->keepnonzeropattern) {
2049     for (i=0; i<is_n; i++) sizes[i] = 1;
2050     bs_max          = is_n;
2051   } else {
2052     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2053     A->nonzerostate++;
2054   }
2055 
2056   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2057     row = rows[j];
2058     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2059     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2060     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2061     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2062       if (diag != (PetscScalar)0.0) {
2063         if (baij->ilen[row/bs] > 0) {
2064           baij->ilen[row/bs]       = 1;
2065           baij->j[baij->i[row/bs]] = row/bs;
2066 
2067           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2068         }
2069         /* Now insert all the diagonal values for this bs */
2070         for (k=0; k<bs; k++) {
2071           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2072         }
2073       } else { /* (diag == 0.0) */
2074         baij->ilen[row/bs] = 0;
2075       } /* end (diag == 0.0) */
2076     } else { /* (sizes[i] != bs) */
2077 #if defined(PETSC_USE_DEBUG)
2078       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2079 #endif
2080       for (k=0; k<count; k++) {
2081         aa[0] =  zero;
2082         aa   += bs;
2083       }
2084       if (diag != (PetscScalar)0.0) {
2085         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2086       }
2087     }
2088   }
2089 
2090   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2091   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2092   PetscFunctionReturn(0);
2093 }
2094 
2095 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2096 {
2097   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2098   PetscErrorCode    ierr;
2099   PetscInt          i,j,k,count;
2100   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2101   PetscScalar       zero = 0.0;
2102   MatScalar         *aa;
2103   const PetscScalar *xx;
2104   PetscScalar       *bb;
2105   PetscBool         *zeroed,vecs = PETSC_FALSE;
2106 
2107   PetscFunctionBegin;
2108   /* fix right hand side if needed */
2109   if (x && b) {
2110     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2111     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2112     vecs = PETSC_TRUE;
2113   }
2114 
2115   /* zero the columns */
2116   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2117   for (i=0; i<is_n; i++) {
2118     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2119     zeroed[is_idx[i]] = PETSC_TRUE;
2120   }
2121   for (i=0; i<A->rmap->N; i++) {
2122     if (!zeroed[i]) {
2123       row = i/bs;
2124       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2125         for (k=0; k<bs; k++) {
2126           col = bs*baij->j[j] + k;
2127           if (zeroed[col]) {
2128             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2129             if (vecs) bb[i] -= aa[0]*xx[col];
2130             aa[0] = 0.0;
2131           }
2132         }
2133       }
2134     } else if (vecs) bb[i] = diag*xx[i];
2135   }
2136   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2137   if (vecs) {
2138     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2139     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2140   }
2141 
2142   /* zero the rows */
2143   for (i=0; i<is_n; i++) {
2144     row   = is_idx[i];
2145     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2146     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2147     for (k=0; k<count; k++) {
2148       aa[0] =  zero;
2149       aa   += bs;
2150     }
2151     if (diag != (PetscScalar)0.0) {
2152       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2153     }
2154   }
2155   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2156   PetscFunctionReturn(0);
2157 }
2158 
2159 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2160 {
2161   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2162   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2163   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2164   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2165   PetscErrorCode ierr;
2166   PetscInt       ridx,cidx,bs2=a->bs2;
2167   PetscBool      roworiented=a->roworiented;
2168   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;
2169 
2170   PetscFunctionBegin;
2171   for (k=0; k<m; k++) { /* loop over added rows */
2172     row  = im[k];
2173     brow = row/bs;
2174     if (row < 0) continue;
2175 #if defined(PETSC_USE_DEBUG)
2176     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2177 #endif
2178     rp   = aj + ai[brow];
2179     if (!A->structure_only) ap = aa + bs2*ai[brow];
2180     rmax = imax[brow];
2181     nrow = ailen[brow];
2182     low  = 0;
2183     high = nrow;
2184     for (l=0; l<n; l++) { /* loop over added columns */
2185       if (in[l] < 0) continue;
2186 #if defined(PETSC_USE_DEBUG)
2187       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2188 #endif
2189       col  = in[l]; bcol = col/bs;
2190       ridx = row % bs; cidx = col % bs;
2191       if (!A->structure_only) {
2192         if (roworiented) {
2193           value = v[l + k*n];
2194         } else {
2195           value = v[k + l*m];
2196         }
2197       }
2198       if (col <= lastcol) low = 0; else high = nrow;
2199       lastcol = col;
2200       while (high-low > 7) {
2201         t = (low+high)/2;
2202         if (rp[t] > bcol) high = t;
2203         else              low  = t;
2204       }
2205       for (i=low; i<high; i++) {
2206         if (rp[i] > bcol) break;
2207         if (rp[i] == bcol) {
2208           bap = ap +  bs2*i + bs*cidx + ridx;
2209           if (!A->structure_only) {
2210             if (is == ADD_VALUES) *bap += value;
2211             else                  *bap  = value;
2212           }
2213           goto noinsert1;
2214         }
2215       }
2216       if (nonew == 1) goto noinsert1;
2217       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2218       if (A->structure_only) {
2219         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2220       } else {
2221         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2222       }
2223       N = nrow++ - 1; high++;
2224       /* shift up all the later entries in this row */
2225       for (ii=N; ii>=i; ii--) {
2226         rp[ii+1] = rp[ii];
2227         if (!A->structure_only) {
2228           ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2229         }
2230       }
2231       if (N>=i && !A->structure_only) {
2232         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2233       }
2234       rp[i]                      = bcol;
2235       if (!A->structure_only) ap[bs2*i + bs*cidx + ridx] = value;
2236       a->nz++;
2237       A->nonzerostate++;
2238 noinsert1:;
2239       low = i;
2240     }
2241     ailen[brow] = nrow;
2242   }
2243   PetscFunctionReturn(0);
2244 }
2245 
2246 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2247 {
2248   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2249   Mat            outA;
2250   PetscErrorCode ierr;
2251   PetscBool      row_identity,col_identity;
2252 
2253   PetscFunctionBegin;
2254   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2255   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2256   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2257   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2258 
2259   outA            = inA;
2260   inA->factortype = MAT_FACTOR_LU;
2261   ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2262   ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
2263 
2264   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2265 
2266   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2267   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2268   a->row = row;
2269   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2270   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2271   a->col = col;
2272 
2273   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2274   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2275   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2276   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2277 
2278   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2279   if (!a->solve_work) {
2280     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2281     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2282   }
2283   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2284   PetscFunctionReturn(0);
2285 }
2286 
2287 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2288 {
2289   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2290   PetscInt    i,nz,mbs;
2291 
2292   PetscFunctionBegin;
2293   nz  = baij->maxnz;
2294   mbs = baij->mbs;
2295   for (i=0; i<nz; i++) {
2296     baij->j[i] = indices[i];
2297   }
2298   baij->nz = nz;
2299   for (i=0; i<mbs; i++) {
2300     baij->ilen[i] = baij->imax[i];
2301   }
2302   PetscFunctionReturn(0);
2303 }
2304 
2305 /*@
2306     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2307        in the matrix.
2308 
2309   Input Parameters:
2310 +  mat - the SeqBAIJ matrix
2311 -  indices - the column indices
2312 
2313   Level: advanced
2314 
2315   Notes:
2316     This can be called if you have precomputed the nonzero structure of the
2317   matrix and want to provide it to the matrix object to improve the performance
2318   of the MatSetValues() operation.
2319 
2320     You MUST have set the correct numbers of nonzeros per row in the call to
2321   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2322 
2323     MUST be called before any calls to MatSetValues();
2324 
2325 @*/
2326 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2327 {
2328   PetscErrorCode ierr;
2329 
2330   PetscFunctionBegin;
2331   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2332   PetscValidPointer(indices,2);
2333   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2334   PetscFunctionReturn(0);
2335 }
2336 
2337 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2338 {
2339   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2340   PetscErrorCode ierr;
2341   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2342   PetscReal      atmp;
2343   PetscScalar    *x,zero = 0.0;
2344   MatScalar      *aa;
2345   PetscInt       ncols,brow,krow,kcol;
2346 
2347   PetscFunctionBegin;
2348   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2349   bs  = A->rmap->bs;
2350   aa  = a->a;
2351   ai  = a->i;
2352   aj  = a->j;
2353   mbs = a->mbs;
2354 
2355   ierr = VecSet(v,zero);CHKERRQ(ierr);
2356   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2357   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2358   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2359   for (i=0; i<mbs; i++) {
2360     ncols = ai[1] - ai[0]; ai++;
2361     brow  = bs*i;
2362     for (j=0; j<ncols; j++) {
2363       for (kcol=0; kcol<bs; kcol++) {
2364         for (krow=0; krow<bs; krow++) {
2365           atmp = PetscAbsScalar(*aa);aa++;
2366           row  = brow + krow;   /* row index */
2367           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2368         }
2369       }
2370       aj++;
2371     }
2372   }
2373   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2374   PetscFunctionReturn(0);
2375 }
2376 
2377 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2378 {
2379   PetscErrorCode ierr;
2380 
2381   PetscFunctionBegin;
2382   /* If the two matrices have the same copy implementation, use fast copy. */
2383   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2384     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2385     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2386     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2387 
2388     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2389     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2390     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2391   } else {
2392     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2393   }
2394   PetscFunctionReturn(0);
2395 }
2396 
2397 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2398 {
2399   PetscErrorCode ierr;
2400 
2401   PetscFunctionBegin;
2402   ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2403   PetscFunctionReturn(0);
2404 }
2405 
2406 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2407 {
2408   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2409 
2410   PetscFunctionBegin;
2411   *array = a->a;
2412   PetscFunctionReturn(0);
2413 }
2414 
2415 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2416 {
2417   PetscFunctionBegin;
2418   PetscFunctionReturn(0);
2419 }
2420 
2421 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2422 {
2423   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2424   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2425   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2426   PetscErrorCode ierr;
2427 
2428   PetscFunctionBegin;
2429   /* Set the number of nonzeros in the new matrix */
2430   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2431   PetscFunctionReturn(0);
2432 }
2433 
2434 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2435 {
2436   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2437   PetscErrorCode ierr;
2438   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2439   PetscBLASInt   one=1;
2440 
2441   PetscFunctionBegin;
2442   if (str == SAME_NONZERO_PATTERN) {
2443     PetscScalar  alpha = a;
2444     PetscBLASInt bnz;
2445     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2446     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2447     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2448   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2449     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2450   } else {
2451     Mat      B;
2452     PetscInt *nnz;
2453     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2454     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2455     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2456     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2457     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2458     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2459     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2460     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2461     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2462     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2463     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2464     ierr = PetscFree(nnz);CHKERRQ(ierr);
2465   }
2466   PetscFunctionReturn(0);
2467 }
2468 
2469 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2470 {
2471   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2472   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2473   MatScalar   *aa = a->a;
2474 
2475   PetscFunctionBegin;
2476   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2477   PetscFunctionReturn(0);
2478 }
2479 
2480 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2481 {
2482   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2483   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2484   MatScalar   *aa = a->a;
2485 
2486   PetscFunctionBegin;
2487   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2488   PetscFunctionReturn(0);
2489 }
2490 
2491 /*
2492     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2493 */
2494 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2495 {
2496   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2497   PetscErrorCode ierr;
2498   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2499   PetscInt       nz = a->i[m],row,*jj,mr,col;
2500 
2501   PetscFunctionBegin;
2502   *nn = n;
2503   if (!ia) PetscFunctionReturn(0);
2504   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2505   else {
2506     ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2507     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2508     ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2509     jj   = a->j;
2510     for (i=0; i<nz; i++) {
2511       collengths[jj[i]]++;
2512     }
2513     cia[0] = oshift;
2514     for (i=0; i<n; i++) {
2515       cia[i+1] = cia[i] + collengths[i];
2516     }
2517     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2518     jj   = a->j;
2519     for (row=0; row<m; row++) {
2520       mr = a->i[row+1] - a->i[row];
2521       for (i=0; i<mr; i++) {
2522         col = *jj++;
2523 
2524         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2525       }
2526     }
2527     ierr = PetscFree(collengths);CHKERRQ(ierr);
2528     *ia  = cia; *ja = cja;
2529   }
2530   PetscFunctionReturn(0);
2531 }
2532 
2533 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2534 {
2535   PetscErrorCode ierr;
2536 
2537   PetscFunctionBegin;
2538   if (!ia) PetscFunctionReturn(0);
2539   ierr = PetscFree(*ia);CHKERRQ(ierr);
2540   ierr = PetscFree(*ja);CHKERRQ(ierr);
2541   PetscFunctionReturn(0);
2542 }
2543 
2544 /*
2545  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2546  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2547  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2548  */
2549 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2550 {
2551   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2552   PetscErrorCode ierr;
2553   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2554   PetscInt       nz = a->i[m],row,*jj,mr,col;
2555   PetscInt       *cspidx;
2556 
2557   PetscFunctionBegin;
2558   *nn = n;
2559   if (!ia) PetscFunctionReturn(0);
2560 
2561   ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2562   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2563   ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2564   ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr);
2565   jj   = a->j;
2566   for (i=0; i<nz; i++) {
2567     collengths[jj[i]]++;
2568   }
2569   cia[0] = oshift;
2570   for (i=0; i<n; i++) {
2571     cia[i+1] = cia[i] + collengths[i];
2572   }
2573   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2574   jj   = a->j;
2575   for (row=0; row<m; row++) {
2576     mr = a->i[row+1] - a->i[row];
2577     for (i=0; i<mr; i++) {
2578       col = *jj++;
2579       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2580       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2581     }
2582   }
2583   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2584   *ia    = cia; *ja = cja;
2585   *spidx = cspidx;
2586   PetscFunctionReturn(0);
2587 }
2588 
2589 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2590 {
2591   PetscErrorCode ierr;
2592 
2593   PetscFunctionBegin;
2594   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2595   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2596   PetscFunctionReturn(0);
2597 }
2598 
2599 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2600 {
2601   PetscErrorCode ierr;
2602   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2603 
2604   PetscFunctionBegin;
2605   if (!Y->preallocated || !aij->nz) {
2606     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2607   }
2608   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2609   PetscFunctionReturn(0);
2610 }
2611 
2612 /* -------------------------------------------------------------------*/
2613 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2614                                        MatGetRow_SeqBAIJ,
2615                                        MatRestoreRow_SeqBAIJ,
2616                                        MatMult_SeqBAIJ_N,
2617                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2618                                        MatMultTranspose_SeqBAIJ,
2619                                        MatMultTransposeAdd_SeqBAIJ,
2620                                        0,
2621                                        0,
2622                                        0,
2623                                /* 10*/ 0,
2624                                        MatLUFactor_SeqBAIJ,
2625                                        0,
2626                                        0,
2627                                        MatTranspose_SeqBAIJ,
2628                                /* 15*/ MatGetInfo_SeqBAIJ,
2629                                        MatEqual_SeqBAIJ,
2630                                        MatGetDiagonal_SeqBAIJ,
2631                                        MatDiagonalScale_SeqBAIJ,
2632                                        MatNorm_SeqBAIJ,
2633                                /* 20*/ 0,
2634                                        MatAssemblyEnd_SeqBAIJ,
2635                                        MatSetOption_SeqBAIJ,
2636                                        MatZeroEntries_SeqBAIJ,
2637                                /* 24*/ MatZeroRows_SeqBAIJ,
2638                                        0,
2639                                        0,
2640                                        0,
2641                                        0,
2642                                /* 29*/ MatSetUp_SeqBAIJ,
2643                                        0,
2644                                        0,
2645                                        0,
2646                                        0,
2647                                /* 34*/ MatDuplicate_SeqBAIJ,
2648                                        0,
2649                                        0,
2650                                        MatILUFactor_SeqBAIJ,
2651                                        0,
2652                                /* 39*/ MatAXPY_SeqBAIJ,
2653                                        MatCreateSubMatrices_SeqBAIJ,
2654                                        MatIncreaseOverlap_SeqBAIJ,
2655                                        MatGetValues_SeqBAIJ,
2656                                        MatCopy_SeqBAIJ,
2657                                /* 44*/ 0,
2658                                        MatScale_SeqBAIJ,
2659                                        MatShift_SeqBAIJ,
2660                                        0,
2661                                        MatZeroRowsColumns_SeqBAIJ,
2662                                /* 49*/ 0,
2663                                        MatGetRowIJ_SeqBAIJ,
2664                                        MatRestoreRowIJ_SeqBAIJ,
2665                                        MatGetColumnIJ_SeqBAIJ,
2666                                        MatRestoreColumnIJ_SeqBAIJ,
2667                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2668                                        0,
2669                                        0,
2670                                        0,
2671                                        MatSetValuesBlocked_SeqBAIJ,
2672                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2673                                        MatDestroy_SeqBAIJ,
2674                                        MatView_SeqBAIJ,
2675                                        0,
2676                                        0,
2677                                /* 64*/ 0,
2678                                        0,
2679                                        0,
2680                                        0,
2681                                        0,
2682                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2683                                        0,
2684                                        MatConvert_Basic,
2685                                        0,
2686                                        0,
2687                                /* 74*/ 0,
2688                                        MatFDColoringApply_BAIJ,
2689                                        0,
2690                                        0,
2691                                        0,
2692                                /* 79*/ 0,
2693                                        0,
2694                                        0,
2695                                        0,
2696                                        MatLoad_SeqBAIJ,
2697                                /* 84*/ 0,
2698                                        0,
2699                                        0,
2700                                        0,
2701                                        0,
2702                                /* 89*/ 0,
2703                                        0,
2704                                        0,
2705                                        0,
2706                                        0,
2707                                /* 94*/ 0,
2708                                        0,
2709                                        0,
2710                                        0,
2711                                        0,
2712                                /* 99*/ 0,
2713                                        0,
2714                                        0,
2715                                        0,
2716                                        0,
2717                                /*104*/ 0,
2718                                        MatRealPart_SeqBAIJ,
2719                                        MatImaginaryPart_SeqBAIJ,
2720                                        0,
2721                                        0,
2722                                /*109*/ 0,
2723                                        0,
2724                                        0,
2725                                        0,
2726                                        MatMissingDiagonal_SeqBAIJ,
2727                                /*114*/ 0,
2728                                        0,
2729                                        0,
2730                                        0,
2731                                        0,
2732                                /*119*/ 0,
2733                                        0,
2734                                        MatMultHermitianTranspose_SeqBAIJ,
2735                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2736                                        0,
2737                                /*124*/ 0,
2738                                        0,
2739                                        MatInvertBlockDiagonal_SeqBAIJ,
2740                                        0,
2741                                        0,
2742                                /*129*/ 0,
2743                                        0,
2744                                        0,
2745                                        0,
2746                                        0,
2747                                /*134*/ 0,
2748                                        0,
2749                                        0,
2750                                        0,
2751                                        0,
2752                                /*139*/ MatSetBlockSizes_Default,
2753                                        0,
2754                                        0,
2755                                        MatFDColoringSetUp_SeqXAIJ,
2756                                        0,
2757                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ
2758 };
2759 
2760 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2761 {
2762   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2763   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2764   PetscErrorCode ierr;
2765 
2766   PetscFunctionBegin;
2767   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2768 
2769   /* allocate space for values if not already there */
2770   if (!aij->saved_values) {
2771     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2772     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2773   }
2774 
2775   /* copy values over */
2776   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2777   PetscFunctionReturn(0);
2778 }
2779 
2780 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2781 {
2782   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2783   PetscErrorCode ierr;
2784   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2785 
2786   PetscFunctionBegin;
2787   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2788   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2789 
2790   /* copy values over */
2791   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2792   PetscFunctionReturn(0);
2793 }
2794 
2795 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2796 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2797 
2798 static PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2799 {
2800   Mat_SeqBAIJ    *b;
2801   PetscErrorCode ierr;
2802   PetscInt       i,mbs,nbs,bs2;
2803   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2804 
2805   PetscFunctionBegin;
2806   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2807   if (nz == MAT_SKIP_ALLOCATION) {
2808     skipallocation = PETSC_TRUE;
2809     nz             = 0;
2810   }
2811 
2812   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2813   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2814   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2815   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2816 
2817   B->preallocated = PETSC_TRUE;
2818 
2819   mbs = B->rmap->n/bs;
2820   nbs = B->cmap->n/bs;
2821   bs2 = bs*bs;
2822 
2823   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2824 
2825   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2826   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2827   if (nnz) {
2828     for (i=0; i<mbs; i++) {
2829       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2830       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2831     }
2832   }
2833 
2834   b    = (Mat_SeqBAIJ*)B->data;
2835   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2836   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2837   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2838 
2839   if (!flg) {
2840     switch (bs) {
2841     case 1:
2842       B->ops->mult    = MatMult_SeqBAIJ_1;
2843       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2844       break;
2845     case 2:
2846       B->ops->mult    = MatMult_SeqBAIJ_2;
2847       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2848       break;
2849     case 3:
2850       B->ops->mult    = MatMult_SeqBAIJ_3;
2851       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2852       break;
2853     case 4:
2854       B->ops->mult    = MatMult_SeqBAIJ_4;
2855       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2856       break;
2857     case 5:
2858       B->ops->mult    = MatMult_SeqBAIJ_5;
2859       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2860       break;
2861     case 6:
2862       B->ops->mult    = MatMult_SeqBAIJ_6;
2863       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2864       break;
2865     case 7:
2866       B->ops->mult    = MatMult_SeqBAIJ_7;
2867       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2868       break;
2869     case 11:
2870       B->ops->mult    = MatMult_SeqBAIJ_11;
2871       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2872       break;
2873     case 15:
2874       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2875       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2876       break;
2877     default:
2878       B->ops->mult    = MatMult_SeqBAIJ_N;
2879       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2880       break;
2881     }
2882   }
2883   B->ops->sor = MatSOR_SeqBAIJ;
2884   b->mbs = mbs;
2885   b->nbs = nbs;
2886   if (!skipallocation) {
2887     if (!b->imax) {
2888       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2889       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2890 
2891       b->free_imax_ilen = PETSC_TRUE;
2892     }
2893     /* b->ilen will count nonzeros in each block row so far. */
2894     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2895     if (!nnz) {
2896       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2897       else if (nz < 0) nz = 1;
2898       for (i=0; i<mbs; i++) b->imax[i] = nz;
2899       nz = nz*mbs;
2900     } else {
2901       nz = 0;
2902       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2903     }
2904 
2905     /* allocate the matrix space */
2906     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2907     if (B->structure_only) {
2908       ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr);
2909       ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr);
2910       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr);
2911     } else {
2912       ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2913       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2914       ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2915     }
2916     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2917 
2918     if (B->structure_only) {
2919       b->singlemalloc = PETSC_FALSE;
2920       b->free_a       = PETSC_FALSE;
2921     } else {
2922       b->singlemalloc = PETSC_TRUE;
2923       b->free_a       = PETSC_TRUE;
2924     }
2925     b->free_ij = PETSC_TRUE;
2926 
2927     b->i[0] = 0;
2928     for (i=1; i<mbs+1; i++) {
2929       b->i[i] = b->i[i-1] + b->imax[i-1];
2930     }
2931 
2932   } else {
2933     b->free_a  = PETSC_FALSE;
2934     b->free_ij = PETSC_FALSE;
2935   }
2936 
2937   b->bs2              = bs2;
2938   b->mbs              = mbs;
2939   b->nz               = 0;
2940   b->maxnz            = nz;
2941   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2942   B->was_assembled    = PETSC_FALSE;
2943   B->assembled        = PETSC_FALSE;
2944   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2945   PetscFunctionReturn(0);
2946 }
2947 
2948 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2949 {
2950   PetscInt       i,m,nz,nz_max=0,*nnz;
2951   PetscScalar    *values=0;
2952   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2953   PetscErrorCode ierr;
2954 
2955   PetscFunctionBegin;
2956   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2957   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2958   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2959   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2960   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2961   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2962   m    = B->rmap->n/bs;
2963 
2964   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2965   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2966   for (i=0; i<m; i++) {
2967     nz = ii[i+1]- ii[i];
2968     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2969     nz_max = PetscMax(nz_max, nz);
2970     nnz[i] = nz;
2971   }
2972   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2973   ierr = PetscFree(nnz);CHKERRQ(ierr);
2974 
2975   values = (PetscScalar*)V;
2976   if (!values) {
2977     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2978   }
2979   for (i=0; i<m; i++) {
2980     PetscInt          ncols  = ii[i+1] - ii[i];
2981     const PetscInt    *icols = jj + ii[i];
2982     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2983     if (!roworiented) {
2984       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2985     } else {
2986       PetscInt j;
2987       for (j=0; j<ncols; j++) {
2988         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2989         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2990       }
2991     }
2992   }
2993   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2994   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2995   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2996   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2997   PetscFunctionReturn(0);
2998 }
2999 
3000 /*MC
3001    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3002    block sparse compressed row format.
3003 
3004    Options Database Keys:
3005 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3006 
3007   Level: beginner
3008 
3009 .seealso: MatCreateSeqBAIJ()
3010 M*/
3011 
3012 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3013 
3014 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3015 {
3016   PetscErrorCode ierr;
3017   PetscMPIInt    size;
3018   Mat_SeqBAIJ    *b;
3019 
3020   PetscFunctionBegin;
3021   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3022   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3023 
3024   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3025   B->data = (void*)b;
3026   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3027 
3028   b->row          = 0;
3029   b->col          = 0;
3030   b->icol         = 0;
3031   b->reallocs     = 0;
3032   b->saved_values = 0;
3033 
3034   b->roworiented        = PETSC_TRUE;
3035   b->nonew              = 0;
3036   b->diag               = 0;
3037   B->spptr              = 0;
3038   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3039   b->keepnonzeropattern = PETSC_FALSE;
3040 
3041   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3042   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3043   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3044   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3045   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3046   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3047   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3048   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3049   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3050 #if defined(PETSC_HAVE_HYPRE)
3051   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_baij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
3052 #endif
3053   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3054   PetscFunctionReturn(0);
3055 }
3056 
3057 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3058 {
3059   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3060   PetscErrorCode ierr;
3061   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3062 
3063   PetscFunctionBegin;
3064   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3065 
3066   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3067     c->imax           = a->imax;
3068     c->ilen           = a->ilen;
3069     c->free_imax_ilen = PETSC_FALSE;
3070   } else {
3071     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3072     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3073     for (i=0; i<mbs; i++) {
3074       c->imax[i] = a->imax[i];
3075       c->ilen[i] = a->ilen[i];
3076     }
3077     c->free_imax_ilen = PETSC_TRUE;
3078   }
3079 
3080   /* allocate the matrix space */
3081   if (mallocmatspace) {
3082     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3083       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3084       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3085 
3086       c->i            = a->i;
3087       c->j            = a->j;
3088       c->singlemalloc = PETSC_FALSE;
3089       c->free_a       = PETSC_TRUE;
3090       c->free_ij      = PETSC_FALSE;
3091       c->parent       = A;
3092       C->preallocated = PETSC_TRUE;
3093       C->assembled    = PETSC_TRUE;
3094 
3095       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3096       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3097       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3098     } else {
3099       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3100       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3101 
3102       c->singlemalloc = PETSC_TRUE;
3103       c->free_a       = PETSC_TRUE;
3104       c->free_ij      = PETSC_TRUE;
3105 
3106       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3107       if (mbs > 0) {
3108         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3109         if (cpvalues == MAT_COPY_VALUES) {
3110           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3111         } else {
3112           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3113         }
3114       }
3115       C->preallocated = PETSC_TRUE;
3116       C->assembled    = PETSC_TRUE;
3117     }
3118   }
3119 
3120   c->roworiented = a->roworiented;
3121   c->nonew       = a->nonew;
3122 
3123   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3124   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3125 
3126   c->bs2         = a->bs2;
3127   c->mbs         = a->mbs;
3128   c->nbs         = a->nbs;
3129 
3130   if (a->diag) {
3131     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3132       c->diag      = a->diag;
3133       c->free_diag = PETSC_FALSE;
3134     } else {
3135       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3136       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3137       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3138       c->free_diag = PETSC_TRUE;
3139     }
3140   } else c->diag = 0;
3141 
3142   c->nz         = a->nz;
3143   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3144   c->solve_work = NULL;
3145   c->mult_work  = NULL;
3146   c->sor_workt  = NULL;
3147   c->sor_work   = NULL;
3148 
3149   c->compressedrow.use   = a->compressedrow.use;
3150   c->compressedrow.nrows = a->compressedrow.nrows;
3151   if (a->compressedrow.use) {
3152     i    = a->compressedrow.nrows;
3153     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3154     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3155     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3156     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3157   } else {
3158     c->compressedrow.use    = PETSC_FALSE;
3159     c->compressedrow.i      = NULL;
3160     c->compressedrow.rindex = NULL;
3161   }
3162   C->nonzerostate = A->nonzerostate;
3163 
3164   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3165   PetscFunctionReturn(0);
3166 }
3167 
3168 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3169 {
3170   PetscErrorCode ierr;
3171 
3172   PetscFunctionBegin;
3173   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3174   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3175   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3176   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3177   PetscFunctionReturn(0);
3178 }
3179 
3180 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3181 {
3182   Mat_SeqBAIJ    *a;
3183   PetscErrorCode ierr;
3184   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3185   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3186   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3187   PetscInt       *masked,nmask,tmp,bs2,ishift;
3188   PetscMPIInt    size;
3189   int            fd;
3190   PetscScalar    *aa;
3191   MPI_Comm       comm;
3192 
3193   PetscFunctionBegin;
3194   /* force binary viewer to load .info file if it has not yet done so */
3195   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3196   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3197   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3198   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3199   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3200   if (bs < 0) bs = 1;
3201   bs2  = bs*bs;
3202 
3203   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3204   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3205   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3206   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3207   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3208   M = header[1]; N = header[2]; nz = header[3];
3209 
3210   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3211   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3212 
3213   /*
3214      This code adds extra rows to make sure the number of rows is
3215     divisible by the blocksize
3216   */
3217   mbs        = M/bs;
3218   extra_rows = bs - M + bs*(mbs);
3219   if (extra_rows == bs) extra_rows = 0;
3220   else mbs++;
3221   if (extra_rows) {
3222     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3223   }
3224 
3225   /* Set global sizes if not already set */
3226   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3227     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3228   } else { /* Check if the matrix global sizes are correct */
3229     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3230     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3231       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3232     }
3233     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3234   }
3235 
3236   /* read in row lengths */
3237   ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
3238   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3239   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3240 
3241   /* read in column indices */
3242   ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr);
3243   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3244   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3245 
3246   /* loop over row lengths determining block row lengths */
3247   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3248   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3249   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3250   rowcount = 0;
3251   nzcount  = 0;
3252   for (i=0; i<mbs; i++) {
3253     nmask = 0;
3254     for (j=0; j<bs; j++) {
3255       kmax = rowlengths[rowcount];
3256       for (k=0; k<kmax; k++) {
3257         tmp = jj[nzcount++]/bs;
3258         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3259       }
3260       rowcount++;
3261     }
3262     browlengths[i] += nmask;
3263     /* zero out the mask elements we set */
3264     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3265   }
3266 
3267   /* Do preallocation  */
3268   ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr);
3269   a    = (Mat_SeqBAIJ*)newmat->data;
3270 
3271   /* set matrix "i" values */
3272   a->i[0] = 0;
3273   for (i=1; i<= mbs; i++) {
3274     a->i[i]      = a->i[i-1] + browlengths[i-1];
3275     a->ilen[i-1] = browlengths[i-1];
3276   }
3277   a->nz = 0;
3278   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3279 
3280   /* read in nonzero values */
3281   ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr);
3282   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3283   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3284 
3285   /* set "a" and "j" values into matrix */
3286   nzcount = 0; jcount = 0;
3287   for (i=0; i<mbs; i++) {
3288     nzcountb = nzcount;
3289     nmask    = 0;
3290     for (j=0; j<bs; j++) {
3291       kmax = rowlengths[i*bs+j];
3292       for (k=0; k<kmax; k++) {
3293         tmp = jj[nzcount++]/bs;
3294         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3295       }
3296     }
3297     /* sort the masked values */
3298     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3299 
3300     /* set "j" values into matrix */
3301     maskcount = 1;
3302     for (j=0; j<nmask; j++) {
3303       a->j[jcount++]  = masked[j];
3304       mask[masked[j]] = maskcount++;
3305     }
3306     /* set "a" values into matrix */
3307     ishift = bs2*a->i[i];
3308     for (j=0; j<bs; j++) {
3309       kmax = rowlengths[i*bs+j];
3310       for (k=0; k<kmax; k++) {
3311         tmp       = jj[nzcountb]/bs;
3312         block     = mask[tmp] - 1;
3313         point     = jj[nzcountb] - bs*tmp;
3314         idx       = ishift + bs2*block + j + bs*point;
3315         a->a[idx] = (MatScalar)aa[nzcountb++];
3316       }
3317     }
3318     /* zero out the mask elements we set */
3319     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3320   }
3321   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3322 
3323   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3324   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3325   ierr = PetscFree(aa);CHKERRQ(ierr);
3326   ierr = PetscFree(jj);CHKERRQ(ierr);
3327   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3328 
3329   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3330   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3331   PetscFunctionReturn(0);
3332 }
3333 
3334 /*@C
3335    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3336    compressed row) format.  For good matrix assembly performance the
3337    user should preallocate the matrix storage by setting the parameter nz
3338    (or the array nnz).  By setting these parameters accurately, performance
3339    during matrix assembly can be increased by more than a factor of 50.
3340 
3341    Collective on MPI_Comm
3342 
3343    Input Parameters:
3344 +  comm - MPI communicator, set to PETSC_COMM_SELF
3345 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3346           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3347 .  m - number of rows
3348 .  n - number of columns
3349 .  nz - number of nonzero blocks  per block row (same for all rows)
3350 -  nnz - array containing the number of nonzero blocks in the various block rows
3351          (possibly different for each block row) or NULL
3352 
3353    Output Parameter:
3354 .  A - the matrix
3355 
3356    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3357    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3358    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3359 
3360    Options Database Keys:
3361 .   -mat_no_unroll - uses code that does not unroll the loops in the
3362                      block calculations (much slower)
3363 .    -mat_block_size - size of the blocks to use
3364 
3365    Level: intermediate
3366 
3367    Notes:
3368    The number of rows and columns must be divisible by blocksize.
3369 
3370    If the nnz parameter is given then the nz parameter is ignored
3371 
3372    A nonzero block is any block that as 1 or more nonzeros in it
3373 
3374    The block AIJ format is fully compatible with standard Fortran 77
3375    storage.  That is, the stored row and column indices can begin at
3376    either one (as in Fortran) or zero.  See the users' manual for details.
3377 
3378    Specify the preallocated storage with either nz or nnz (not both).
3379    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3380    allocation.  See Users-Manual: ch_mat for details.
3381    matrices.
3382 
3383 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3384 @*/
3385 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3386 {
3387   PetscErrorCode ierr;
3388 
3389   PetscFunctionBegin;
3390   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3391   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3392   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3393   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3394   PetscFunctionReturn(0);
3395 }
3396 
3397 /*@C
3398    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3399    per row in the matrix. For good matrix assembly performance the
3400    user should preallocate the matrix storage by setting the parameter nz
3401    (or the array nnz).  By setting these parameters accurately, performance
3402    during matrix assembly can be increased by more than a factor of 50.
3403 
3404    Collective on MPI_Comm
3405 
3406    Input Parameters:
3407 +  B - the matrix
3408 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3409           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3410 .  nz - number of block nonzeros per block row (same for all rows)
3411 -  nnz - array containing the number of block nonzeros in the various block rows
3412          (possibly different for each block row) or NULL
3413 
3414    Options Database Keys:
3415 .   -mat_no_unroll - uses code that does not unroll the loops in the
3416                      block calculations (much slower)
3417 .   -mat_block_size - size of the blocks to use
3418 
3419    Level: intermediate
3420 
3421    Notes:
3422    If the nnz parameter is given then the nz parameter is ignored
3423 
3424    You can call MatGetInfo() to get information on how effective the preallocation was;
3425    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3426    You can also run with the option -info and look for messages with the string
3427    malloc in them to see if additional memory allocation was needed.
3428 
3429    The block AIJ format is fully compatible with standard Fortran 77
3430    storage.  That is, the stored row and column indices can begin at
3431    either one (as in Fortran) or zero.  See the users' manual for details.
3432 
3433    Specify the preallocated storage with either nz or nnz (not both).
3434    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3435    allocation.  See Users-Manual: ch_mat for details.
3436 
3437 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3438 @*/
3439 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3440 {
3441   PetscErrorCode ierr;
3442 
3443   PetscFunctionBegin;
3444   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3445   PetscValidType(B,1);
3446   PetscValidLogicalCollectiveInt(B,bs,2);
3447   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3448   PetscFunctionReturn(0);
3449 }
3450 
3451 /*@C
3452    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3453    (the default sequential PETSc format).
3454 
3455    Collective on MPI_Comm
3456 
3457    Input Parameters:
3458 +  B - the matrix
3459 .  i - the indices into j for the start of each local row (starts with zero)
3460 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3461 -  v - optional values in the matrix
3462 
3463    Level: developer
3464 
3465    Notes:
3466    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3467    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3468    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3469    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3470    block column and the second index is over columns within a block.
3471 
3472 .keywords: matrix, aij, compressed row, sparse
3473 
3474 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3475 @*/
3476 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3477 {
3478   PetscErrorCode ierr;
3479 
3480   PetscFunctionBegin;
3481   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3482   PetscValidType(B,1);
3483   PetscValidLogicalCollectiveInt(B,bs,2);
3484   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3485   PetscFunctionReturn(0);
3486 }
3487 
3488 
3489 /*@
3490      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3491 
3492      Collective on MPI_Comm
3493 
3494    Input Parameters:
3495 +  comm - must be an MPI communicator of size 1
3496 .  bs - size of block
3497 .  m - number of rows
3498 .  n - number of columns
3499 .  i - row indices
3500 .  j - column indices
3501 -  a - matrix values
3502 
3503    Output Parameter:
3504 .  mat - the matrix
3505 
3506    Level: advanced
3507 
3508    Notes:
3509        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3510     once the matrix is destroyed
3511 
3512        You cannot set new nonzero locations into this matrix, that will generate an error.
3513 
3514        The i and j indices are 0 based
3515 
3516        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3517 
3518       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3519       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3520       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3521       with column-major ordering within blocks.
3522 
3523 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3524 
3525 @*/
3526 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3527 {
3528   PetscErrorCode ierr;
3529   PetscInt       ii;
3530   Mat_SeqBAIJ    *baij;
3531 
3532   PetscFunctionBegin;
3533   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3534   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3535 
3536   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3537   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3538   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3539   ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3540   baij = (Mat_SeqBAIJ*)(*mat)->data;
3541   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3542   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3543 
3544   baij->i = i;
3545   baij->j = j;
3546   baij->a = a;
3547 
3548   baij->singlemalloc = PETSC_FALSE;
3549   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3550   baij->free_a       = PETSC_FALSE;
3551   baij->free_ij      = PETSC_FALSE;
3552 
3553   for (ii=0; ii<m; ii++) {
3554     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3555 #if defined(PETSC_USE_DEBUG)
3556     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3557 #endif
3558   }
3559 #if defined(PETSC_USE_DEBUG)
3560   for (ii=0; ii<baij->i[m]; ii++) {
3561     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3562     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3563   }
3564 #endif
3565 
3566   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3567   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3568   PetscFunctionReturn(0);
3569 }
3570 
3571 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3572 {
3573   PetscErrorCode ierr;
3574   PetscMPIInt    size;
3575 
3576   PetscFunctionBegin;
3577   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3578   if (size == 1 && scall == MAT_REUSE_MATRIX) {
3579     ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
3580   } else {
3581     ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3582   }
3583   PetscFunctionReturn(0);
3584 }
3585