xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 3d96e9964ff330fd2a9eee374bcd4376da7efe60)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13 #endif
14 
15 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
16 {
17   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
18   PetscErrorCode ierr;
19   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
20   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
21   PetscReal      shift = 0.0;
22   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
23 
24   PetscFunctionBegin;
25   allowzeropivot = PetscNot(A->erroriffailure);
26 
27   if (a->idiagvalid) {
28     if (values) *values = a->idiag;
29     PetscFunctionReturn(0);
30   }
31   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
32   diag_offset = a->diag;
33   if (!a->idiag) {
34     ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr);
35     ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
36   }
37   diag  = a->idiag;
38   mdiag = a->idiag+bs2*mbs;
39   if (values) *values = a->idiag;
40   /* factor and invert each block */
41   switch (bs) {
42   case 1:
43     for (i=0; i<mbs; i++) {
44       odiag    = v + 1*diag_offset[i];
45       diag[0]  = odiag[0];
46       mdiag[0] = odiag[0];
47 
48       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
49         if (allowzeropivot) {
50           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
51           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
52           A->factorerror_zeropivot_row   = i;
53           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
54         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
55       }
56 
57       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
58       diag    += 1;
59       mdiag   += 1;
60     }
61     break;
62   case 2:
63     for (i=0; i<mbs; i++) {
64       odiag    = v + 4*diag_offset[i];
65       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
66       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
67       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
68       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
69       diag    += 4;
70       mdiag   += 4;
71     }
72     break;
73   case 3:
74     for (i=0; i<mbs; i++) {
75       odiag    = v + 9*diag_offset[i];
76       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
77       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
78       diag[8]  = odiag[8];
79       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
80       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
81       mdiag[8] = odiag[8];
82       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
83       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
84       diag    += 9;
85       mdiag   += 9;
86     }
87     break;
88   case 4:
89     for (i=0; i<mbs; i++) {
90       odiag  = v + 16*diag_offset[i];
91       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
92       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
94       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
95       diag  += 16;
96       mdiag += 16;
97     }
98     break;
99   case 5:
100     for (i=0; i<mbs; i++) {
101       odiag  = v + 25*diag_offset[i];
102       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
104       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
105       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
106       diag  += 25;
107       mdiag += 25;
108     }
109     break;
110   case 6:
111     for (i=0; i<mbs; i++) {
112       odiag  = v + 36*diag_offset[i];
113       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
114       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
115       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
116       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
117       diag  += 36;
118       mdiag += 36;
119     }
120     break;
121   case 7:
122     for (i=0; i<mbs; i++) {
123       odiag  = v + 49*diag_offset[i];
124       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
125       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
126       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
127       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
128       diag  += 49;
129       mdiag += 49;
130     }
131     break;
132   default:
133     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
134     for (i=0; i<mbs; i++) {
135       odiag  = v + bs2*diag_offset[i];
136       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
137       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
138       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
139       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
140       diag  += bs2;
141       mdiag += bs2;
142     }
143     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
144   }
145   a->idiagvalid = PETSC_TRUE;
146   PetscFunctionReturn(0);
147 }
148 
149 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
150 {
151   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
152   PetscScalar       *x,*work,*w,*workt,*t;
153   const MatScalar   *v,*aa = a->a, *idiag;
154   const PetscScalar *b,*xb;
155   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
156   PetscErrorCode    ierr;
157   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
158   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
159 
160   PetscFunctionBegin;
161   if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */
162   its = its*lits;
163   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
164   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
165   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
166   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
167   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
168 
169   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
170 
171   if (!m) PetscFunctionReturn(0);
172   diag  = a->diag;
173   idiag = a->idiag;
174   k    = PetscMax(A->rmap->n,A->cmap->n);
175   if (!a->mult_work) {
176     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
177   }
178   if (!a->sor_workt) {
179     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
180   }
181   if (!a->sor_work) {
182     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
183   }
184   work = a->mult_work;
185   t    = a->sor_workt;
186   w    = a->sor_work;
187 
188   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
189   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
190 
191   if (flag & SOR_ZERO_INITIAL_GUESS) {
192     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
193       switch (bs) {
194       case 1:
195         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
196         t[0] = b[0];
197         i2     = 1;
198         idiag += 1;
199         for (i=1; i<m; i++) {
200           v  = aa + ai[i];
201           vi = aj + ai[i];
202           nz = diag[i] - ai[i];
203           s[0] = b[i2];
204           for (j=0; j<nz; j++) {
205             xw[0] = x[vi[j]];
206             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
207           }
208           t[i2] = s[0];
209           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
210           x[i2]  = xw[0];
211           idiag += 1;
212           i2    += 1;
213         }
214         break;
215       case 2:
216         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
217         t[0] = b[0]; t[1] = b[1];
218         i2     = 2;
219         idiag += 4;
220         for (i=1; i<m; i++) {
221           v  = aa + 4*ai[i];
222           vi = aj + ai[i];
223           nz = diag[i] - ai[i];
224           s[0] = b[i2]; s[1] = b[i2+1];
225           for (j=0; j<nz; j++) {
226             idx = 2*vi[j];
227             it  = 4*j;
228             xw[0] = x[idx]; xw[1] = x[1+idx];
229             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
230           }
231           t[i2] = s[0]; t[i2+1] = s[1];
232           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
233           x[i2]   = xw[0]; x[i2+1] = xw[1];
234           idiag  += 4;
235           i2     += 2;
236         }
237         break;
238       case 3:
239         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
240         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
241         i2     = 3;
242         idiag += 9;
243         for (i=1; i<m; i++) {
244           v  = aa + 9*ai[i];
245           vi = aj + ai[i];
246           nz = diag[i] - ai[i];
247           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
248           while (nz--) {
249             idx = 3*(*vi++);
250             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
251             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
252             v  += 9;
253           }
254           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
255           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
256           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
257           idiag  += 9;
258           i2     += 3;
259         }
260         break;
261       case 4:
262         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
263         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
264         i2     = 4;
265         idiag += 16;
266         for (i=1; i<m; i++) {
267           v  = aa + 16*ai[i];
268           vi = aj + ai[i];
269           nz = diag[i] - ai[i];
270           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
271           while (nz--) {
272             idx = 4*(*vi++);
273             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
274             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
275             v  += 16;
276           }
277           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
278           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
279           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
280           idiag  += 16;
281           i2     += 4;
282         }
283         break;
284       case 5:
285         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
286         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
287         i2     = 5;
288         idiag += 25;
289         for (i=1; i<m; i++) {
290           v  = aa + 25*ai[i];
291           vi = aj + ai[i];
292           nz = diag[i] - ai[i];
293           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
294           while (nz--) {
295             idx = 5*(*vi++);
296             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
297             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
298             v  += 25;
299           }
300           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
301           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
302           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
303           idiag  += 25;
304           i2     += 5;
305         }
306         break;
307       case 6:
308         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
309         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
310         i2     = 6;
311         idiag += 36;
312         for (i=1; i<m; i++) {
313           v  = aa + 36*ai[i];
314           vi = aj + ai[i];
315           nz = diag[i] - ai[i];
316           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
317           while (nz--) {
318             idx = 6*(*vi++);
319             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
320             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
321             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
322             v  += 36;
323           }
324           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
325           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
326           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
327           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
328           idiag  += 36;
329           i2     += 6;
330         }
331         break;
332       case 7:
333         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
334         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
335         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
336         i2     = 7;
337         idiag += 49;
338         for (i=1; i<m; i++) {
339           v  = aa + 49*ai[i];
340           vi = aj + ai[i];
341           nz = diag[i] - ai[i];
342           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
343           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
344           while (nz--) {
345             idx = 7*(*vi++);
346             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
347             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
348             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
349             v  += 49;
350           }
351           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
352           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
353           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
354           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
355           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
356           idiag  += 49;
357           i2     += 7;
358         }
359         break;
360       default:
361         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
362         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
363         i2     = bs;
364         idiag += bs2;
365         for (i=1; i<m; i++) {
366           v  = aa + bs2*ai[i];
367           vi = aj + ai[i];
368           nz = diag[i] - ai[i];
369 
370           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
371           /* copy all rows of x that are needed into contiguous space */
372           workt = work;
373           for (j=0; j<nz; j++) {
374             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
375             workt += bs;
376           }
377           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
378           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
379           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
380 
381           idiag += bs2;
382           i2    += bs;
383         }
384         break;
385       }
386       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
387       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
388       xb = t;
389     }
390     else xb = b;
391     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
392       idiag = a->idiag+bs2*(a->mbs-1);
393       i2 = bs * (m-1);
394       switch (bs) {
395       case 1:
396         s[0]  = xb[i2];
397         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
398         x[i2] = xw[0];
399         i2   -= 1;
400         for (i=m-2; i>=0; i--) {
401           v  = aa + (diag[i]+1);
402           vi = aj + diag[i] + 1;
403           nz = ai[i+1] - diag[i] - 1;
404           s[0] = xb[i2];
405           for (j=0; j<nz; j++) {
406             xw[0] = x[vi[j]];
407             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
408           }
409           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
410           x[i2]  = xw[0];
411           idiag -= 1;
412           i2    -= 1;
413         }
414         break;
415       case 2:
416         s[0]  = xb[i2]; s[1] = xb[i2+1];
417         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
418         x[i2] = xw[0]; x[i2+1] = xw[1];
419         i2    -= 2;
420         idiag -= 4;
421         for (i=m-2; i>=0; i--) {
422           v  = aa + 4*(diag[i] + 1);
423           vi = aj + diag[i] + 1;
424           nz = ai[i+1] - diag[i] - 1;
425           s[0] = xb[i2]; s[1] = xb[i2+1];
426           for (j=0; j<nz; j++) {
427             idx = 2*vi[j];
428             it  = 4*j;
429             xw[0] = x[idx]; xw[1] = x[1+idx];
430             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
431           }
432           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
433           x[i2]   = xw[0]; x[i2+1] = xw[1];
434           idiag  -= 4;
435           i2     -= 2;
436         }
437         break;
438       case 3:
439         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
440         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
441         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
442         i2    -= 3;
443         idiag -= 9;
444         for (i=m-2; i>=0; i--) {
445           v  = aa + 9*(diag[i]+1);
446           vi = aj + diag[i] + 1;
447           nz = ai[i+1] - diag[i] - 1;
448           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
449           while (nz--) {
450             idx = 3*(*vi++);
451             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
452             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
453             v  += 9;
454           }
455           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
456           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
457           idiag  -= 9;
458           i2     -= 3;
459         }
460         break;
461       case 4:
462         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
463         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
464         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
465         i2    -= 4;
466         idiag -= 16;
467         for (i=m-2; i>=0; i--) {
468           v  = aa + 16*(diag[i]+1);
469           vi = aj + diag[i] + 1;
470           nz = ai[i+1] - diag[i] - 1;
471           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
472           while (nz--) {
473             idx = 4*(*vi++);
474             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
475             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
476             v  += 16;
477           }
478           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
479           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
480           idiag  -= 16;
481           i2     -= 4;
482         }
483         break;
484       case 5:
485         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
486         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
487         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
488         i2    -= 5;
489         idiag -= 25;
490         for (i=m-2; i>=0; i--) {
491           v  = aa + 25*(diag[i]+1);
492           vi = aj + diag[i] + 1;
493           nz = ai[i+1] - diag[i] - 1;
494           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
495           while (nz--) {
496             idx = 5*(*vi++);
497             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
498             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
499             v  += 25;
500           }
501           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
502           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
503           idiag  -= 25;
504           i2     -= 5;
505         }
506         break;
507       case 6:
508         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
509         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
510         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
511         i2    -= 6;
512         idiag -= 36;
513         for (i=m-2; i>=0; i--) {
514           v  = aa + 36*(diag[i]+1);
515           vi = aj + diag[i] + 1;
516           nz = ai[i+1] - diag[i] - 1;
517           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
518           while (nz--) {
519             idx = 6*(*vi++);
520             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
521             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
522             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
523             v  += 36;
524           }
525           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
526           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
527           idiag  -= 36;
528           i2     -= 6;
529         }
530         break;
531       case 7:
532         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
533         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
534         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
535         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
536         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
537         i2    -= 7;
538         idiag -= 49;
539         for (i=m-2; i>=0; i--) {
540           v  = aa + 49*(diag[i]+1);
541           vi = aj + diag[i] + 1;
542           nz = ai[i+1] - diag[i] - 1;
543           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
544           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
545           while (nz--) {
546             idx = 7*(*vi++);
547             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
548             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
549             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
550             v  += 49;
551           }
552           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
553           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
554           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
555           idiag  -= 49;
556           i2     -= 7;
557         }
558         break;
559       default:
560         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
561         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
562         i2    -= bs;
563         idiag -= bs2;
564         for (i=m-2; i>=0; i--) {
565           v  = aa + bs2*(diag[i]+1);
566           vi = aj + diag[i] + 1;
567           nz = ai[i+1] - diag[i] - 1;
568 
569           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
570           /* copy all rows of x that are needed into contiguous space */
571           workt = work;
572           for (j=0; j<nz; j++) {
573             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
574             workt += bs;
575           }
576           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
577           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
578 
579           idiag -= bs2;
580           i2    -= bs;
581         }
582         break;
583       }
584       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
585     }
586     its--;
587   }
588   while (its--) {
589     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
590       idiag = a->idiag;
591       i2 = 0;
592       switch (bs) {
593       case 1:
594         for (i=0; i<m; i++) {
595           v  = aa + ai[i];
596           vi = aj + ai[i];
597           nz = ai[i+1] - ai[i];
598           s[0] = b[i2];
599           for (j=0; j<nz; j++) {
600             xw[0] = x[vi[j]];
601             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
602           }
603           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
604           x[i2] += xw[0];
605           idiag += 1;
606           i2    += 1;
607         }
608         break;
609       case 2:
610         for (i=0; i<m; i++) {
611           v  = aa + 4*ai[i];
612           vi = aj + ai[i];
613           nz = ai[i+1] - ai[i];
614           s[0] = b[i2]; s[1] = b[i2+1];
615           for (j=0; j<nz; j++) {
616             idx = 2*vi[j];
617             it  = 4*j;
618             xw[0] = x[idx]; xw[1] = x[1+idx];
619             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
620           }
621           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
622           x[i2]  += xw[0]; x[i2+1] += xw[1];
623           idiag  += 4;
624           i2     += 2;
625         }
626         break;
627       case 3:
628         for (i=0; i<m; i++) {
629           v  = aa + 9*ai[i];
630           vi = aj + ai[i];
631           nz = ai[i+1] - ai[i];
632           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
633           while (nz--) {
634             idx = 3*(*vi++);
635             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
636             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
637             v  += 9;
638           }
639           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
640           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
641           idiag  += 9;
642           i2     += 3;
643         }
644         break;
645       case 4:
646         for (i=0; i<m; i++) {
647           v  = aa + 16*ai[i];
648           vi = aj + ai[i];
649           nz = ai[i+1] - ai[i];
650           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
651           while (nz--) {
652             idx = 4*(*vi++);
653             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
654             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
655             v  += 16;
656           }
657           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
658           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
659           idiag  += 16;
660           i2     += 4;
661         }
662         break;
663       case 5:
664         for (i=0; i<m; i++) {
665           v  = aa + 25*ai[i];
666           vi = aj + ai[i];
667           nz = ai[i+1] - ai[i];
668           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
669           while (nz--) {
670             idx = 5*(*vi++);
671             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
672             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
673             v  += 25;
674           }
675           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
676           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
677           idiag  += 25;
678           i2     += 5;
679         }
680         break;
681       case 6:
682         for (i=0; i<m; i++) {
683           v  = aa + 36*ai[i];
684           vi = aj + ai[i];
685           nz = ai[i+1] - ai[i];
686           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
687           while (nz--) {
688             idx = 6*(*vi++);
689             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
690             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
691             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
692             v  += 36;
693           }
694           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
695           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
696           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
697           idiag  += 36;
698           i2     += 6;
699         }
700         break;
701       case 7:
702         for (i=0; i<m; i++) {
703           v  = aa + 49*ai[i];
704           vi = aj + ai[i];
705           nz = ai[i+1] - ai[i];
706           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
707           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
708           while (nz--) {
709             idx = 7*(*vi++);
710             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
711             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
712             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
713             v  += 49;
714           }
715           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
716           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
717           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
718           idiag  += 49;
719           i2     += 7;
720         }
721         break;
722       default:
723         for (i=0; i<m; i++) {
724           v  = aa + bs2*ai[i];
725           vi = aj + ai[i];
726           nz = ai[i+1] - ai[i];
727 
728           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
729           /* copy all rows of x that are needed into contiguous space */
730           workt = work;
731           for (j=0; j<nz; j++) {
732             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
733             workt += bs;
734           }
735           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
736           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
737 
738           idiag += bs2;
739           i2    += bs;
740         }
741         break;
742       }
743       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
744     }
745     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
746       idiag = a->idiag+bs2*(a->mbs-1);
747       i2 = bs * (m-1);
748       switch (bs) {
749       case 1:
750         for (i=m-1; i>=0; i--) {
751           v  = aa + ai[i];
752           vi = aj + ai[i];
753           nz = ai[i+1] - ai[i];
754           s[0] = b[i2];
755           for (j=0; j<nz; j++) {
756             xw[0] = x[vi[j]];
757             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
758           }
759           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
760           x[i2] += xw[0];
761           idiag -= 1;
762           i2    -= 1;
763         }
764         break;
765       case 2:
766         for (i=m-1; i>=0; i--) {
767           v  = aa + 4*ai[i];
768           vi = aj + ai[i];
769           nz = ai[i+1] - ai[i];
770           s[0] = b[i2]; s[1] = b[i2+1];
771           for (j=0; j<nz; j++) {
772             idx = 2*vi[j];
773             it  = 4*j;
774             xw[0] = x[idx]; xw[1] = x[1+idx];
775             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
776           }
777           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
778           x[i2]  += xw[0]; x[i2+1] += xw[1];
779           idiag  -= 4;
780           i2     -= 2;
781         }
782         break;
783       case 3:
784         for (i=m-1; i>=0; i--) {
785           v  = aa + 9*ai[i];
786           vi = aj + ai[i];
787           nz = ai[i+1] - ai[i];
788           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
789           while (nz--) {
790             idx = 3*(*vi++);
791             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
792             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
793             v  += 9;
794           }
795           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
796           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
797           idiag  -= 9;
798           i2     -= 3;
799         }
800         break;
801       case 4:
802         for (i=m-1; i>=0; i--) {
803           v  = aa + 16*ai[i];
804           vi = aj + ai[i];
805           nz = ai[i+1] - ai[i];
806           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
807           while (nz--) {
808             idx = 4*(*vi++);
809             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
810             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
811             v  += 16;
812           }
813           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
814           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
815           idiag  -= 16;
816           i2     -= 4;
817         }
818         break;
819       case 5:
820         for (i=m-1; i>=0; i--) {
821           v  = aa + 25*ai[i];
822           vi = aj + ai[i];
823           nz = ai[i+1] - ai[i];
824           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
825           while (nz--) {
826             idx = 5*(*vi++);
827             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
828             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
829             v  += 25;
830           }
831           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
832           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
833           idiag  -= 25;
834           i2     -= 5;
835         }
836         break;
837       case 6:
838         for (i=m-1; i>=0; i--) {
839           v  = aa + 36*ai[i];
840           vi = aj + ai[i];
841           nz = ai[i+1] - ai[i];
842           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
843           while (nz--) {
844             idx = 6*(*vi++);
845             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
846             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
847             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
848             v  += 36;
849           }
850           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
851           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
852           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
853           idiag  -= 36;
854           i2     -= 6;
855         }
856         break;
857       case 7:
858         for (i=m-1; i>=0; i--) {
859           v  = aa + 49*ai[i];
860           vi = aj + ai[i];
861           nz = ai[i+1] - ai[i];
862           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
863           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
864           while (nz--) {
865             idx = 7*(*vi++);
866             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
867             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
868             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
869             v  += 49;
870           }
871           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
872           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
873           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
874           idiag  -= 49;
875           i2     -= 7;
876         }
877         break;
878       default:
879         for (i=m-1; i>=0; i--) {
880           v  = aa + bs2*ai[i];
881           vi = aj + ai[i];
882           nz = ai[i+1] - ai[i];
883 
884           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
885           /* copy all rows of x that are needed into contiguous space */
886           workt = work;
887           for (j=0; j<nz; j++) {
888             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
889             workt += bs;
890           }
891           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
892           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
893 
894           idiag -= bs2;
895           i2    -= bs;
896         }
897         break;
898       }
899       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
900     }
901   }
902   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
903   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
904   PetscFunctionReturn(0);
905 }
906 
907 
908 /*
909     Special version for direct calls from Fortran (Used in PETSc-fun3d)
910 */
911 #if defined(PETSC_HAVE_FORTRAN_CAPS)
912 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
913 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
914 #define matsetvaluesblocked4_ matsetvaluesblocked4
915 #endif
916 
917 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
918 {
919   Mat               A  = *AA;
920   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
921   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
922   PetscInt          *ai    =a->i,*ailen=a->ilen;
923   PetscInt          *aj    =a->j,stepval,lastcol = -1;
924   const PetscScalar *value = v;
925   MatScalar         *ap,*aa = a->a,*bap;
926 
927   PetscFunctionBegin;
928   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
929   stepval = (n-1)*4;
930   for (k=0; k<m; k++) { /* loop over added rows */
931     row  = im[k];
932     rp   = aj + ai[row];
933     ap   = aa + 16*ai[row];
934     nrow = ailen[row];
935     low  = 0;
936     high = nrow;
937     for (l=0; l<n; l++) { /* loop over added columns */
938       col = in[l];
939       if (col <= lastcol)  low = 0;
940       else                high = nrow;
941       lastcol = col;
942       value   = v + k*(stepval+4 + l)*4;
943       while (high-low > 7) {
944         t = (low+high)/2;
945         if (rp[t] > col) high = t;
946         else             low  = t;
947       }
948       for (i=low; i<high; i++) {
949         if (rp[i] > col) break;
950         if (rp[i] == col) {
951           bap = ap +  16*i;
952           for (ii=0; ii<4; ii++,value+=stepval) {
953             for (jj=ii; jj<16; jj+=4) {
954               bap[jj] += *value++;
955             }
956           }
957           goto noinsert2;
958         }
959       }
960       N = nrow++ - 1;
961       high++; /* added new column index thus must search to one higher than before */
962       /* shift up all the later entries in this row */
963       for (ii=N; ii>=i; ii--) {
964         rp[ii+1] = rp[ii];
965         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
966       }
967       if (N >= i) {
968         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
969       }
970       rp[i] = col;
971       bap   = ap +  16*i;
972       for (ii=0; ii<4; ii++,value+=stepval) {
973         for (jj=ii; jj<16; jj+=4) {
974           bap[jj] = *value++;
975         }
976       }
977       noinsert2:;
978       low = i;
979     }
980     ailen[row] = nrow;
981   }
982   PetscFunctionReturnVoid();
983 }
984 
985 #if defined(PETSC_HAVE_FORTRAN_CAPS)
986 #define matsetvalues4_ MATSETVALUES4
987 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
988 #define matsetvalues4_ matsetvalues4
989 #endif
990 
991 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
992 {
993   Mat         A  = *AA;
994   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
995   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
996   PetscInt    *ai=a->i,*ailen=a->ilen;
997   PetscInt    *aj=a->j,brow,bcol;
998   PetscInt    ridx,cidx,lastcol = -1;
999   MatScalar   *ap,value,*aa=a->a,*bap;
1000 
1001   PetscFunctionBegin;
1002   for (k=0; k<m; k++) { /* loop over added rows */
1003     row  = im[k]; brow = row/4;
1004     rp   = aj + ai[brow];
1005     ap   = aa + 16*ai[brow];
1006     nrow = ailen[brow];
1007     low  = 0;
1008     high = nrow;
1009     for (l=0; l<n; l++) { /* loop over added columns */
1010       col   = in[l]; bcol = col/4;
1011       ridx  = row % 4; cidx = col % 4;
1012       value = v[l + k*n];
1013       if (col <= lastcol)  low = 0;
1014       else                high = nrow;
1015       lastcol = col;
1016       while (high-low > 7) {
1017         t = (low+high)/2;
1018         if (rp[t] > bcol) high = t;
1019         else              low  = t;
1020       }
1021       for (i=low; i<high; i++) {
1022         if (rp[i] > bcol) break;
1023         if (rp[i] == bcol) {
1024           bap   = ap +  16*i + 4*cidx + ridx;
1025           *bap += value;
1026           goto noinsert1;
1027         }
1028       }
1029       N = nrow++ - 1;
1030       high++; /* added new column thus must search to one higher than before */
1031       /* shift up all the later entries in this row */
1032       for (ii=N; ii>=i; ii--) {
1033         rp[ii+1] = rp[ii];
1034         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1035       }
1036       if (N>=i) {
1037         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1038       }
1039       rp[i]                    = bcol;
1040       ap[16*i + 4*cidx + ridx] = value;
1041 noinsert1:;
1042       low = i;
1043     }
1044     ailen[brow] = nrow;
1045   }
1046   PetscFunctionReturnVoid();
1047 }
1048 
1049 /*
1050      Checks for missing diagonals
1051 */
1052 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1053 {
1054   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1055   PetscErrorCode ierr;
1056   PetscInt       *diag,*ii = a->i,i;
1057 
1058   PetscFunctionBegin;
1059   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1060   *missing = PETSC_FALSE;
1061   if (A->rmap->n > 0 && !ii) {
1062     *missing = PETSC_TRUE;
1063     if (d) *d = 0;
1064     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1065   } else {
1066     diag = a->diag;
1067     for (i=0; i<a->mbs; i++) {
1068       if (diag[i] >= ii[i+1]) {
1069         *missing = PETSC_TRUE;
1070         if (d) *d = i;
1071         PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);
1072         break;
1073       }
1074     }
1075   }
1076   PetscFunctionReturn(0);
1077 }
1078 
1079 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1080 {
1081   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1082   PetscErrorCode ierr;
1083   PetscInt       i,j,m = a->mbs;
1084 
1085   PetscFunctionBegin;
1086   if (!a->diag) {
1087     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1088     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1089     a->free_diag = PETSC_TRUE;
1090   }
1091   for (i=0; i<m; i++) {
1092     a->diag[i] = a->i[i+1];
1093     for (j=a->i[i]; j<a->i[i+1]; j++) {
1094       if (a->j[j] == i) {
1095         a->diag[i] = j;
1096         break;
1097       }
1098     }
1099   }
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 
1104 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1105 {
1106   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1107   PetscErrorCode ierr;
1108   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1109   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1110 
1111   PetscFunctionBegin;
1112   *nn = n;
1113   if (!ia) PetscFunctionReturn(0);
1114   if (symmetric) {
1115     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1116     nz   = tia[n];
1117   } else {
1118     tia = a->i; tja = a->j;
1119   }
1120 
1121   if (!blockcompressed && bs > 1) {
1122     (*nn) *= bs;
1123     /* malloc & create the natural set of indices */
1124     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1125     if (n) {
1126       (*ia)[0] = oshift;
1127       for (j=1; j<bs; j++) {
1128         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1129       }
1130     }
1131 
1132     for (i=1; i<n; i++) {
1133       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1134       for (j=1; j<bs; j++) {
1135         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1136       }
1137     }
1138     if (n) {
1139       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1140     }
1141 
1142     if (inja) {
1143       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1144       cnt = 0;
1145       for (i=0; i<n; i++) {
1146         for (j=0; j<bs; j++) {
1147           for (k=tia[i]; k<tia[i+1]; k++) {
1148             for (l=0; l<bs; l++) {
1149               (*ja)[cnt++] = bs*tja[k] + l;
1150             }
1151           }
1152         }
1153       }
1154     }
1155 
1156     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1157       ierr = PetscFree(tia);CHKERRQ(ierr);
1158       ierr = PetscFree(tja);CHKERRQ(ierr);
1159     }
1160   } else if (oshift == 1) {
1161     if (symmetric) {
1162       nz = tia[A->rmap->n/bs];
1163       /*  add 1 to i and j indices */
1164       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1165       *ia = tia;
1166       if (ja) {
1167         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1168         *ja = tja;
1169       }
1170     } else {
1171       nz = a->i[A->rmap->n/bs];
1172       /* malloc space and  add 1 to i and j indices */
1173       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1174       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1175       if (ja) {
1176         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1177         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1178       }
1179     }
1180   } else {
1181     *ia = tia;
1182     if (ja) *ja = tja;
1183   }
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1188 {
1189   PetscErrorCode ierr;
1190 
1191   PetscFunctionBegin;
1192   if (!ia) PetscFunctionReturn(0);
1193   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1194     ierr = PetscFree(*ia);CHKERRQ(ierr);
1195     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1196   }
1197   PetscFunctionReturn(0);
1198 }
1199 
1200 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1201 {
1202   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1203   PetscErrorCode ierr;
1204 
1205   PetscFunctionBegin;
1206 #if defined(PETSC_USE_LOG)
1207   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1208 #endif
1209   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1210   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1211   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1212   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1213   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1214   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1215   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1216   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1217   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1218   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1219   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1220   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1221   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1222 
1223   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1224   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1225   ierr = PetscFree(A->data);CHKERRQ(ierr);
1226 
1227   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1228   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1229   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1230   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1231   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1232   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1233   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1234   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1235   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1236   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1237   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1238 #if defined(PETSC_HAVE_HYPRE)
1239   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1240 #endif
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1245 {
1246   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1247   PetscErrorCode ierr;
1248 
1249   PetscFunctionBegin;
1250   switch (op) {
1251   case MAT_ROW_ORIENTED:
1252     a->roworiented = flg;
1253     break;
1254   case MAT_KEEP_NONZERO_PATTERN:
1255     a->keepnonzeropattern = flg;
1256     break;
1257   case MAT_NEW_NONZERO_LOCATIONS:
1258     a->nonew = (flg ? 0 : 1);
1259     break;
1260   case MAT_NEW_NONZERO_LOCATION_ERR:
1261     a->nonew = (flg ? -1 : 0);
1262     break;
1263   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1264     a->nonew = (flg ? -2 : 0);
1265     break;
1266   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1267     a->nounused = (flg ? -1 : 0);
1268     break;
1269   case MAT_NEW_DIAGONALS:
1270   case MAT_IGNORE_OFF_PROC_ENTRIES:
1271   case MAT_USE_HASH_TABLE:
1272     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1273     break;
1274   case MAT_SPD:
1275   case MAT_SYMMETRIC:
1276   case MAT_STRUCTURALLY_SYMMETRIC:
1277   case MAT_HERMITIAN:
1278   case MAT_SYMMETRY_ETERNAL:
1279   case MAT_SUBMAT_SINGLEIS:
1280   case MAT_STRUCTURE_ONLY:
1281     /* These options are handled directly by MatSetOption() */
1282     break;
1283   default:
1284     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1285   }
1286   PetscFunctionReturn(0);
1287 }
1288 
1289 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1290 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1291 {
1292   PetscErrorCode ierr;
1293   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1294   MatScalar      *aa_i;
1295   PetscScalar    *v_i;
1296 
1297   PetscFunctionBegin;
1298   bs  = A->rmap->bs;
1299   bs2 = bs*bs;
1300   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1301 
1302   bn  = row/bs;   /* Block number */
1303   bp  = row % bs; /* Block Position */
1304   M   = ai[bn+1] - ai[bn];
1305   *nz = bs*M;
1306 
1307   if (v) {
1308     *v = 0;
1309     if (*nz) {
1310       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1311       for (i=0; i<M; i++) { /* for each block in the block row */
1312         v_i  = *v + i*bs;
1313         aa_i = aa + bs2*(ai[bn] + i);
1314         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1315       }
1316     }
1317   }
1318 
1319   if (idx) {
1320     *idx = 0;
1321     if (*nz) {
1322       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1323       for (i=0; i<M; i++) { /* for each block in the block row */
1324         idx_i = *idx + i*bs;
1325         itmp  = bs*aj[ai[bn] + i];
1326         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1327       }
1328     }
1329   }
1330   PetscFunctionReturn(0);
1331 }
1332 
1333 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1334 {
1335   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1336   PetscErrorCode ierr;
1337 
1338   PetscFunctionBegin;
1339   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1340   PetscFunctionReturn(0);
1341 }
1342 
1343 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1344 {
1345   PetscErrorCode ierr;
1346 
1347   PetscFunctionBegin;
1348   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1349   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1350   PetscFunctionReturn(0);
1351 }
1352 
1353 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1354 {
1355   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1356   Mat            C;
1357   PetscErrorCode ierr;
1358   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1359   PetscInt       *rows,*cols,bs2=a->bs2;
1360   MatScalar      *array;
1361 
1362   PetscFunctionBegin;
1363   if (reuse == MAT_INPLACE_MATRIX && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1364   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1365     ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr);
1366 
1367     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1368     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1369     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1370     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1371     ierr = MatSeqBAIJSetPreallocation(C,bs,0,col);CHKERRQ(ierr);
1372     ierr = PetscFree(col);CHKERRQ(ierr);
1373   } else {
1374     C = *B;
1375   }
1376 
1377   array = a->a;
1378   ierr  = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr);
1379   for (i=0; i<mbs; i++) {
1380     cols[0] = i*bs;
1381     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1382     len = ai[i+1] - ai[i];
1383     for (j=0; j<len; j++) {
1384       rows[0] = (*aj++)*bs;
1385       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1386       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1387       array += bs2;
1388     }
1389   }
1390   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1391 
1392   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1393   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1394 
1395   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1396     *B = C;
1397   } else {
1398     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1399   }
1400   PetscFunctionReturn(0);
1401 }
1402 
1403 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1404 {
1405   PetscErrorCode ierr;
1406   Mat            Btrans;
1407 
1408   PetscFunctionBegin;
1409   *f   = PETSC_FALSE;
1410   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1411   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1412   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1417 {
1418   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1419   PetscErrorCode ierr;
1420   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1421   int            fd;
1422   PetscScalar    *aa;
1423   FILE           *file;
1424 
1425   PetscFunctionBegin;
1426   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1427   ierr        = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr);
1428   col_lens[0] = MAT_FILE_CLASSID;
1429 
1430   col_lens[1] = A->rmap->N;
1431   col_lens[2] = A->cmap->n;
1432   col_lens[3] = a->nz*bs2;
1433 
1434   /* store lengths of each row and write (including header) to file */
1435   count = 0;
1436   for (i=0; i<a->mbs; i++) {
1437     for (j=0; j<bs; j++) {
1438       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1439     }
1440   }
1441   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1442   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1443 
1444   /* store column indices (zero start index) */
1445   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1446   count = 0;
1447   for (i=0; i<a->mbs; i++) {
1448     for (j=0; j<bs; j++) {
1449       for (k=a->i[i]; k<a->i[i+1]; k++) {
1450         for (l=0; l<bs; l++) {
1451           jj[count++] = bs*a->j[k] + l;
1452         }
1453       }
1454     }
1455   }
1456   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1457   ierr = PetscFree(jj);CHKERRQ(ierr);
1458 
1459   /* store nonzero values */
1460   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1461   count = 0;
1462   for (i=0; i<a->mbs; i++) {
1463     for (j=0; j<bs; j++) {
1464       for (k=a->i[i]; k<a->i[i+1]; k++) {
1465         for (l=0; l<bs; l++) {
1466           aa[count++] = a->a[bs2*k + l*bs + j];
1467         }
1468       }
1469     }
1470   }
1471   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1472   ierr = PetscFree(aa);CHKERRQ(ierr);
1473 
1474   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1475   if (file) {
1476     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1477   }
1478   PetscFunctionReturn(0);
1479 }
1480 
1481 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1482 {
1483   PetscErrorCode ierr;
1484   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1485   PetscInt       i,bs = A->rmap->bs,k;
1486 
1487   PetscFunctionBegin;
1488   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1489   for (i=0; i<a->mbs; i++) {
1490     ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr);
1491     for (k=a->i[i]; k<a->i[i+1]; k++) {
1492       ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr);
1493     }
1494     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1495   }
1496   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1497   PetscFunctionReturn(0);
1498 }
1499 
1500 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1501 {
1502   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1503   PetscErrorCode    ierr;
1504   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1505   PetscViewerFormat format;
1506 
1507   PetscFunctionBegin;
1508   if (A->structure_only) {
1509     ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr);
1510     PetscFunctionReturn(0);
1511   }
1512 
1513   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1514   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1515     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1516   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1517     const char *matname;
1518     Mat        aij;
1519     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1520     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1521     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1522     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1523     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1524   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1525       PetscFunctionReturn(0);
1526   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1527     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1528     for (i=0; i<a->mbs; i++) {
1529       for (j=0; j<bs; j++) {
1530         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1531         for (k=a->i[i]; k<a->i[i+1]; k++) {
1532           for (l=0; l<bs; l++) {
1533 #if defined(PETSC_USE_COMPLEX)
1534             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1535               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1536                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1537             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1538               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1539                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1540             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1541               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1542             }
1543 #else
1544             if (a->a[bs2*k + l*bs + j] != 0.0) {
1545               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1546             }
1547 #endif
1548           }
1549         }
1550         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1551       }
1552     }
1553     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1554   } else {
1555     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1556     for (i=0; i<a->mbs; i++) {
1557       for (j=0; j<bs; j++) {
1558         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1559         for (k=a->i[i]; k<a->i[i+1]; k++) {
1560           for (l=0; l<bs; l++) {
1561 #if defined(PETSC_USE_COMPLEX)
1562             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1563               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1564                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1565             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1566               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1567                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1568             } else {
1569               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1570             }
1571 #else
1572             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1573 #endif
1574           }
1575         }
1576         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1577       }
1578     }
1579     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1580   }
1581   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1582   PetscFunctionReturn(0);
1583 }
1584 
1585 #include <petscdraw.h>
1586 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1587 {
1588   Mat               A = (Mat) Aa;
1589   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1590   PetscErrorCode    ierr;
1591   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1592   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1593   MatScalar         *aa;
1594   PetscViewer       viewer;
1595   PetscViewerFormat format;
1596 
1597   PetscFunctionBegin;
1598   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1599   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1600   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1601 
1602   /* loop over matrix elements drawing boxes */
1603 
1604   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1605     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1606     /* Blue for negative, Cyan for zero and  Red for positive */
1607     color = PETSC_DRAW_BLUE;
1608     for (i=0,row=0; i<mbs; i++,row+=bs) {
1609       for (j=a->i[i]; j<a->i[i+1]; j++) {
1610         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1611         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1612         aa  = a->a + j*bs2;
1613         for (k=0; k<bs; k++) {
1614           for (l=0; l<bs; l++) {
1615             if (PetscRealPart(*aa++) >=  0.) continue;
1616             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1617           }
1618         }
1619       }
1620     }
1621     color = PETSC_DRAW_CYAN;
1622     for (i=0,row=0; i<mbs; i++,row+=bs) {
1623       for (j=a->i[i]; j<a->i[i+1]; j++) {
1624         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1625         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1626         aa  = a->a + j*bs2;
1627         for (k=0; k<bs; k++) {
1628           for (l=0; l<bs; l++) {
1629             if (PetscRealPart(*aa++) != 0.) continue;
1630             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1631           }
1632         }
1633       }
1634     }
1635     color = PETSC_DRAW_RED;
1636     for (i=0,row=0; i<mbs; i++,row+=bs) {
1637       for (j=a->i[i]; j<a->i[i+1]; j++) {
1638         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1639         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1640         aa  = a->a + j*bs2;
1641         for (k=0; k<bs; k++) {
1642           for (l=0; l<bs; l++) {
1643             if (PetscRealPart(*aa++) <= 0.) continue;
1644             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1645           }
1646         }
1647       }
1648     }
1649     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1650   } else {
1651     /* use contour shading to indicate magnitude of values */
1652     /* first determine max of all nonzero values */
1653     PetscReal minv = 0.0, maxv = 0.0;
1654     PetscDraw popup;
1655 
1656     for (i=0; i<a->nz*a->bs2; i++) {
1657       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1658     }
1659     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1660     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1661     ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1662 
1663     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1664     for (i=0,row=0; i<mbs; i++,row+=bs) {
1665       for (j=a->i[i]; j<a->i[i+1]; j++) {
1666         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1667         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1668         aa  = a->a + j*bs2;
1669         for (k=0; k<bs; k++) {
1670           for (l=0; l<bs; l++) {
1671             MatScalar v = *aa++;
1672             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1673             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1674           }
1675         }
1676       }
1677     }
1678     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1679   }
1680   PetscFunctionReturn(0);
1681 }
1682 
1683 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1684 {
1685   PetscErrorCode ierr;
1686   PetscReal      xl,yl,xr,yr,w,h;
1687   PetscDraw      draw;
1688   PetscBool      isnull;
1689 
1690   PetscFunctionBegin;
1691   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1692   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1693   if (isnull) PetscFunctionReturn(0);
1694 
1695   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1696   xr  += w;          yr += h;        xl = -w;     yl = -h;
1697   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1698   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1699   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1700   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1701   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1702   PetscFunctionReturn(0);
1703 }
1704 
1705 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1706 {
1707   PetscErrorCode ierr;
1708   PetscBool      iascii,isbinary,isdraw;
1709 
1710   PetscFunctionBegin;
1711   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1712   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1713   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1714   if (iascii) {
1715     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1716   } else if (isbinary) {
1717     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1718   } else if (isdraw) {
1719     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1720   } else {
1721     Mat B;
1722     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1723     ierr = MatView(B,viewer);CHKERRQ(ierr);
1724     ierr = MatDestroy(&B);CHKERRQ(ierr);
1725   }
1726   PetscFunctionReturn(0);
1727 }
1728 
1729 
1730 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1731 {
1732   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1733   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1734   PetscInt    *ai = a->i,*ailen = a->ilen;
1735   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1736   MatScalar   *ap,*aa = a->a;
1737 
1738   PetscFunctionBegin;
1739   for (k=0; k<m; k++) { /* loop over rows */
1740     row = im[k]; brow = row/bs;
1741     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1742     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1743     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1744     nrow = ailen[brow];
1745     for (l=0; l<n; l++) { /* loop over columns */
1746       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1747       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1748       col  = in[l];
1749       bcol = col/bs;
1750       cidx = col%bs;
1751       ridx = row%bs;
1752       high = nrow;
1753       low  = 0; /* assume unsorted */
1754       while (high-low > 5) {
1755         t = (low+high)/2;
1756         if (rp[t] > bcol) high = t;
1757         else             low  = t;
1758       }
1759       for (i=low; i<high; i++) {
1760         if (rp[i] > bcol) break;
1761         if (rp[i] == bcol) {
1762           *v++ = ap[bs2*i+bs*cidx+ridx];
1763           goto finished;
1764         }
1765       }
1766       *v++ = 0.0;
1767 finished:;
1768     }
1769   }
1770   PetscFunctionReturn(0);
1771 }
1772 
1773 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1774 {
1775   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1776   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1777   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1778   PetscErrorCode    ierr;
1779   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1780   PetscBool         roworiented=a->roworiented;
1781   const PetscScalar *value     = v;
1782   MatScalar         *ap=NULL,*aa = a->a,*bap;
1783 
1784   PetscFunctionBegin;
1785   if (roworiented) {
1786     stepval = (n-1)*bs;
1787   } else {
1788     stepval = (m-1)*bs;
1789   }
1790   for (k=0; k<m; k++) { /* loop over added rows */
1791     row = im[k];
1792     if (row < 0) continue;
1793 #if defined(PETSC_USE_DEBUG)
1794     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1795 #endif
1796     rp   = aj + ai[row];
1797     if (!A->structure_only) ap = aa + bs2*ai[row];
1798     rmax = imax[row];
1799     nrow = ailen[row];
1800     low  = 0;
1801     high = nrow;
1802     for (l=0; l<n; l++) { /* loop over added columns */
1803       if (in[l] < 0) continue;
1804 #if defined(PETSC_USE_DEBUG)
1805       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1806 #endif
1807       col = in[l];
1808       if (!A->structure_only) {
1809         if (roworiented) {
1810           value = v + (k*(stepval+bs) + l)*bs;
1811         } else {
1812           value = v + (l*(stepval+bs) + k)*bs;
1813         }
1814       }
1815       if (col <= lastcol) low = 0;
1816       else high = nrow;
1817       lastcol = col;
1818       while (high-low > 7) {
1819         t = (low+high)/2;
1820         if (rp[t] > col) high = t;
1821         else             low  = t;
1822       }
1823       for (i=low; i<high; i++) {
1824         if (rp[i] > col) break;
1825         if (rp[i] == col) {
1826           if (A->structure_only) goto noinsert2;
1827           bap = ap +  bs2*i;
1828           if (roworiented) {
1829             if (is == ADD_VALUES) {
1830               for (ii=0; ii<bs; ii++,value+=stepval) {
1831                 for (jj=ii; jj<bs2; jj+=bs) {
1832                   bap[jj] += *value++;
1833                 }
1834               }
1835             } else {
1836               for (ii=0; ii<bs; ii++,value+=stepval) {
1837                 for (jj=ii; jj<bs2; jj+=bs) {
1838                   bap[jj] = *value++;
1839                 }
1840               }
1841             }
1842           } else {
1843             if (is == ADD_VALUES) {
1844               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1845                 for (jj=0; jj<bs; jj++) {
1846                   bap[jj] += value[jj];
1847                 }
1848                 bap += bs;
1849               }
1850             } else {
1851               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1852                 for (jj=0; jj<bs; jj++) {
1853                   bap[jj]  = value[jj];
1854                 }
1855                 bap += bs;
1856               }
1857             }
1858           }
1859           goto noinsert2;
1860         }
1861       }
1862       if (nonew == 1) goto noinsert2;
1863       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1864       if (A->structure_only) {
1865         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1866       } else {
1867         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1868       }
1869       N = nrow++ - 1; high++;
1870       /* shift up all the later entries in this row */
1871       for (ii=N; ii>=i; ii--) {
1872         rp[ii+1] = rp[ii];
1873         if (!A->structure_only) {
1874           ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1875         }
1876       }
1877       if (N >= i && !A->structure_only) {
1878         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1879       }
1880 
1881       rp[i] = col;
1882       if (!A->structure_only) {
1883         bap   = ap +  bs2*i;
1884         if (roworiented) {
1885           for (ii=0; ii<bs; ii++,value+=stepval) {
1886             for (jj=ii; jj<bs2; jj+=bs) {
1887               bap[jj] = *value++;
1888             }
1889           }
1890         } else {
1891           for (ii=0; ii<bs; ii++,value+=stepval) {
1892             for (jj=0; jj<bs; jj++) {
1893               *bap++ = *value++;
1894             }
1895           }
1896         }
1897       }
1898 noinsert2:;
1899       low = i;
1900     }
1901     ailen[row] = nrow;
1902   }
1903   PetscFunctionReturn(0);
1904 }
1905 
1906 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1907 {
1908   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1909   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1910   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1911   PetscErrorCode ierr;
1912   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1913   MatScalar      *aa  = a->a,*ap;
1914   PetscReal      ratio=0.6;
1915 
1916   PetscFunctionBegin;
1917   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1918 
1919   if (m) rmax = ailen[0];
1920   for (i=1; i<mbs; i++) {
1921     /* move each row back by the amount of empty slots (fshift) before it*/
1922     fshift += imax[i-1] - ailen[i-1];
1923     rmax    = PetscMax(rmax,ailen[i]);
1924     if (fshift) {
1925       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1926       N  = ailen[i];
1927       for (j=0; j<N; j++) {
1928         ip[j-fshift] = ip[j];
1929         if (!A->structure_only) {
1930           ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1931         }
1932       }
1933     }
1934     ai[i] = ai[i-1] + ailen[i-1];
1935   }
1936   if (mbs) {
1937     fshift += imax[mbs-1] - ailen[mbs-1];
1938     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1939   }
1940 
1941   /* reset ilen and imax for each row */
1942   a->nonzerorowcnt = 0;
1943   if (A->structure_only) {
1944     ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1945   } else { /* !A->structure_only */
1946     for (i=0; i<mbs; i++) {
1947       ailen[i] = imax[i] = ai[i+1] - ai[i];
1948       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1949     }
1950   }
1951   a->nz = ai[mbs];
1952 
1953   /* diagonals may have moved, so kill the diagonal pointers */
1954   a->idiagvalid = PETSC_FALSE;
1955   if (fshift && a->diag) {
1956     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1957     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1958     a->diag = 0;
1959   }
1960   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1961   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1962   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1963   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1964 
1965   A->info.mallocs    += a->reallocs;
1966   a->reallocs         = 0;
1967   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1968   a->rmax             = rmax;
1969 
1970   if (!A->structure_only) {
1971     ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1972   }
1973   PetscFunctionReturn(0);
1974 }
1975 
1976 /*
1977    This function returns an array of flags which indicate the locations of contiguous
1978    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1979    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1980    Assume: sizes should be long enough to hold all the values.
1981 */
1982 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1983 {
1984   PetscInt  i,j,k,row;
1985   PetscBool flg;
1986 
1987   PetscFunctionBegin;
1988   for (i=0,j=0; i<n; j++) {
1989     row = idx[i];
1990     if (row%bs!=0) { /* Not the begining of a block */
1991       sizes[j] = 1;
1992       i++;
1993     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1994       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1995       i++;
1996     } else { /* Begining of the block, so check if the complete block exists */
1997       flg = PETSC_TRUE;
1998       for (k=1; k<bs; k++) {
1999         if (row+k != idx[i+k]) { /* break in the block */
2000           flg = PETSC_FALSE;
2001           break;
2002         }
2003       }
2004       if (flg) { /* No break in the bs */
2005         sizes[j] = bs;
2006         i       += bs;
2007       } else {
2008         sizes[j] = 1;
2009         i++;
2010       }
2011     }
2012   }
2013   *bs_max = j;
2014   PetscFunctionReturn(0);
2015 }
2016 
2017 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2018 {
2019   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2020   PetscErrorCode    ierr;
2021   PetscInt          i,j,k,count,*rows;
2022   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2023   PetscScalar       zero = 0.0;
2024   MatScalar         *aa;
2025   const PetscScalar *xx;
2026   PetscScalar       *bb;
2027 
2028   PetscFunctionBegin;
2029   /* fix right hand side if needed */
2030   if (x && b) {
2031     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2032     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2033     for (i=0; i<is_n; i++) {
2034       bb[is_idx[i]] = diag*xx[is_idx[i]];
2035     }
2036     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2037     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2038   }
2039 
2040   /* Make a copy of the IS and  sort it */
2041   /* allocate memory for rows,sizes */
2042   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2043 
2044   /* copy IS values to rows, and sort them */
2045   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2046   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2047 
2048   if (baij->keepnonzeropattern) {
2049     for (i=0; i<is_n; i++) sizes[i] = 1;
2050     bs_max          = is_n;
2051   } else {
2052     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2053     A->nonzerostate++;
2054   }
2055 
2056   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2057     row = rows[j];
2058     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2059     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2060     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2061     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2062       if (diag != (PetscScalar)0.0) {
2063         if (baij->ilen[row/bs] > 0) {
2064           baij->ilen[row/bs]       = 1;
2065           baij->j[baij->i[row/bs]] = row/bs;
2066 
2067           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2068         }
2069         /* Now insert all the diagonal values for this bs */
2070         for (k=0; k<bs; k++) {
2071           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2072         }
2073       } else { /* (diag == 0.0) */
2074         baij->ilen[row/bs] = 0;
2075       } /* end (diag == 0.0) */
2076     } else { /* (sizes[i] != bs) */
2077 #if defined(PETSC_USE_DEBUG)
2078       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2079 #endif
2080       for (k=0; k<count; k++) {
2081         aa[0] =  zero;
2082         aa   += bs;
2083       }
2084       if (diag != (PetscScalar)0.0) {
2085         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2086       }
2087     }
2088   }
2089 
2090   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2091   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2092   PetscFunctionReturn(0);
2093 }
2094 
2095 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2096 {
2097   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2098   PetscErrorCode    ierr;
2099   PetscInt          i,j,k,count;
2100   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2101   PetscScalar       zero = 0.0;
2102   MatScalar         *aa;
2103   const PetscScalar *xx;
2104   PetscScalar       *bb;
2105   PetscBool         *zeroed,vecs = PETSC_FALSE;
2106 
2107   PetscFunctionBegin;
2108   /* fix right hand side if needed */
2109   if (x && b) {
2110     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2111     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2112     vecs = PETSC_TRUE;
2113   }
2114 
2115   /* zero the columns */
2116   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2117   for (i=0; i<is_n; i++) {
2118     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2119     zeroed[is_idx[i]] = PETSC_TRUE;
2120   }
2121   for (i=0; i<A->rmap->N; i++) {
2122     if (!zeroed[i]) {
2123       row = i/bs;
2124       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2125         for (k=0; k<bs; k++) {
2126           col = bs*baij->j[j] + k;
2127           if (zeroed[col]) {
2128             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2129             if (vecs) bb[i] -= aa[0]*xx[col];
2130             aa[0] = 0.0;
2131           }
2132         }
2133       }
2134     } else if (vecs) bb[i] = diag*xx[i];
2135   }
2136   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2137   if (vecs) {
2138     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2139     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2140   }
2141 
2142   /* zero the rows */
2143   for (i=0; i<is_n; i++) {
2144     row   = is_idx[i];
2145     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2146     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2147     for (k=0; k<count; k++) {
2148       aa[0] =  zero;
2149       aa   += bs;
2150     }
2151     if (diag != (PetscScalar)0.0) {
2152       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2153     }
2154   }
2155   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2156   PetscFunctionReturn(0);
2157 }
2158 
2159 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2160 {
2161   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2162   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2163   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2164   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2165   PetscErrorCode ierr;
2166   PetscInt       ridx,cidx,bs2=a->bs2;
2167   PetscBool      roworiented=a->roworiented;
2168   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;
2169 
2170   PetscFunctionBegin;
2171   for (k=0; k<m; k++) { /* loop over added rows */
2172     row  = im[k];
2173     brow = row/bs;
2174     if (row < 0) continue;
2175 #if defined(PETSC_USE_DEBUG)
2176     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2177 #endif
2178     rp   = aj + ai[brow];
2179     if (!A->structure_only) ap = aa + bs2*ai[brow];
2180     rmax = imax[brow];
2181     nrow = ailen[brow];
2182     low  = 0;
2183     high = nrow;
2184     for (l=0; l<n; l++) { /* loop over added columns */
2185       if (in[l] < 0) continue;
2186 #if defined(PETSC_USE_DEBUG)
2187       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2188 #endif
2189       col  = in[l]; bcol = col/bs;
2190       ridx = row % bs; cidx = col % bs;
2191       if (!A->structure_only) {
2192         if (roworiented) {
2193           value = v[l + k*n];
2194         } else {
2195           value = v[k + l*m];
2196         }
2197       }
2198       if (col <= lastcol) low = 0; else high = nrow;
2199       lastcol = col;
2200       while (high-low > 7) {
2201         t = (low+high)/2;
2202         if (rp[t] > bcol) high = t;
2203         else              low  = t;
2204       }
2205       for (i=low; i<high; i++) {
2206         if (rp[i] > bcol) break;
2207         if (rp[i] == bcol) {
2208           bap = ap +  bs2*i + bs*cidx + ridx;
2209           if (!A->structure_only) {
2210             if (is == ADD_VALUES) *bap += value;
2211             else                  *bap  = value;
2212           }
2213           goto noinsert1;
2214         }
2215       }
2216       if (nonew == 1) goto noinsert1;
2217       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2218       if (A->structure_only) {
2219         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2220       } else {
2221         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2222       }
2223       N = nrow++ - 1; high++;
2224       /* shift up all the later entries in this row */
2225       for (ii=N; ii>=i; ii--) {
2226         rp[ii+1] = rp[ii];
2227         if (!A->structure_only) {
2228           ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2229         }
2230       }
2231       if (N>=i && !A->structure_only) {
2232         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2233       }
2234       rp[i]                      = bcol;
2235       if (!A->structure_only) ap[bs2*i + bs*cidx + ridx] = value;
2236       a->nz++;
2237       A->nonzerostate++;
2238 noinsert1:;
2239       low = i;
2240     }
2241     ailen[brow] = nrow;
2242   }
2243   PetscFunctionReturn(0);
2244 }
2245 
2246 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2247 {
2248   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2249   Mat            outA;
2250   PetscErrorCode ierr;
2251   PetscBool      row_identity,col_identity;
2252 
2253   PetscFunctionBegin;
2254   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2255   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2256   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2257   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2258 
2259   outA            = inA;
2260   inA->factortype = MAT_FACTOR_LU;
2261   ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2262   ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
2263 
2264   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2265 
2266   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2267   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2268   a->row = row;
2269   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2270   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2271   a->col = col;
2272 
2273   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2274   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2275   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2276   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2277 
2278   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2279   if (!a->solve_work) {
2280     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2281     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2282   }
2283   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2284   PetscFunctionReturn(0);
2285 }
2286 
2287 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2288 {
2289   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2290   PetscInt    i,nz,mbs;
2291 
2292   PetscFunctionBegin;
2293   nz  = baij->maxnz;
2294   mbs = baij->mbs;
2295   for (i=0; i<nz; i++) {
2296     baij->j[i] = indices[i];
2297   }
2298   baij->nz = nz;
2299   for (i=0; i<mbs; i++) {
2300     baij->ilen[i] = baij->imax[i];
2301   }
2302   PetscFunctionReturn(0);
2303 }
2304 
2305 /*@
2306     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2307        in the matrix.
2308 
2309   Input Parameters:
2310 +  mat - the SeqBAIJ matrix
2311 -  indices - the column indices
2312 
2313   Level: advanced
2314 
2315   Notes:
2316     This can be called if you have precomputed the nonzero structure of the
2317   matrix and want to provide it to the matrix object to improve the performance
2318   of the MatSetValues() operation.
2319 
2320     You MUST have set the correct numbers of nonzeros per row in the call to
2321   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2322 
2323     MUST be called before any calls to MatSetValues();
2324 
2325 @*/
2326 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2327 {
2328   PetscErrorCode ierr;
2329 
2330   PetscFunctionBegin;
2331   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2332   PetscValidPointer(indices,2);
2333   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2334   PetscFunctionReturn(0);
2335 }
2336 
2337 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2338 {
2339   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2340   PetscErrorCode ierr;
2341   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2342   PetscReal      atmp;
2343   PetscScalar    *x,zero = 0.0;
2344   MatScalar      *aa;
2345   PetscInt       ncols,brow,krow,kcol;
2346 
2347   PetscFunctionBegin;
2348   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2349   bs  = A->rmap->bs;
2350   aa  = a->a;
2351   ai  = a->i;
2352   aj  = a->j;
2353   mbs = a->mbs;
2354 
2355   ierr = VecSet(v,zero);CHKERRQ(ierr);
2356   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2357   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2358   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2359   for (i=0; i<mbs; i++) {
2360     ncols = ai[1] - ai[0]; ai++;
2361     brow  = bs*i;
2362     for (j=0; j<ncols; j++) {
2363       for (kcol=0; kcol<bs; kcol++) {
2364         for (krow=0; krow<bs; krow++) {
2365           atmp = PetscAbsScalar(*aa);aa++;
2366           row  = brow + krow;   /* row index */
2367           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2368         }
2369       }
2370       aj++;
2371     }
2372   }
2373   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2374   PetscFunctionReturn(0);
2375 }
2376 
2377 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2378 {
2379   PetscErrorCode ierr;
2380 
2381   PetscFunctionBegin;
2382   /* If the two matrices have the same copy implementation, use fast copy. */
2383   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2384     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2385     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2386     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2387 
2388     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2389     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2390     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2391     ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2392   } else {
2393     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2394   }
2395   PetscFunctionReturn(0);
2396 }
2397 
2398 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2399 {
2400   PetscErrorCode ierr;
2401 
2402   PetscFunctionBegin;
2403   ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2404   PetscFunctionReturn(0);
2405 }
2406 
2407 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2408 {
2409   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2410 
2411   PetscFunctionBegin;
2412   *array = a->a;
2413   PetscFunctionReturn(0);
2414 }
2415 
2416 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2417 {
2418   PetscFunctionBegin;
2419   PetscFunctionReturn(0);
2420 }
2421 
2422 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2423 {
2424   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2425   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2426   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2427   PetscErrorCode ierr;
2428 
2429   PetscFunctionBegin;
2430   /* Set the number of nonzeros in the new matrix */
2431   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2432   PetscFunctionReturn(0);
2433 }
2434 
2435 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2436 {
2437   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2438   PetscErrorCode ierr;
2439   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2440   PetscBLASInt   one=1;
2441 
2442   PetscFunctionBegin;
2443   if (str == SAME_NONZERO_PATTERN) {
2444     PetscScalar  alpha = a;
2445     PetscBLASInt bnz;
2446     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2447     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2448     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2449   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2450     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2451   } else {
2452     Mat      B;
2453     PetscInt *nnz;
2454     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2455     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2456     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2457     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2458     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2459     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2460     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2461     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2462     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2463     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2464     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2465     ierr = PetscFree(nnz);CHKERRQ(ierr);
2466   }
2467   PetscFunctionReturn(0);
2468 }
2469 
2470 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2471 {
2472   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2473   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2474   MatScalar   *aa = a->a;
2475 
2476   PetscFunctionBegin;
2477   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2478   PetscFunctionReturn(0);
2479 }
2480 
2481 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2482 {
2483   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2484   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2485   MatScalar   *aa = a->a;
2486 
2487   PetscFunctionBegin;
2488   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2489   PetscFunctionReturn(0);
2490 }
2491 
2492 /*
2493     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2494 */
2495 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2496 {
2497   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2498   PetscErrorCode ierr;
2499   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2500   PetscInt       nz = a->i[m],row,*jj,mr,col;
2501 
2502   PetscFunctionBegin;
2503   *nn = n;
2504   if (!ia) PetscFunctionReturn(0);
2505   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2506   else {
2507     ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2508     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2509     ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2510     jj   = a->j;
2511     for (i=0; i<nz; i++) {
2512       collengths[jj[i]]++;
2513     }
2514     cia[0] = oshift;
2515     for (i=0; i<n; i++) {
2516       cia[i+1] = cia[i] + collengths[i];
2517     }
2518     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2519     jj   = a->j;
2520     for (row=0; row<m; row++) {
2521       mr = a->i[row+1] - a->i[row];
2522       for (i=0; i<mr; i++) {
2523         col = *jj++;
2524 
2525         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2526       }
2527     }
2528     ierr = PetscFree(collengths);CHKERRQ(ierr);
2529     *ia  = cia; *ja = cja;
2530   }
2531   PetscFunctionReturn(0);
2532 }
2533 
2534 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2535 {
2536   PetscErrorCode ierr;
2537 
2538   PetscFunctionBegin;
2539   if (!ia) PetscFunctionReturn(0);
2540   ierr = PetscFree(*ia);CHKERRQ(ierr);
2541   ierr = PetscFree(*ja);CHKERRQ(ierr);
2542   PetscFunctionReturn(0);
2543 }
2544 
2545 /*
2546  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2547  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2548  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2549  */
2550 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2551 {
2552   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2553   PetscErrorCode ierr;
2554   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2555   PetscInt       nz = a->i[m],row,*jj,mr,col;
2556   PetscInt       *cspidx;
2557 
2558   PetscFunctionBegin;
2559   *nn = n;
2560   if (!ia) PetscFunctionReturn(0);
2561 
2562   ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2563   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2564   ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2565   ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr);
2566   jj   = a->j;
2567   for (i=0; i<nz; i++) {
2568     collengths[jj[i]]++;
2569   }
2570   cia[0] = oshift;
2571   for (i=0; i<n; i++) {
2572     cia[i+1] = cia[i] + collengths[i];
2573   }
2574   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2575   jj   = a->j;
2576   for (row=0; row<m; row++) {
2577     mr = a->i[row+1] - a->i[row];
2578     for (i=0; i<mr; i++) {
2579       col = *jj++;
2580       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2581       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2582     }
2583   }
2584   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2585   *ia    = cia; *ja = cja;
2586   *spidx = cspidx;
2587   PetscFunctionReturn(0);
2588 }
2589 
2590 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2591 {
2592   PetscErrorCode ierr;
2593 
2594   PetscFunctionBegin;
2595   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2596   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2597   PetscFunctionReturn(0);
2598 }
2599 
2600 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2601 {
2602   PetscErrorCode ierr;
2603   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2604 
2605   PetscFunctionBegin;
2606   if (!Y->preallocated || !aij->nz) {
2607     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2608   }
2609   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2610   PetscFunctionReturn(0);
2611 }
2612 
2613 /* -------------------------------------------------------------------*/
2614 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2615                                        MatGetRow_SeqBAIJ,
2616                                        MatRestoreRow_SeqBAIJ,
2617                                        MatMult_SeqBAIJ_N,
2618                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2619                                        MatMultTranspose_SeqBAIJ,
2620                                        MatMultTransposeAdd_SeqBAIJ,
2621                                        0,
2622                                        0,
2623                                        0,
2624                                /* 10*/ 0,
2625                                        MatLUFactor_SeqBAIJ,
2626                                        0,
2627                                        0,
2628                                        MatTranspose_SeqBAIJ,
2629                                /* 15*/ MatGetInfo_SeqBAIJ,
2630                                        MatEqual_SeqBAIJ,
2631                                        MatGetDiagonal_SeqBAIJ,
2632                                        MatDiagonalScale_SeqBAIJ,
2633                                        MatNorm_SeqBAIJ,
2634                                /* 20*/ 0,
2635                                        MatAssemblyEnd_SeqBAIJ,
2636                                        MatSetOption_SeqBAIJ,
2637                                        MatZeroEntries_SeqBAIJ,
2638                                /* 24*/ MatZeroRows_SeqBAIJ,
2639                                        0,
2640                                        0,
2641                                        0,
2642                                        0,
2643                                /* 29*/ MatSetUp_SeqBAIJ,
2644                                        0,
2645                                        0,
2646                                        0,
2647                                        0,
2648                                /* 34*/ MatDuplicate_SeqBAIJ,
2649                                        0,
2650                                        0,
2651                                        MatILUFactor_SeqBAIJ,
2652                                        0,
2653                                /* 39*/ MatAXPY_SeqBAIJ,
2654                                        MatCreateSubMatrices_SeqBAIJ,
2655                                        MatIncreaseOverlap_SeqBAIJ,
2656                                        MatGetValues_SeqBAIJ,
2657                                        MatCopy_SeqBAIJ,
2658                                /* 44*/ 0,
2659                                        MatScale_SeqBAIJ,
2660                                        MatShift_SeqBAIJ,
2661                                        0,
2662                                        MatZeroRowsColumns_SeqBAIJ,
2663                                /* 49*/ 0,
2664                                        MatGetRowIJ_SeqBAIJ,
2665                                        MatRestoreRowIJ_SeqBAIJ,
2666                                        MatGetColumnIJ_SeqBAIJ,
2667                                        MatRestoreColumnIJ_SeqBAIJ,
2668                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2669                                        0,
2670                                        0,
2671                                        0,
2672                                        MatSetValuesBlocked_SeqBAIJ,
2673                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2674                                        MatDestroy_SeqBAIJ,
2675                                        MatView_SeqBAIJ,
2676                                        0,
2677                                        0,
2678                                /* 64*/ 0,
2679                                        0,
2680                                        0,
2681                                        0,
2682                                        0,
2683                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2684                                        0,
2685                                        MatConvert_Basic,
2686                                        0,
2687                                        0,
2688                                /* 74*/ 0,
2689                                        MatFDColoringApply_BAIJ,
2690                                        0,
2691                                        0,
2692                                        0,
2693                                /* 79*/ 0,
2694                                        0,
2695                                        0,
2696                                        0,
2697                                        MatLoad_SeqBAIJ,
2698                                /* 84*/ 0,
2699                                        0,
2700                                        0,
2701                                        0,
2702                                        0,
2703                                /* 89*/ 0,
2704                                        0,
2705                                        0,
2706                                        0,
2707                                        0,
2708                                /* 94*/ 0,
2709                                        0,
2710                                        0,
2711                                        0,
2712                                        0,
2713                                /* 99*/ 0,
2714                                        0,
2715                                        0,
2716                                        0,
2717                                        0,
2718                                /*104*/ 0,
2719                                        MatRealPart_SeqBAIJ,
2720                                        MatImaginaryPart_SeqBAIJ,
2721                                        0,
2722                                        0,
2723                                /*109*/ 0,
2724                                        0,
2725                                        0,
2726                                        0,
2727                                        MatMissingDiagonal_SeqBAIJ,
2728                                /*114*/ 0,
2729                                        0,
2730                                        0,
2731                                        0,
2732                                        0,
2733                                /*119*/ 0,
2734                                        0,
2735                                        MatMultHermitianTranspose_SeqBAIJ,
2736                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2737                                        0,
2738                                /*124*/ 0,
2739                                        0,
2740                                        MatInvertBlockDiagonal_SeqBAIJ,
2741                                        0,
2742                                        0,
2743                                /*129*/ 0,
2744                                        0,
2745                                        0,
2746                                        0,
2747                                        0,
2748                                /*134*/ 0,
2749                                        0,
2750                                        0,
2751                                        0,
2752                                        0,
2753                                /*139*/ MatSetBlockSizes_Default,
2754                                        0,
2755                                        0,
2756                                        MatFDColoringSetUp_SeqXAIJ,
2757                                        0,
2758                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ
2759 };
2760 
2761 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2762 {
2763   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2764   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2765   PetscErrorCode ierr;
2766 
2767   PetscFunctionBegin;
2768   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2769 
2770   /* allocate space for values if not already there */
2771   if (!aij->saved_values) {
2772     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2773     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2774   }
2775 
2776   /* copy values over */
2777   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2778   PetscFunctionReturn(0);
2779 }
2780 
2781 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2782 {
2783   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2784   PetscErrorCode ierr;
2785   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2786 
2787   PetscFunctionBegin;
2788   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2789   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2790 
2791   /* copy values over */
2792   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2793   PetscFunctionReturn(0);
2794 }
2795 
2796 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2797 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2798 
2799 static PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2800 {
2801   Mat_SeqBAIJ    *b;
2802   PetscErrorCode ierr;
2803   PetscInt       i,mbs,nbs,bs2;
2804   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2805 
2806   PetscFunctionBegin;
2807   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2808   if (nz == MAT_SKIP_ALLOCATION) {
2809     skipallocation = PETSC_TRUE;
2810     nz             = 0;
2811   }
2812 
2813   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2814   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2815   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2816   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2817 
2818   B->preallocated = PETSC_TRUE;
2819 
2820   mbs = B->rmap->n/bs;
2821   nbs = B->cmap->n/bs;
2822   bs2 = bs*bs;
2823 
2824   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2825 
2826   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2827   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2828   if (nnz) {
2829     for (i=0; i<mbs; i++) {
2830       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2831       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2832     }
2833   }
2834 
2835   b    = (Mat_SeqBAIJ*)B->data;
2836   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2837   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2838   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2839 
2840   if (!flg) {
2841     switch (bs) {
2842     case 1:
2843       B->ops->mult    = MatMult_SeqBAIJ_1;
2844       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2845       break;
2846     case 2:
2847       B->ops->mult    = MatMult_SeqBAIJ_2;
2848       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2849       break;
2850     case 3:
2851       B->ops->mult    = MatMult_SeqBAIJ_3;
2852       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2853       break;
2854     case 4:
2855       B->ops->mult    = MatMult_SeqBAIJ_4;
2856       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2857       break;
2858     case 5:
2859       B->ops->mult    = MatMult_SeqBAIJ_5;
2860       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2861       break;
2862     case 6:
2863       B->ops->mult    = MatMult_SeqBAIJ_6;
2864       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2865       break;
2866     case 7:
2867       B->ops->mult    = MatMult_SeqBAIJ_7;
2868       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2869       break;
2870     case 11:
2871       B->ops->mult    = MatMult_SeqBAIJ_11;
2872       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2873       break;
2874     case 15:
2875       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2876       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2877       break;
2878     default:
2879       B->ops->mult    = MatMult_SeqBAIJ_N;
2880       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2881       break;
2882     }
2883   }
2884   B->ops->sor = MatSOR_SeqBAIJ;
2885   b->mbs = mbs;
2886   b->nbs = nbs;
2887   if (!skipallocation) {
2888     if (!b->imax) {
2889       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2890       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2891 
2892       b->free_imax_ilen = PETSC_TRUE;
2893     }
2894     /* b->ilen will count nonzeros in each block row so far. */
2895     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2896     if (!nnz) {
2897       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2898       else if (nz < 0) nz = 1;
2899       for (i=0; i<mbs; i++) b->imax[i] = nz;
2900       nz = nz*mbs;
2901     } else {
2902       nz = 0;
2903       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2904     }
2905 
2906     /* allocate the matrix space */
2907     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2908     if (B->structure_only) {
2909       ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr);
2910       ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr);
2911       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr);
2912     } else {
2913       ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2914       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2915       ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2916     }
2917     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2918 
2919     if (B->structure_only) {
2920       b->singlemalloc = PETSC_FALSE;
2921       b->free_a       = PETSC_FALSE;
2922     } else {
2923       b->singlemalloc = PETSC_TRUE;
2924       b->free_a       = PETSC_TRUE;
2925     }
2926     b->free_ij = PETSC_TRUE;
2927 
2928     b->i[0] = 0;
2929     for (i=1; i<mbs+1; i++) {
2930       b->i[i] = b->i[i-1] + b->imax[i-1];
2931     }
2932 
2933   } else {
2934     b->free_a  = PETSC_FALSE;
2935     b->free_ij = PETSC_FALSE;
2936   }
2937 
2938   b->bs2              = bs2;
2939   b->mbs              = mbs;
2940   b->nz               = 0;
2941   b->maxnz            = nz;
2942   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2943   B->was_assembled    = PETSC_FALSE;
2944   B->assembled        = PETSC_FALSE;
2945   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2946   PetscFunctionReturn(0);
2947 }
2948 
2949 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2950 {
2951   PetscInt       i,m,nz,nz_max=0,*nnz;
2952   PetscScalar    *values=0;
2953   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2954   PetscErrorCode ierr;
2955 
2956   PetscFunctionBegin;
2957   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2958   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2959   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2960   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2961   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2962   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2963   m    = B->rmap->n/bs;
2964 
2965   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2966   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2967   for (i=0; i<m; i++) {
2968     nz = ii[i+1]- ii[i];
2969     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2970     nz_max = PetscMax(nz_max, nz);
2971     nnz[i] = nz;
2972   }
2973   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2974   ierr = PetscFree(nnz);CHKERRQ(ierr);
2975 
2976   values = (PetscScalar*)V;
2977   if (!values) {
2978     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2979   }
2980   for (i=0; i<m; i++) {
2981     PetscInt          ncols  = ii[i+1] - ii[i];
2982     const PetscInt    *icols = jj + ii[i];
2983     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2984     if (!roworiented) {
2985       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2986     } else {
2987       PetscInt j;
2988       for (j=0; j<ncols; j++) {
2989         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2990         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2991       }
2992     }
2993   }
2994   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2995   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2996   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2997   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2998   PetscFunctionReturn(0);
2999 }
3000 
3001 /*MC
3002    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3003    block sparse compressed row format.
3004 
3005    Options Database Keys:
3006 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3007 
3008   Level: beginner
3009 
3010 .seealso: MatCreateSeqBAIJ()
3011 M*/
3012 
3013 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3014 
3015 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3016 {
3017   PetscErrorCode ierr;
3018   PetscMPIInt    size;
3019   Mat_SeqBAIJ    *b;
3020 
3021   PetscFunctionBegin;
3022   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3023   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3024 
3025   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3026   B->data = (void*)b;
3027   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3028 
3029   b->row          = 0;
3030   b->col          = 0;
3031   b->icol         = 0;
3032   b->reallocs     = 0;
3033   b->saved_values = 0;
3034 
3035   b->roworiented        = PETSC_TRUE;
3036   b->nonew              = 0;
3037   b->diag               = 0;
3038   B->spptr              = 0;
3039   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3040   b->keepnonzeropattern = PETSC_FALSE;
3041 
3042   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3043   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3044   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3045   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3046   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3047   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3048   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3049   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3050   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3051 #if defined(PETSC_HAVE_HYPRE)
3052   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_baij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
3053 #endif
3054   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3055   PetscFunctionReturn(0);
3056 }
3057 
3058 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3059 {
3060   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3061   PetscErrorCode ierr;
3062   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3063 
3064   PetscFunctionBegin;
3065   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3066 
3067   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3068     c->imax           = a->imax;
3069     c->ilen           = a->ilen;
3070     c->free_imax_ilen = PETSC_FALSE;
3071   } else {
3072     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3073     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3074     for (i=0; i<mbs; i++) {
3075       c->imax[i] = a->imax[i];
3076       c->ilen[i] = a->ilen[i];
3077     }
3078     c->free_imax_ilen = PETSC_TRUE;
3079   }
3080 
3081   /* allocate the matrix space */
3082   if (mallocmatspace) {
3083     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3084       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3085       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3086 
3087       c->i            = a->i;
3088       c->j            = a->j;
3089       c->singlemalloc = PETSC_FALSE;
3090       c->free_a       = PETSC_TRUE;
3091       c->free_ij      = PETSC_FALSE;
3092       c->parent       = A;
3093       C->preallocated = PETSC_TRUE;
3094       C->assembled    = PETSC_TRUE;
3095 
3096       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3097       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3098       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3099     } else {
3100       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3101       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3102 
3103       c->singlemalloc = PETSC_TRUE;
3104       c->free_a       = PETSC_TRUE;
3105       c->free_ij      = PETSC_TRUE;
3106 
3107       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3108       if (mbs > 0) {
3109         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3110         if (cpvalues == MAT_COPY_VALUES) {
3111           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3112         } else {
3113           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3114         }
3115       }
3116       C->preallocated = PETSC_TRUE;
3117       C->assembled    = PETSC_TRUE;
3118     }
3119   }
3120 
3121   c->roworiented = a->roworiented;
3122   c->nonew       = a->nonew;
3123 
3124   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3125   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3126 
3127   c->bs2         = a->bs2;
3128   c->mbs         = a->mbs;
3129   c->nbs         = a->nbs;
3130 
3131   if (a->diag) {
3132     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3133       c->diag      = a->diag;
3134       c->free_diag = PETSC_FALSE;
3135     } else {
3136       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3137       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3138       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3139       c->free_diag = PETSC_TRUE;
3140     }
3141   } else c->diag = 0;
3142 
3143   c->nz         = a->nz;
3144   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3145   c->solve_work = NULL;
3146   c->mult_work  = NULL;
3147   c->sor_workt  = NULL;
3148   c->sor_work   = NULL;
3149 
3150   c->compressedrow.use   = a->compressedrow.use;
3151   c->compressedrow.nrows = a->compressedrow.nrows;
3152   if (a->compressedrow.use) {
3153     i    = a->compressedrow.nrows;
3154     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3155     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3156     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3157     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3158   } else {
3159     c->compressedrow.use    = PETSC_FALSE;
3160     c->compressedrow.i      = NULL;
3161     c->compressedrow.rindex = NULL;
3162   }
3163   C->nonzerostate = A->nonzerostate;
3164 
3165   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3166   PetscFunctionReturn(0);
3167 }
3168 
3169 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3170 {
3171   PetscErrorCode ierr;
3172 
3173   PetscFunctionBegin;
3174   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3175   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3176   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3177   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3178   PetscFunctionReturn(0);
3179 }
3180 
3181 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3182 {
3183   Mat_SeqBAIJ    *a;
3184   PetscErrorCode ierr;
3185   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3186   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3187   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3188   PetscInt       *masked,nmask,tmp,bs2,ishift;
3189   PetscMPIInt    size;
3190   int            fd;
3191   PetscScalar    *aa;
3192   MPI_Comm       comm;
3193 
3194   PetscFunctionBegin;
3195   /* force binary viewer to load .info file if it has not yet done so */
3196   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3197   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3198   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3199   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3200   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3201   if (bs < 0) bs = 1;
3202   bs2  = bs*bs;
3203 
3204   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3205   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3206   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3207   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3208   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3209   M = header[1]; N = header[2]; nz = header[3];
3210 
3211   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3212   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3213 
3214   /*
3215      This code adds extra rows to make sure the number of rows is
3216     divisible by the blocksize
3217   */
3218   mbs        = M/bs;
3219   extra_rows = bs - M + bs*(mbs);
3220   if (extra_rows == bs) extra_rows = 0;
3221   else mbs++;
3222   if (extra_rows) {
3223     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3224   }
3225 
3226   /* Set global sizes if not already set */
3227   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3228     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3229   } else { /* Check if the matrix global sizes are correct */
3230     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3231     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3232       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3233     }
3234     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3235   }
3236 
3237   /* read in row lengths */
3238   ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
3239   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3240   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3241 
3242   /* read in column indices */
3243   ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr);
3244   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3245   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3246 
3247   /* loop over row lengths determining block row lengths */
3248   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3249   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3250   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3251   rowcount = 0;
3252   nzcount  = 0;
3253   for (i=0; i<mbs; i++) {
3254     nmask = 0;
3255     for (j=0; j<bs; j++) {
3256       kmax = rowlengths[rowcount];
3257       for (k=0; k<kmax; k++) {
3258         tmp = jj[nzcount++]/bs;
3259         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3260       }
3261       rowcount++;
3262     }
3263     browlengths[i] += nmask;
3264     /* zero out the mask elements we set */
3265     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3266   }
3267 
3268   /* Do preallocation  */
3269   ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr);
3270   a    = (Mat_SeqBAIJ*)newmat->data;
3271 
3272   /* set matrix "i" values */
3273   a->i[0] = 0;
3274   for (i=1; i<= mbs; i++) {
3275     a->i[i]      = a->i[i-1] + browlengths[i-1];
3276     a->ilen[i-1] = browlengths[i-1];
3277   }
3278   a->nz = 0;
3279   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3280 
3281   /* read in nonzero values */
3282   ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr);
3283   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3284   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3285 
3286   /* set "a" and "j" values into matrix */
3287   nzcount = 0; jcount = 0;
3288   for (i=0; i<mbs; i++) {
3289     nzcountb = nzcount;
3290     nmask    = 0;
3291     for (j=0; j<bs; j++) {
3292       kmax = rowlengths[i*bs+j];
3293       for (k=0; k<kmax; k++) {
3294         tmp = jj[nzcount++]/bs;
3295         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3296       }
3297     }
3298     /* sort the masked values */
3299     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3300 
3301     /* set "j" values into matrix */
3302     maskcount = 1;
3303     for (j=0; j<nmask; j++) {
3304       a->j[jcount++]  = masked[j];
3305       mask[masked[j]] = maskcount++;
3306     }
3307     /* set "a" values into matrix */
3308     ishift = bs2*a->i[i];
3309     for (j=0; j<bs; j++) {
3310       kmax = rowlengths[i*bs+j];
3311       for (k=0; k<kmax; k++) {
3312         tmp       = jj[nzcountb]/bs;
3313         block     = mask[tmp] - 1;
3314         point     = jj[nzcountb] - bs*tmp;
3315         idx       = ishift + bs2*block + j + bs*point;
3316         a->a[idx] = (MatScalar)aa[nzcountb++];
3317       }
3318     }
3319     /* zero out the mask elements we set */
3320     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3321   }
3322   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3323 
3324   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3325   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3326   ierr = PetscFree(aa);CHKERRQ(ierr);
3327   ierr = PetscFree(jj);CHKERRQ(ierr);
3328   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3329 
3330   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3331   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3332   PetscFunctionReturn(0);
3333 }
3334 
3335 /*@C
3336    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3337    compressed row) format.  For good matrix assembly performance the
3338    user should preallocate the matrix storage by setting the parameter nz
3339    (or the array nnz).  By setting these parameters accurately, performance
3340    during matrix assembly can be increased by more than a factor of 50.
3341 
3342    Collective on MPI_Comm
3343 
3344    Input Parameters:
3345 +  comm - MPI communicator, set to PETSC_COMM_SELF
3346 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3347           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3348 .  m - number of rows
3349 .  n - number of columns
3350 .  nz - number of nonzero blocks  per block row (same for all rows)
3351 -  nnz - array containing the number of nonzero blocks in the various block rows
3352          (possibly different for each block row) or NULL
3353 
3354    Output Parameter:
3355 .  A - the matrix
3356 
3357    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3358    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3359    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3360 
3361    Options Database Keys:
3362 .   -mat_no_unroll - uses code that does not unroll the loops in the
3363                      block calculations (much slower)
3364 .    -mat_block_size - size of the blocks to use
3365 
3366    Level: intermediate
3367 
3368    Notes:
3369    The number of rows and columns must be divisible by blocksize.
3370 
3371    If the nnz parameter is given then the nz parameter is ignored
3372 
3373    A nonzero block is any block that as 1 or more nonzeros in it
3374 
3375    The block AIJ format is fully compatible with standard Fortran 77
3376    storage.  That is, the stored row and column indices can begin at
3377    either one (as in Fortran) or zero.  See the users' manual for details.
3378 
3379    Specify the preallocated storage with either nz or nnz (not both).
3380    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3381    allocation.  See Users-Manual: ch_mat for details.
3382    matrices.
3383 
3384 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3385 @*/
3386 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3387 {
3388   PetscErrorCode ierr;
3389 
3390   PetscFunctionBegin;
3391   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3392   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3393   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3394   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3395   PetscFunctionReturn(0);
3396 }
3397 
3398 /*@C
3399    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3400    per row in the matrix. For good matrix assembly performance the
3401    user should preallocate the matrix storage by setting the parameter nz
3402    (or the array nnz).  By setting these parameters accurately, performance
3403    during matrix assembly can be increased by more than a factor of 50.
3404 
3405    Collective on MPI_Comm
3406 
3407    Input Parameters:
3408 +  B - the matrix
3409 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3410           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3411 .  nz - number of block nonzeros per block row (same for all rows)
3412 -  nnz - array containing the number of block nonzeros in the various block rows
3413          (possibly different for each block row) or NULL
3414 
3415    Options Database Keys:
3416 .   -mat_no_unroll - uses code that does not unroll the loops in the
3417                      block calculations (much slower)
3418 .   -mat_block_size - size of the blocks to use
3419 
3420    Level: intermediate
3421 
3422    Notes:
3423    If the nnz parameter is given then the nz parameter is ignored
3424 
3425    You can call MatGetInfo() to get information on how effective the preallocation was;
3426    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3427    You can also run with the option -info and look for messages with the string
3428    malloc in them to see if additional memory allocation was needed.
3429 
3430    The block AIJ format is fully compatible with standard Fortran 77
3431    storage.  That is, the stored row and column indices can begin at
3432    either one (as in Fortran) or zero.  See the users' manual for details.
3433 
3434    Specify the preallocated storage with either nz or nnz (not both).
3435    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3436    allocation.  See Users-Manual: ch_mat for details.
3437 
3438 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3439 @*/
3440 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3441 {
3442   PetscErrorCode ierr;
3443 
3444   PetscFunctionBegin;
3445   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3446   PetscValidType(B,1);
3447   PetscValidLogicalCollectiveInt(B,bs,2);
3448   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3449   PetscFunctionReturn(0);
3450 }
3451 
3452 /*@C
3453    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3454    (the default sequential PETSc format).
3455 
3456    Collective on MPI_Comm
3457 
3458    Input Parameters:
3459 +  B - the matrix
3460 .  i - the indices into j for the start of each local row (starts with zero)
3461 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3462 -  v - optional values in the matrix
3463 
3464    Level: developer
3465 
3466    Notes:
3467    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3468    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3469    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3470    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3471    block column and the second index is over columns within a block.
3472 
3473 .keywords: matrix, aij, compressed row, sparse
3474 
3475 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3476 @*/
3477 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3478 {
3479   PetscErrorCode ierr;
3480 
3481   PetscFunctionBegin;
3482   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3483   PetscValidType(B,1);
3484   PetscValidLogicalCollectiveInt(B,bs,2);
3485   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3486   PetscFunctionReturn(0);
3487 }
3488 
3489 
3490 /*@
3491      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3492 
3493      Collective on MPI_Comm
3494 
3495    Input Parameters:
3496 +  comm - must be an MPI communicator of size 1
3497 .  bs - size of block
3498 .  m - number of rows
3499 .  n - number of columns
3500 .  i - row indices
3501 .  j - column indices
3502 -  a - matrix values
3503 
3504    Output Parameter:
3505 .  mat - the matrix
3506 
3507    Level: advanced
3508 
3509    Notes:
3510        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3511     once the matrix is destroyed
3512 
3513        You cannot set new nonzero locations into this matrix, that will generate an error.
3514 
3515        The i and j indices are 0 based
3516 
3517        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3518 
3519       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3520       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3521       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3522       with column-major ordering within blocks.
3523 
3524 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3525 
3526 @*/
3527 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3528 {
3529   PetscErrorCode ierr;
3530   PetscInt       ii;
3531   Mat_SeqBAIJ    *baij;
3532 
3533   PetscFunctionBegin;
3534   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3535   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3536 
3537   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3538   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3539   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3540   ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3541   baij = (Mat_SeqBAIJ*)(*mat)->data;
3542   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3543   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3544 
3545   baij->i = i;
3546   baij->j = j;
3547   baij->a = a;
3548 
3549   baij->singlemalloc = PETSC_FALSE;
3550   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3551   baij->free_a       = PETSC_FALSE;
3552   baij->free_ij      = PETSC_FALSE;
3553 
3554   for (ii=0; ii<m; ii++) {
3555     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3556 #if defined(PETSC_USE_DEBUG)
3557     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3558 #endif
3559   }
3560 #if defined(PETSC_USE_DEBUG)
3561   for (ii=0; ii<baij->i[m]; ii++) {
3562     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3563     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3564   }
3565 #endif
3566 
3567   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3568   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3569   PetscFunctionReturn(0);
3570 }
3571 
3572 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3573 {
3574   PetscErrorCode ierr;
3575   PetscMPIInt    size;
3576 
3577   PetscFunctionBegin;
3578   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3579   if (size == 1 && scall == MAT_REUSE_MATRIX) {
3580     ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
3581   } else {
3582     ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3583   }
3584   PetscFunctionReturn(0);
3585 }
3586