xref: /petsc/src/mat/impls/baij/seq/baij.c (revision a9f05ab01b48311f1a4f4a3d36b06eef45c4952f)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13 #endif
14 
15 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
16 {
17   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
18   PetscErrorCode ierr;
19   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
20   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
21   PetscReal      shift = 0.0;
22   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
23 
24   PetscFunctionBegin;
25   allowzeropivot = PetscNot(A->erroriffailure);
26 
27   if (a->idiagvalid) {
28     if (values) *values = a->idiag;
29     PetscFunctionReturn(0);
30   }
31   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
32   diag_offset = a->diag;
33   if (!a->idiag) {
34     ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr);
35     ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
36   }
37   diag  = a->idiag;
38   mdiag = a->idiag+bs2*mbs;
39   if (values) *values = a->idiag;
40   /* factor and invert each block */
41   switch (bs) {
42   case 1:
43     for (i=0; i<mbs; i++) {
44       odiag    = v + 1*diag_offset[i];
45       diag[0]  = odiag[0];
46       mdiag[0] = odiag[0];
47 
48       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
49         if (allowzeropivot) {
50           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
51           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
52           A->factorerror_zeropivot_row   = i;
53           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
54         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
55       }
56 
57       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
58       diag    += 1;
59       mdiag   += 1;
60     }
61     break;
62   case 2:
63     for (i=0; i<mbs; i++) {
64       odiag    = v + 4*diag_offset[i];
65       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
66       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
67       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
68       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
69       diag    += 4;
70       mdiag   += 4;
71     }
72     break;
73   case 3:
74     for (i=0; i<mbs; i++) {
75       odiag    = v + 9*diag_offset[i];
76       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
77       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
78       diag[8]  = odiag[8];
79       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
80       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
81       mdiag[8] = odiag[8];
82       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
83       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
84       diag    += 9;
85       mdiag   += 9;
86     }
87     break;
88   case 4:
89     for (i=0; i<mbs; i++) {
90       odiag  = v + 16*diag_offset[i];
91       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
92       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
94       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
95       diag  += 16;
96       mdiag += 16;
97     }
98     break;
99   case 5:
100     for (i=0; i<mbs; i++) {
101       odiag  = v + 25*diag_offset[i];
102       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
104       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
105       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
106       diag  += 25;
107       mdiag += 25;
108     }
109     break;
110   case 6:
111     for (i=0; i<mbs; i++) {
112       odiag  = v + 36*diag_offset[i];
113       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
114       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
115       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
116       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
117       diag  += 36;
118       mdiag += 36;
119     }
120     break;
121   case 7:
122     for (i=0; i<mbs; i++) {
123       odiag  = v + 49*diag_offset[i];
124       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
125       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
126       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
127       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
128       diag  += 49;
129       mdiag += 49;
130     }
131     break;
132   default:
133     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
134     for (i=0; i<mbs; i++) {
135       odiag  = v + bs2*diag_offset[i];
136       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
137       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
138       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
139       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
140       diag  += bs2;
141       mdiag += bs2;
142     }
143     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
144   }
145   a->idiagvalid = PETSC_TRUE;
146   PetscFunctionReturn(0);
147 }
148 
149 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
150 {
151   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
152   PetscScalar       *x,*work,*w,*workt,*t;
153   const MatScalar   *v,*aa = a->a, *idiag;
154   const PetscScalar *b,*xb;
155   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
156   PetscErrorCode    ierr;
157   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
158   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
159 
160   PetscFunctionBegin;
161   if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */
162   its = its*lits;
163   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
164   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
165   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
166   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
167   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
168 
169   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
170 
171   if (!m) PetscFunctionReturn(0);
172   diag  = a->diag;
173   idiag = a->idiag;
174   k    = PetscMax(A->rmap->n,A->cmap->n);
175   if (!a->mult_work) {
176     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
177   }
178   if (!a->sor_workt) {
179     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
180   }
181   if (!a->sor_work) {
182     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
183   }
184   work = a->mult_work;
185   t    = a->sor_workt;
186   w    = a->sor_work;
187 
188   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
189   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
190 
191   if (flag & SOR_ZERO_INITIAL_GUESS) {
192     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
193       switch (bs) {
194       case 1:
195         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
196         t[0] = b[0];
197         i2     = 1;
198         idiag += 1;
199         for (i=1; i<m; i++) {
200           v  = aa + ai[i];
201           vi = aj + ai[i];
202           nz = diag[i] - ai[i];
203           s[0] = b[i2];
204           for (j=0; j<nz; j++) {
205             xw[0] = x[vi[j]];
206             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
207           }
208           t[i2] = s[0];
209           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
210           x[i2]  = xw[0];
211           idiag += 1;
212           i2    += 1;
213         }
214         break;
215       case 2:
216         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
217         t[0] = b[0]; t[1] = b[1];
218         i2     = 2;
219         idiag += 4;
220         for (i=1; i<m; i++) {
221           v  = aa + 4*ai[i];
222           vi = aj + ai[i];
223           nz = diag[i] - ai[i];
224           s[0] = b[i2]; s[1] = b[i2+1];
225           for (j=0; j<nz; j++) {
226             idx = 2*vi[j];
227             it  = 4*j;
228             xw[0] = x[idx]; xw[1] = x[1+idx];
229             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
230           }
231           t[i2] = s[0]; t[i2+1] = s[1];
232           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
233           x[i2]   = xw[0]; x[i2+1] = xw[1];
234           idiag  += 4;
235           i2     += 2;
236         }
237         break;
238       case 3:
239         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
240         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
241         i2     = 3;
242         idiag += 9;
243         for (i=1; i<m; i++) {
244           v  = aa + 9*ai[i];
245           vi = aj + ai[i];
246           nz = diag[i] - ai[i];
247           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
248           while (nz--) {
249             idx = 3*(*vi++);
250             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
251             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
252             v  += 9;
253           }
254           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
255           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
256           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
257           idiag  += 9;
258           i2     += 3;
259         }
260         break;
261       case 4:
262         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
263         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
264         i2     = 4;
265         idiag += 16;
266         for (i=1; i<m; i++) {
267           v  = aa + 16*ai[i];
268           vi = aj + ai[i];
269           nz = diag[i] - ai[i];
270           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
271           while (nz--) {
272             idx = 4*(*vi++);
273             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
274             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
275             v  += 16;
276           }
277           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
278           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
279           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
280           idiag  += 16;
281           i2     += 4;
282         }
283         break;
284       case 5:
285         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
286         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
287         i2     = 5;
288         idiag += 25;
289         for (i=1; i<m; i++) {
290           v  = aa + 25*ai[i];
291           vi = aj + ai[i];
292           nz = diag[i] - ai[i];
293           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
294           while (nz--) {
295             idx = 5*(*vi++);
296             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
297             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
298             v  += 25;
299           }
300           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
301           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
302           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
303           idiag  += 25;
304           i2     += 5;
305         }
306         break;
307       case 6:
308         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
309         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
310         i2     = 6;
311         idiag += 36;
312         for (i=1; i<m; i++) {
313           v  = aa + 36*ai[i];
314           vi = aj + ai[i];
315           nz = diag[i] - ai[i];
316           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
317           while (nz--) {
318             idx = 6*(*vi++);
319             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
320             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
321             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
322             v  += 36;
323           }
324           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
325           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
326           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
327           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
328           idiag  += 36;
329           i2     += 6;
330         }
331         break;
332       case 7:
333         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
334         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
335         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
336         i2     = 7;
337         idiag += 49;
338         for (i=1; i<m; i++) {
339           v  = aa + 49*ai[i];
340           vi = aj + ai[i];
341           nz = diag[i] - ai[i];
342           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
343           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
344           while (nz--) {
345             idx = 7*(*vi++);
346             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
347             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
348             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
349             v  += 49;
350           }
351           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
352           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
353           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
354           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
355           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
356           idiag  += 49;
357           i2     += 7;
358         }
359         break;
360       default:
361         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
362         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
363         i2     = bs;
364         idiag += bs2;
365         for (i=1; i<m; i++) {
366           v  = aa + bs2*ai[i];
367           vi = aj + ai[i];
368           nz = diag[i] - ai[i];
369 
370           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
371           /* copy all rows of x that are needed into contiguous space */
372           workt = work;
373           for (j=0; j<nz; j++) {
374             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
375             workt += bs;
376           }
377           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
378           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
379           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
380 
381           idiag += bs2;
382           i2    += bs;
383         }
384         break;
385       }
386       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
387       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
388       xb = t;
389     }
390     else xb = b;
391     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
392       idiag = a->idiag+bs2*(a->mbs-1);
393       i2 = bs * (m-1);
394       switch (bs) {
395       case 1:
396         s[0]  = xb[i2];
397         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
398         x[i2] = xw[0];
399         i2   -= 1;
400         for (i=m-2; i>=0; i--) {
401           v  = aa + (diag[i]+1);
402           vi = aj + diag[i] + 1;
403           nz = ai[i+1] - diag[i] - 1;
404           s[0] = xb[i2];
405           for (j=0; j<nz; j++) {
406             xw[0] = x[vi[j]];
407             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
408           }
409           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
410           x[i2]  = xw[0];
411           idiag -= 1;
412           i2    -= 1;
413         }
414         break;
415       case 2:
416         s[0]  = xb[i2]; s[1] = xb[i2+1];
417         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
418         x[i2] = xw[0]; x[i2+1] = xw[1];
419         i2    -= 2;
420         idiag -= 4;
421         for (i=m-2; i>=0; i--) {
422           v  = aa + 4*(diag[i] + 1);
423           vi = aj + diag[i] + 1;
424           nz = ai[i+1] - diag[i] - 1;
425           s[0] = xb[i2]; s[1] = xb[i2+1];
426           for (j=0; j<nz; j++) {
427             idx = 2*vi[j];
428             it  = 4*j;
429             xw[0] = x[idx]; xw[1] = x[1+idx];
430             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
431           }
432           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
433           x[i2]   = xw[0]; x[i2+1] = xw[1];
434           idiag  -= 4;
435           i2     -= 2;
436         }
437         break;
438       case 3:
439         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
440         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
441         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
442         i2    -= 3;
443         idiag -= 9;
444         for (i=m-2; i>=0; i--) {
445           v  = aa + 9*(diag[i]+1);
446           vi = aj + diag[i] + 1;
447           nz = ai[i+1] - diag[i] - 1;
448           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
449           while (nz--) {
450             idx = 3*(*vi++);
451             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
452             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
453             v  += 9;
454           }
455           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
456           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
457           idiag  -= 9;
458           i2     -= 3;
459         }
460         break;
461       case 4:
462         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
463         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
464         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
465         i2    -= 4;
466         idiag -= 16;
467         for (i=m-2; i>=0; i--) {
468           v  = aa + 16*(diag[i]+1);
469           vi = aj + diag[i] + 1;
470           nz = ai[i+1] - diag[i] - 1;
471           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
472           while (nz--) {
473             idx = 4*(*vi++);
474             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
475             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
476             v  += 16;
477           }
478           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
479           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
480           idiag  -= 16;
481           i2     -= 4;
482         }
483         break;
484       case 5:
485         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
486         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
487         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
488         i2    -= 5;
489         idiag -= 25;
490         for (i=m-2; i>=0; i--) {
491           v  = aa + 25*(diag[i]+1);
492           vi = aj + diag[i] + 1;
493           nz = ai[i+1] - diag[i] - 1;
494           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
495           while (nz--) {
496             idx = 5*(*vi++);
497             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
498             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
499             v  += 25;
500           }
501           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
502           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
503           idiag  -= 25;
504           i2     -= 5;
505         }
506         break;
507       case 6:
508         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
509         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
510         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
511         i2    -= 6;
512         idiag -= 36;
513         for (i=m-2; i>=0; i--) {
514           v  = aa + 36*(diag[i]+1);
515           vi = aj + diag[i] + 1;
516           nz = ai[i+1] - diag[i] - 1;
517           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
518           while (nz--) {
519             idx = 6*(*vi++);
520             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
521             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
522             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
523             v  += 36;
524           }
525           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
526           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
527           idiag  -= 36;
528           i2     -= 6;
529         }
530         break;
531       case 7:
532         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
533         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
534         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
535         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
536         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
537         i2    -= 7;
538         idiag -= 49;
539         for (i=m-2; i>=0; i--) {
540           v  = aa + 49*(diag[i]+1);
541           vi = aj + diag[i] + 1;
542           nz = ai[i+1] - diag[i] - 1;
543           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
544           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
545           while (nz--) {
546             idx = 7*(*vi++);
547             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
548             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
549             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
550             v  += 49;
551           }
552           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
553           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
554           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
555           idiag  -= 49;
556           i2     -= 7;
557         }
558         break;
559       default:
560         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
561         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
562         i2    -= bs;
563         idiag -= bs2;
564         for (i=m-2; i>=0; i--) {
565           v  = aa + bs2*(diag[i]+1);
566           vi = aj + diag[i] + 1;
567           nz = ai[i+1] - diag[i] - 1;
568 
569           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
570           /* copy all rows of x that are needed into contiguous space */
571           workt = work;
572           for (j=0; j<nz; j++) {
573             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
574             workt += bs;
575           }
576           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
577           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
578 
579           idiag -= bs2;
580           i2    -= bs;
581         }
582         break;
583       }
584       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
585     }
586     its--;
587   }
588   while (its--) {
589     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
590       idiag = a->idiag;
591       i2 = 0;
592       switch (bs) {
593       case 1:
594         for (i=0; i<m; i++) {
595           v  = aa + ai[i];
596           vi = aj + ai[i];
597           nz = ai[i+1] - ai[i];
598           s[0] = b[i2];
599           for (j=0; j<nz; j++) {
600             xw[0] = x[vi[j]];
601             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
602           }
603           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
604           x[i2] += xw[0];
605           idiag += 1;
606           i2    += 1;
607         }
608         break;
609       case 2:
610         for (i=0; i<m; i++) {
611           v  = aa + 4*ai[i];
612           vi = aj + ai[i];
613           nz = ai[i+1] - ai[i];
614           s[0] = b[i2]; s[1] = b[i2+1];
615           for (j=0; j<nz; j++) {
616             idx = 2*vi[j];
617             it  = 4*j;
618             xw[0] = x[idx]; xw[1] = x[1+idx];
619             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
620           }
621           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
622           x[i2]  += xw[0]; x[i2+1] += xw[1];
623           idiag  += 4;
624           i2     += 2;
625         }
626         break;
627       case 3:
628         for (i=0; i<m; i++) {
629           v  = aa + 9*ai[i];
630           vi = aj + ai[i];
631           nz = ai[i+1] - ai[i];
632           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
633           while (nz--) {
634             idx = 3*(*vi++);
635             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
636             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
637             v  += 9;
638           }
639           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
640           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
641           idiag  += 9;
642           i2     += 3;
643         }
644         break;
645       case 4:
646         for (i=0; i<m; i++) {
647           v  = aa + 16*ai[i];
648           vi = aj + ai[i];
649           nz = ai[i+1] - ai[i];
650           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
651           while (nz--) {
652             idx = 4*(*vi++);
653             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
654             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
655             v  += 16;
656           }
657           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
658           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
659           idiag  += 16;
660           i2     += 4;
661         }
662         break;
663       case 5:
664         for (i=0; i<m; i++) {
665           v  = aa + 25*ai[i];
666           vi = aj + ai[i];
667           nz = ai[i+1] - ai[i];
668           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
669           while (nz--) {
670             idx = 5*(*vi++);
671             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
672             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
673             v  += 25;
674           }
675           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
676           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
677           idiag  += 25;
678           i2     += 5;
679         }
680         break;
681       case 6:
682         for (i=0; i<m; i++) {
683           v  = aa + 36*ai[i];
684           vi = aj + ai[i];
685           nz = ai[i+1] - ai[i];
686           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
687           while (nz--) {
688             idx = 6*(*vi++);
689             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
690             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
691             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
692             v  += 36;
693           }
694           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
695           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
696           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
697           idiag  += 36;
698           i2     += 6;
699         }
700         break;
701       case 7:
702         for (i=0; i<m; i++) {
703           v  = aa + 49*ai[i];
704           vi = aj + ai[i];
705           nz = ai[i+1] - ai[i];
706           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
707           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
708           while (nz--) {
709             idx = 7*(*vi++);
710             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
711             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
712             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
713             v  += 49;
714           }
715           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
716           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
717           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
718           idiag  += 49;
719           i2     += 7;
720         }
721         break;
722       default:
723         for (i=0; i<m; i++) {
724           v  = aa + bs2*ai[i];
725           vi = aj + ai[i];
726           nz = ai[i+1] - ai[i];
727 
728           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
729           /* copy all rows of x that are needed into contiguous space */
730           workt = work;
731           for (j=0; j<nz; j++) {
732             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
733             workt += bs;
734           }
735           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
736           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
737 
738           idiag += bs2;
739           i2    += bs;
740         }
741         break;
742       }
743       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
744     }
745     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
746       idiag = a->idiag+bs2*(a->mbs-1);
747       i2 = bs * (m-1);
748       switch (bs) {
749       case 1:
750         for (i=m-1; i>=0; i--) {
751           v  = aa + ai[i];
752           vi = aj + ai[i];
753           nz = ai[i+1] - ai[i];
754           s[0] = b[i2];
755           for (j=0; j<nz; j++) {
756             xw[0] = x[vi[j]];
757             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
758           }
759           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
760           x[i2] += xw[0];
761           idiag -= 1;
762           i2    -= 1;
763         }
764         break;
765       case 2:
766         for (i=m-1; i>=0; i--) {
767           v  = aa + 4*ai[i];
768           vi = aj + ai[i];
769           nz = ai[i+1] - ai[i];
770           s[0] = b[i2]; s[1] = b[i2+1];
771           for (j=0; j<nz; j++) {
772             idx = 2*vi[j];
773             it  = 4*j;
774             xw[0] = x[idx]; xw[1] = x[1+idx];
775             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
776           }
777           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
778           x[i2]  += xw[0]; x[i2+1] += xw[1];
779           idiag  -= 4;
780           i2     -= 2;
781         }
782         break;
783       case 3:
784         for (i=m-1; i>=0; i--) {
785           v  = aa + 9*ai[i];
786           vi = aj + ai[i];
787           nz = ai[i+1] - ai[i];
788           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
789           while (nz--) {
790             idx = 3*(*vi++);
791             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
792             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
793             v  += 9;
794           }
795           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
796           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
797           idiag  -= 9;
798           i2     -= 3;
799         }
800         break;
801       case 4:
802         for (i=m-1; i>=0; i--) {
803           v  = aa + 16*ai[i];
804           vi = aj + ai[i];
805           nz = ai[i+1] - ai[i];
806           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
807           while (nz--) {
808             idx = 4*(*vi++);
809             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
810             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
811             v  += 16;
812           }
813           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
814           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
815           idiag  -= 16;
816           i2     -= 4;
817         }
818         break;
819       case 5:
820         for (i=m-1; i>=0; i--) {
821           v  = aa + 25*ai[i];
822           vi = aj + ai[i];
823           nz = ai[i+1] - ai[i];
824           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
825           while (nz--) {
826             idx = 5*(*vi++);
827             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
828             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
829             v  += 25;
830           }
831           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
832           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
833           idiag  -= 25;
834           i2     -= 5;
835         }
836         break;
837       case 6:
838         for (i=m-1; i>=0; i--) {
839           v  = aa + 36*ai[i];
840           vi = aj + ai[i];
841           nz = ai[i+1] - ai[i];
842           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
843           while (nz--) {
844             idx = 6*(*vi++);
845             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
846             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
847             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
848             v  += 36;
849           }
850           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
851           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
852           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
853           idiag  -= 36;
854           i2     -= 6;
855         }
856         break;
857       case 7:
858         for (i=m-1; i>=0; i--) {
859           v  = aa + 49*ai[i];
860           vi = aj + ai[i];
861           nz = ai[i+1] - ai[i];
862           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
863           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
864           while (nz--) {
865             idx = 7*(*vi++);
866             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
867             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
868             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
869             v  += 49;
870           }
871           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
872           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
873           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
874           idiag  -= 49;
875           i2     -= 7;
876         }
877         break;
878       default:
879         for (i=m-1; i>=0; i--) {
880           v  = aa + bs2*ai[i];
881           vi = aj + ai[i];
882           nz = ai[i+1] - ai[i];
883 
884           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
885           /* copy all rows of x that are needed into contiguous space */
886           workt = work;
887           for (j=0; j<nz; j++) {
888             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
889             workt += bs;
890           }
891           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
892           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
893 
894           idiag -= bs2;
895           i2    -= bs;
896         }
897         break;
898       }
899       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
900     }
901   }
902   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
903   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
904   PetscFunctionReturn(0);
905 }
906 
907 
908 /*
909     Special version for direct calls from Fortran (Used in PETSc-fun3d)
910 */
911 #if defined(PETSC_HAVE_FORTRAN_CAPS)
912 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
913 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
914 #define matsetvaluesblocked4_ matsetvaluesblocked4
915 #endif
916 
917 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
918 {
919   Mat               A  = *AA;
920   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
921   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
922   PetscInt          *ai    =a->i,*ailen=a->ilen;
923   PetscInt          *aj    =a->j,stepval,lastcol = -1;
924   const PetscScalar *value = v;
925   MatScalar         *ap,*aa = a->a,*bap;
926 
927   PetscFunctionBegin;
928   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
929   stepval = (n-1)*4;
930   for (k=0; k<m; k++) { /* loop over added rows */
931     row  = im[k];
932     rp   = aj + ai[row];
933     ap   = aa + 16*ai[row];
934     nrow = ailen[row];
935     low  = 0;
936     high = nrow;
937     for (l=0; l<n; l++) { /* loop over added columns */
938       col = in[l];
939       if (col <= lastcol)  low = 0;
940       else                high = nrow;
941       lastcol = col;
942       value   = v + k*(stepval+4 + l)*4;
943       while (high-low > 7) {
944         t = (low+high)/2;
945         if (rp[t] > col) high = t;
946         else             low  = t;
947       }
948       for (i=low; i<high; i++) {
949         if (rp[i] > col) break;
950         if (rp[i] == col) {
951           bap = ap +  16*i;
952           for (ii=0; ii<4; ii++,value+=stepval) {
953             for (jj=ii; jj<16; jj+=4) {
954               bap[jj] += *value++;
955             }
956           }
957           goto noinsert2;
958         }
959       }
960       N = nrow++ - 1;
961       high++; /* added new column index thus must search to one higher than before */
962       /* shift up all the later entries in this row */
963       for (ii=N; ii>=i; ii--) {
964         rp[ii+1] = rp[ii];
965         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
966       }
967       if (N >= i) {
968         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
969       }
970       rp[i] = col;
971       bap   = ap +  16*i;
972       for (ii=0; ii<4; ii++,value+=stepval) {
973         for (jj=ii; jj<16; jj+=4) {
974           bap[jj] = *value++;
975         }
976       }
977       noinsert2:;
978       low = i;
979     }
980     ailen[row] = nrow;
981   }
982   PetscFunctionReturnVoid();
983 }
984 
985 #if defined(PETSC_HAVE_FORTRAN_CAPS)
986 #define matsetvalues4_ MATSETVALUES4
987 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
988 #define matsetvalues4_ matsetvalues4
989 #endif
990 
991 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
992 {
993   Mat         A  = *AA;
994   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
995   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
996   PetscInt    *ai=a->i,*ailen=a->ilen;
997   PetscInt    *aj=a->j,brow,bcol;
998   PetscInt    ridx,cidx,lastcol = -1;
999   MatScalar   *ap,value,*aa=a->a,*bap;
1000 
1001   PetscFunctionBegin;
1002   for (k=0; k<m; k++) { /* loop over added rows */
1003     row  = im[k]; brow = row/4;
1004     rp   = aj + ai[brow];
1005     ap   = aa + 16*ai[brow];
1006     nrow = ailen[brow];
1007     low  = 0;
1008     high = nrow;
1009     for (l=0; l<n; l++) { /* loop over added columns */
1010       col   = in[l]; bcol = col/4;
1011       ridx  = row % 4; cidx = col % 4;
1012       value = v[l + k*n];
1013       if (col <= lastcol)  low = 0;
1014       else                high = nrow;
1015       lastcol = col;
1016       while (high-low > 7) {
1017         t = (low+high)/2;
1018         if (rp[t] > bcol) high = t;
1019         else              low  = t;
1020       }
1021       for (i=low; i<high; i++) {
1022         if (rp[i] > bcol) break;
1023         if (rp[i] == bcol) {
1024           bap   = ap +  16*i + 4*cidx + ridx;
1025           *bap += value;
1026           goto noinsert1;
1027         }
1028       }
1029       N = nrow++ - 1;
1030       high++; /* added new column thus must search to one higher than before */
1031       /* shift up all the later entries in this row */
1032       for (ii=N; ii>=i; ii--) {
1033         rp[ii+1] = rp[ii];
1034         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1035       }
1036       if (N>=i) {
1037         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1038       }
1039       rp[i]                    = bcol;
1040       ap[16*i + 4*cidx + ridx] = value;
1041 noinsert1:;
1042       low = i;
1043     }
1044     ailen[brow] = nrow;
1045   }
1046   PetscFunctionReturnVoid();
1047 }
1048 
1049 /*
1050      Checks for missing diagonals
1051 */
1052 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1053 {
1054   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1055   PetscErrorCode ierr;
1056   PetscInt       *diag,*ii = a->i,i;
1057 
1058   PetscFunctionBegin;
1059   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1060   *missing = PETSC_FALSE;
1061   if (A->rmap->n > 0 && !ii) {
1062     *missing = PETSC_TRUE;
1063     if (d) *d = 0;
1064     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1065   } else {
1066     diag = a->diag;
1067     for (i=0; i<a->mbs; i++) {
1068       if (diag[i] >= ii[i+1]) {
1069         *missing = PETSC_TRUE;
1070         if (d) *d = i;
1071         PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);
1072         break;
1073       }
1074     }
1075   }
1076   PetscFunctionReturn(0);
1077 }
1078 
1079 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1080 {
1081   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1082   PetscErrorCode ierr;
1083   PetscInt       i,j,m = a->mbs;
1084 
1085   PetscFunctionBegin;
1086   if (!a->diag) {
1087     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1088     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1089     a->free_diag = PETSC_TRUE;
1090   }
1091   for (i=0; i<m; i++) {
1092     a->diag[i] = a->i[i+1];
1093     for (j=a->i[i]; j<a->i[i+1]; j++) {
1094       if (a->j[j] == i) {
1095         a->diag[i] = j;
1096         break;
1097       }
1098     }
1099   }
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 
1104 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1105 {
1106   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1107   PetscErrorCode ierr;
1108   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1109   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1110 
1111   PetscFunctionBegin;
1112   *nn = n;
1113   if (!ia) PetscFunctionReturn(0);
1114   if (symmetric) {
1115     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1116     nz   = tia[n];
1117   } else {
1118     tia = a->i; tja = a->j;
1119   }
1120 
1121   if (!blockcompressed && bs > 1) {
1122     (*nn) *= bs;
1123     /* malloc & create the natural set of indices */
1124     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1125     if (n) {
1126       (*ia)[0] = oshift;
1127       for (j=1; j<bs; j++) {
1128         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1129       }
1130     }
1131 
1132     for (i=1; i<n; i++) {
1133       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1134       for (j=1; j<bs; j++) {
1135         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1136       }
1137     }
1138     if (n) {
1139       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1140     }
1141 
1142     if (inja) {
1143       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1144       cnt = 0;
1145       for (i=0; i<n; i++) {
1146         for (j=0; j<bs; j++) {
1147           for (k=tia[i]; k<tia[i+1]; k++) {
1148             for (l=0; l<bs; l++) {
1149               (*ja)[cnt++] = bs*tja[k] + l;
1150             }
1151           }
1152         }
1153       }
1154     }
1155 
1156     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1157       ierr = PetscFree(tia);CHKERRQ(ierr);
1158       ierr = PetscFree(tja);CHKERRQ(ierr);
1159     }
1160   } else if (oshift == 1) {
1161     if (symmetric) {
1162       nz = tia[A->rmap->n/bs];
1163       /*  add 1 to i and j indices */
1164       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1165       *ia = tia;
1166       if (ja) {
1167         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1168         *ja = tja;
1169       }
1170     } else {
1171       nz = a->i[A->rmap->n/bs];
1172       /* malloc space and  add 1 to i and j indices */
1173       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1174       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1175       if (ja) {
1176         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1177         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1178       }
1179     }
1180   } else {
1181     *ia = tia;
1182     if (ja) *ja = tja;
1183   }
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1188 {
1189   PetscErrorCode ierr;
1190 
1191   PetscFunctionBegin;
1192   if (!ia) PetscFunctionReturn(0);
1193   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1194     ierr = PetscFree(*ia);CHKERRQ(ierr);
1195     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1196   }
1197   PetscFunctionReturn(0);
1198 }
1199 
1200 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1201 {
1202   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1203   PetscErrorCode ierr;
1204 
1205   PetscFunctionBegin;
1206 #if defined(PETSC_USE_LOG)
1207   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1208 #endif
1209   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1210   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1211   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1212   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1213   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1214   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1215   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1216   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1217   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1218   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1219   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1220   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1221   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1222 
1223   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1224   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1225   ierr = PetscFree(A->data);CHKERRQ(ierr);
1226 
1227   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1228   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1229   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1230   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1231   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1232   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1233   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1234   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1235   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1236   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1237   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1238 #if defined(PETSC_HAVE_HYPRE)
1239   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1240 #endif
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1245 {
1246   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1247   PetscErrorCode ierr;
1248 
1249   PetscFunctionBegin;
1250   switch (op) {
1251   case MAT_ROW_ORIENTED:
1252     a->roworiented = flg;
1253     break;
1254   case MAT_KEEP_NONZERO_PATTERN:
1255     a->keepnonzeropattern = flg;
1256     break;
1257   case MAT_NEW_NONZERO_LOCATIONS:
1258     a->nonew = (flg ? 0 : 1);
1259     break;
1260   case MAT_NEW_NONZERO_LOCATION_ERR:
1261     a->nonew = (flg ? -1 : 0);
1262     break;
1263   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1264     a->nonew = (flg ? -2 : 0);
1265     break;
1266   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1267     a->nounused = (flg ? -1 : 0);
1268     break;
1269   case MAT_NEW_DIAGONALS:
1270   case MAT_IGNORE_OFF_PROC_ENTRIES:
1271   case MAT_USE_HASH_TABLE:
1272     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1273     break;
1274   case MAT_SPD:
1275   case MAT_SYMMETRIC:
1276   case MAT_STRUCTURALLY_SYMMETRIC:
1277   case MAT_HERMITIAN:
1278   case MAT_SYMMETRY_ETERNAL:
1279   case MAT_SUBMAT_SINGLEIS:
1280     /* These options are handled directly by MatSetOption() */
1281     break;
1282   default:
1283     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1284   }
1285   PetscFunctionReturn(0);
1286 }
1287 
1288 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1289 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1290 {
1291   PetscErrorCode ierr;
1292   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1293   MatScalar      *aa_i;
1294   PetscScalar    *v_i;
1295 
1296   PetscFunctionBegin;
1297   bs  = A->rmap->bs;
1298   bs2 = bs*bs;
1299   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1300 
1301   bn  = row/bs;   /* Block number */
1302   bp  = row % bs; /* Block Position */
1303   M   = ai[bn+1] - ai[bn];
1304   *nz = bs*M;
1305 
1306   if (v) {
1307     *v = 0;
1308     if (*nz) {
1309       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1310       for (i=0; i<M; i++) { /* for each block in the block row */
1311         v_i  = *v + i*bs;
1312         aa_i = aa + bs2*(ai[bn] + i);
1313         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1314       }
1315     }
1316   }
1317 
1318   if (idx) {
1319     *idx = 0;
1320     if (*nz) {
1321       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1322       for (i=0; i<M; i++) { /* for each block in the block row */
1323         idx_i = *idx + i*bs;
1324         itmp  = bs*aj[ai[bn] + i];
1325         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1326       }
1327     }
1328   }
1329   PetscFunctionReturn(0);
1330 }
1331 
1332 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1333 {
1334   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1335   PetscErrorCode ierr;
1336 
1337   PetscFunctionBegin;
1338   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1339   PetscFunctionReturn(0);
1340 }
1341 
1342 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1343 {
1344   PetscErrorCode ierr;
1345 
1346   PetscFunctionBegin;
1347   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1348   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1349   PetscFunctionReturn(0);
1350 }
1351 
1352 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1353 {
1354   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1355   Mat            C;
1356   PetscErrorCode ierr;
1357   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1358   PetscInt       *rows,*cols,bs2=a->bs2;
1359   MatScalar      *array;
1360 
1361   PetscFunctionBegin;
1362   if (reuse == MAT_INPLACE_MATRIX && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1363   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1364     ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr);
1365 
1366     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1367     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1368     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1369     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1370     ierr = MatSeqBAIJSetPreallocation(C,bs,0,col);CHKERRQ(ierr);
1371     ierr = PetscFree(col);CHKERRQ(ierr);
1372   } else {
1373     C = *B;
1374   }
1375 
1376   array = a->a;
1377   ierr  = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr);
1378   for (i=0; i<mbs; i++) {
1379     cols[0] = i*bs;
1380     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1381     len = ai[i+1] - ai[i];
1382     for (j=0; j<len; j++) {
1383       rows[0] = (*aj++)*bs;
1384       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1385       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1386       array += bs2;
1387     }
1388   }
1389   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1390 
1391   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1392   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1393 
1394   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1395     *B = C;
1396   } else {
1397     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1398   }
1399   PetscFunctionReturn(0);
1400 }
1401 
1402 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1403 {
1404   PetscErrorCode ierr;
1405   Mat            Btrans;
1406 
1407   PetscFunctionBegin;
1408   *f   = PETSC_FALSE;
1409   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1410   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1411   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1412   PetscFunctionReturn(0);
1413 }
1414 
1415 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1416 {
1417   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1418   PetscErrorCode ierr;
1419   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1420   int            fd;
1421   PetscScalar    *aa;
1422   FILE           *file;
1423 
1424   PetscFunctionBegin;
1425   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1426   ierr        = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr);
1427   col_lens[0] = MAT_FILE_CLASSID;
1428 
1429   col_lens[1] = A->rmap->N;
1430   col_lens[2] = A->cmap->n;
1431   col_lens[3] = a->nz*bs2;
1432 
1433   /* store lengths of each row and write (including header) to file */
1434   count = 0;
1435   for (i=0; i<a->mbs; i++) {
1436     for (j=0; j<bs; j++) {
1437       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1438     }
1439   }
1440   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1441   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1442 
1443   /* store column indices (zero start index) */
1444   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1445   count = 0;
1446   for (i=0; i<a->mbs; i++) {
1447     for (j=0; j<bs; j++) {
1448       for (k=a->i[i]; k<a->i[i+1]; k++) {
1449         for (l=0; l<bs; l++) {
1450           jj[count++] = bs*a->j[k] + l;
1451         }
1452       }
1453     }
1454   }
1455   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1456   ierr = PetscFree(jj);CHKERRQ(ierr);
1457 
1458   /* store nonzero values */
1459   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1460   count = 0;
1461   for (i=0; i<a->mbs; i++) {
1462     for (j=0; j<bs; j++) {
1463       for (k=a->i[i]; k<a->i[i+1]; k++) {
1464         for (l=0; l<bs; l++) {
1465           aa[count++] = a->a[bs2*k + l*bs + j];
1466         }
1467       }
1468     }
1469   }
1470   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1471   ierr = PetscFree(aa);CHKERRQ(ierr);
1472 
1473   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1474   if (file) {
1475     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1476   }
1477   PetscFunctionReturn(0);
1478 }
1479 
1480 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1481 {
1482   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1483   PetscErrorCode    ierr;
1484   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1485   PetscViewerFormat format;
1486 
1487   PetscFunctionBegin;
1488   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1489   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1490     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1491   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1492     const char *matname;
1493     Mat        aij;
1494     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1495     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1496     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1497     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1498     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1499   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1500       PetscFunctionReturn(0);
1501   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1502     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1503     for (i=0; i<a->mbs; i++) {
1504       for (j=0; j<bs; j++) {
1505         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1506         for (k=a->i[i]; k<a->i[i+1]; k++) {
1507           for (l=0; l<bs; l++) {
1508 #if defined(PETSC_USE_COMPLEX)
1509             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1510               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1511                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1512             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1513               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1514                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1515             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1516               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1517             }
1518 #else
1519             if (a->a[bs2*k + l*bs + j] != 0.0) {
1520               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1521             }
1522 #endif
1523           }
1524         }
1525         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1526       }
1527     }
1528     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1529   } else {
1530     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1531     for (i=0; i<a->mbs; i++) {
1532       for (j=0; j<bs; j++) {
1533         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1534         for (k=a->i[i]; k<a->i[i+1]; k++) {
1535           for (l=0; l<bs; l++) {
1536 #if defined(PETSC_USE_COMPLEX)
1537             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1538               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1539                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1540             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1541               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1542                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1543             } else {
1544               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1545             }
1546 #else
1547             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1548 #endif
1549           }
1550         }
1551         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1552       }
1553     }
1554     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1555   }
1556   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1557   PetscFunctionReturn(0);
1558 }
1559 
1560 #include <petscdraw.h>
1561 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1562 {
1563   Mat               A = (Mat) Aa;
1564   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1565   PetscErrorCode    ierr;
1566   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1567   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1568   MatScalar         *aa;
1569   PetscViewer       viewer;
1570   PetscViewerFormat format;
1571 
1572   PetscFunctionBegin;
1573   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1574   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1575   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1576 
1577   /* loop over matrix elements drawing boxes */
1578 
1579   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1580     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1581     /* Blue for negative, Cyan for zero and  Red for positive */
1582     color = PETSC_DRAW_BLUE;
1583     for (i=0,row=0; i<mbs; i++,row+=bs) {
1584       for (j=a->i[i]; j<a->i[i+1]; j++) {
1585         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1586         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1587         aa  = a->a + j*bs2;
1588         for (k=0; k<bs; k++) {
1589           for (l=0; l<bs; l++) {
1590             if (PetscRealPart(*aa++) >=  0.) continue;
1591             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1592           }
1593         }
1594       }
1595     }
1596     color = PETSC_DRAW_CYAN;
1597     for (i=0,row=0; i<mbs; i++,row+=bs) {
1598       for (j=a->i[i]; j<a->i[i+1]; j++) {
1599         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1600         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1601         aa  = a->a + j*bs2;
1602         for (k=0; k<bs; k++) {
1603           for (l=0; l<bs; l++) {
1604             if (PetscRealPart(*aa++) != 0.) continue;
1605             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1606           }
1607         }
1608       }
1609     }
1610     color = PETSC_DRAW_RED;
1611     for (i=0,row=0; i<mbs; i++,row+=bs) {
1612       for (j=a->i[i]; j<a->i[i+1]; j++) {
1613         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1614         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1615         aa  = a->a + j*bs2;
1616         for (k=0; k<bs; k++) {
1617           for (l=0; l<bs; l++) {
1618             if (PetscRealPart(*aa++) <= 0.) continue;
1619             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1620           }
1621         }
1622       }
1623     }
1624     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1625   } else {
1626     /* use contour shading to indicate magnitude of values */
1627     /* first determine max of all nonzero values */
1628     PetscReal minv = 0.0, maxv = 0.0;
1629     PetscDraw popup;
1630 
1631     for (i=0; i<a->nz*a->bs2; i++) {
1632       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1633     }
1634     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1635     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1636     ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1637 
1638     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1639     for (i=0,row=0; i<mbs; i++,row+=bs) {
1640       for (j=a->i[i]; j<a->i[i+1]; j++) {
1641         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1642         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1643         aa  = a->a + j*bs2;
1644         for (k=0; k<bs; k++) {
1645           for (l=0; l<bs; l++) {
1646             MatScalar v = *aa++;
1647             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1648             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1649           }
1650         }
1651       }
1652     }
1653     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1654   }
1655   PetscFunctionReturn(0);
1656 }
1657 
1658 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1659 {
1660   PetscErrorCode ierr;
1661   PetscReal      xl,yl,xr,yr,w,h;
1662   PetscDraw      draw;
1663   PetscBool      isnull;
1664 
1665   PetscFunctionBegin;
1666   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1667   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1668   if (isnull) PetscFunctionReturn(0);
1669 
1670   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1671   xr  += w;          yr += h;        xl = -w;     yl = -h;
1672   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1673   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1674   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1675   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1676   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1677   PetscFunctionReturn(0);
1678 }
1679 
1680 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1681 {
1682   PetscErrorCode ierr;
1683   PetscBool      iascii,isbinary,isdraw;
1684 
1685   PetscFunctionBegin;
1686   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1687   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1688   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1689   if (iascii) {
1690     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1691   } else if (isbinary) {
1692     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1693   } else if (isdraw) {
1694     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1695   } else {
1696     Mat B;
1697     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1698     ierr = MatView(B,viewer);CHKERRQ(ierr);
1699     ierr = MatDestroy(&B);CHKERRQ(ierr);
1700   }
1701   PetscFunctionReturn(0);
1702 }
1703 
1704 
1705 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1706 {
1707   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1708   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1709   PetscInt    *ai = a->i,*ailen = a->ilen;
1710   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1711   MatScalar   *ap,*aa = a->a;
1712 
1713   PetscFunctionBegin;
1714   for (k=0; k<m; k++) { /* loop over rows */
1715     row = im[k]; brow = row/bs;
1716     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1717     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1718     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1719     nrow = ailen[brow];
1720     for (l=0; l<n; l++) { /* loop over columns */
1721       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1722       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1723       col  = in[l];
1724       bcol = col/bs;
1725       cidx = col%bs;
1726       ridx = row%bs;
1727       high = nrow;
1728       low  = 0; /* assume unsorted */
1729       while (high-low > 5) {
1730         t = (low+high)/2;
1731         if (rp[t] > bcol) high = t;
1732         else             low  = t;
1733       }
1734       for (i=low; i<high; i++) {
1735         if (rp[i] > bcol) break;
1736         if (rp[i] == bcol) {
1737           *v++ = ap[bs2*i+bs*cidx+ridx];
1738           goto finished;
1739         }
1740       }
1741       *v++ = 0.0;
1742 finished:;
1743     }
1744   }
1745   PetscFunctionReturn(0);
1746 }
1747 
1748 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1749 {
1750   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1751   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1752   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1753   PetscErrorCode    ierr;
1754   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1755   PetscBool         roworiented=a->roworiented;
1756   const PetscScalar *value     = v;
1757   MatScalar         *ap,*aa = a->a,*bap;
1758 
1759   PetscFunctionBegin;
1760   if (roworiented) {
1761     stepval = (n-1)*bs;
1762   } else {
1763     stepval = (m-1)*bs;
1764   }
1765   for (k=0; k<m; k++) { /* loop over added rows */
1766     row = im[k];
1767     if (row < 0) continue;
1768 #if defined(PETSC_USE_DEBUG)
1769     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1770 #endif
1771     rp   = aj + ai[row];
1772     ap   = aa + bs2*ai[row];
1773     rmax = imax[row];
1774     nrow = ailen[row];
1775     low  = 0;
1776     high = nrow;
1777     for (l=0; l<n; l++) { /* loop over added columns */
1778       if (in[l] < 0) continue;
1779 #if defined(PETSC_USE_DEBUG)
1780       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1781 #endif
1782       col = in[l];
1783       if (roworiented) {
1784         value = v + (k*(stepval+bs) + l)*bs;
1785       } else {
1786         value = v + (l*(stepval+bs) + k)*bs;
1787       }
1788       if (col <= lastcol) low = 0;
1789       else high = nrow;
1790       lastcol = col;
1791       while (high-low > 7) {
1792         t = (low+high)/2;
1793         if (rp[t] > col) high = t;
1794         else             low  = t;
1795       }
1796       for (i=low; i<high; i++) {
1797         if (rp[i] > col) break;
1798         if (rp[i] == col) {
1799           bap = ap +  bs2*i;
1800           if (roworiented) {
1801             if (is == ADD_VALUES) {
1802               for (ii=0; ii<bs; ii++,value+=stepval) {
1803                 for (jj=ii; jj<bs2; jj+=bs) {
1804                   bap[jj] += *value++;
1805                 }
1806               }
1807             } else {
1808               for (ii=0; ii<bs; ii++,value+=stepval) {
1809                 for (jj=ii; jj<bs2; jj+=bs) {
1810                   bap[jj] = *value++;
1811                 }
1812               }
1813             }
1814           } else {
1815             if (is == ADD_VALUES) {
1816               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1817                 for (jj=0; jj<bs; jj++) {
1818                   bap[jj] += value[jj];
1819                 }
1820                 bap += bs;
1821               }
1822             } else {
1823               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1824                 for (jj=0; jj<bs; jj++) {
1825                   bap[jj]  = value[jj];
1826                 }
1827                 bap += bs;
1828               }
1829             }
1830           }
1831           goto noinsert2;
1832         }
1833       }
1834       if (nonew == 1) goto noinsert2;
1835       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1836       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1837       N = nrow++ - 1; high++;
1838       /* shift up all the later entries in this row */
1839       for (ii=N; ii>=i; ii--) {
1840         rp[ii+1] = rp[ii];
1841         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1842       }
1843       if (N >= i) {
1844         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1845       }
1846       rp[i] = col;
1847       bap   = ap +  bs2*i;
1848       if (roworiented) {
1849         for (ii=0; ii<bs; ii++,value+=stepval) {
1850           for (jj=ii; jj<bs2; jj+=bs) {
1851             bap[jj] = *value++;
1852           }
1853         }
1854       } else {
1855         for (ii=0; ii<bs; ii++,value+=stepval) {
1856           for (jj=0; jj<bs; jj++) {
1857             *bap++ = *value++;
1858           }
1859         }
1860       }
1861 noinsert2:;
1862       low = i;
1863     }
1864     ailen[row] = nrow;
1865   }
1866   PetscFunctionReturn(0);
1867 }
1868 
1869 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1870 {
1871   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1872   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1873   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1874   PetscErrorCode ierr;
1875   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1876   MatScalar      *aa  = a->a,*ap;
1877   PetscReal      ratio=0.6;
1878 
1879   PetscFunctionBegin;
1880   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1881 
1882   if (m) rmax = ailen[0];
1883   for (i=1; i<mbs; i++) {
1884     /* move each row back by the amount of empty slots (fshift) before it*/
1885     fshift += imax[i-1] - ailen[i-1];
1886     rmax    = PetscMax(rmax,ailen[i]);
1887     if (fshift) {
1888       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1889       N  = ailen[i];
1890       for (j=0; j<N; j++) {
1891         ip[j-fshift] = ip[j];
1892 
1893         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1894       }
1895     }
1896     ai[i] = ai[i-1] + ailen[i-1];
1897   }
1898   if (mbs) {
1899     fshift += imax[mbs-1] - ailen[mbs-1];
1900     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1901   }
1902 
1903   /* reset ilen and imax for each row */
1904   a->nonzerorowcnt = 0;
1905   for (i=0; i<mbs; i++) {
1906     ailen[i] = imax[i] = ai[i+1] - ai[i];
1907     a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1908   }
1909   a->nz = ai[mbs];
1910 
1911   /* diagonals may have moved, so kill the diagonal pointers */
1912   a->idiagvalid = PETSC_FALSE;
1913   if (fshift && a->diag) {
1914     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1915     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1916     a->diag = 0;
1917   }
1918   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1919   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1920   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1921   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1922 
1923   A->info.mallocs    += a->reallocs;
1924   a->reallocs         = 0;
1925   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1926   a->rmax             = rmax;
1927 
1928   ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1929   PetscFunctionReturn(0);
1930 }
1931 
1932 /*
1933    This function returns an array of flags which indicate the locations of contiguous
1934    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1935    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1936    Assume: sizes should be long enough to hold all the values.
1937 */
1938 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1939 {
1940   PetscInt  i,j,k,row;
1941   PetscBool flg;
1942 
1943   PetscFunctionBegin;
1944   for (i=0,j=0; i<n; j++) {
1945     row = idx[i];
1946     if (row%bs!=0) { /* Not the begining of a block */
1947       sizes[j] = 1;
1948       i++;
1949     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1950       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1951       i++;
1952     } else { /* Begining of the block, so check if the complete block exists */
1953       flg = PETSC_TRUE;
1954       for (k=1; k<bs; k++) {
1955         if (row+k != idx[i+k]) { /* break in the block */
1956           flg = PETSC_FALSE;
1957           break;
1958         }
1959       }
1960       if (flg) { /* No break in the bs */
1961         sizes[j] = bs;
1962         i       += bs;
1963       } else {
1964         sizes[j] = 1;
1965         i++;
1966       }
1967     }
1968   }
1969   *bs_max = j;
1970   PetscFunctionReturn(0);
1971 }
1972 
1973 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
1974 {
1975   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
1976   PetscErrorCode    ierr;
1977   PetscInt          i,j,k,count,*rows;
1978   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
1979   PetscScalar       zero = 0.0;
1980   MatScalar         *aa;
1981   const PetscScalar *xx;
1982   PetscScalar       *bb;
1983 
1984   PetscFunctionBegin;
1985   /* fix right hand side if needed */
1986   if (x && b) {
1987     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1988     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1989     for (i=0; i<is_n; i++) {
1990       bb[is_idx[i]] = diag*xx[is_idx[i]];
1991     }
1992     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1993     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1994   }
1995 
1996   /* Make a copy of the IS and  sort it */
1997   /* allocate memory for rows,sizes */
1998   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
1999 
2000   /* copy IS values to rows, and sort them */
2001   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2002   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2003 
2004   if (baij->keepnonzeropattern) {
2005     for (i=0; i<is_n; i++) sizes[i] = 1;
2006     bs_max          = is_n;
2007   } else {
2008     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2009     A->nonzerostate++;
2010   }
2011 
2012   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2013     row = rows[j];
2014     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2015     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2016     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2017     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2018       if (diag != (PetscScalar)0.0) {
2019         if (baij->ilen[row/bs] > 0) {
2020           baij->ilen[row/bs]       = 1;
2021           baij->j[baij->i[row/bs]] = row/bs;
2022 
2023           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2024         }
2025         /* Now insert all the diagonal values for this bs */
2026         for (k=0; k<bs; k++) {
2027           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2028         }
2029       } else { /* (diag == 0.0) */
2030         baij->ilen[row/bs] = 0;
2031       } /* end (diag == 0.0) */
2032     } else { /* (sizes[i] != bs) */
2033 #if defined(PETSC_USE_DEBUG)
2034       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2035 #endif
2036       for (k=0; k<count; k++) {
2037         aa[0] =  zero;
2038         aa   += bs;
2039       }
2040       if (diag != (PetscScalar)0.0) {
2041         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2042       }
2043     }
2044   }
2045 
2046   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2047   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2048   PetscFunctionReturn(0);
2049 }
2050 
2051 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2052 {
2053   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2054   PetscErrorCode    ierr;
2055   PetscInt          i,j,k,count;
2056   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2057   PetscScalar       zero = 0.0;
2058   MatScalar         *aa;
2059   const PetscScalar *xx;
2060   PetscScalar       *bb;
2061   PetscBool         *zeroed,vecs = PETSC_FALSE;
2062 
2063   PetscFunctionBegin;
2064   /* fix right hand side if needed */
2065   if (x && b) {
2066     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2067     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2068     vecs = PETSC_TRUE;
2069   }
2070 
2071   /* zero the columns */
2072   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2073   for (i=0; i<is_n; i++) {
2074     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2075     zeroed[is_idx[i]] = PETSC_TRUE;
2076   }
2077   for (i=0; i<A->rmap->N; i++) {
2078     if (!zeroed[i]) {
2079       row = i/bs;
2080       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2081         for (k=0; k<bs; k++) {
2082           col = bs*baij->j[j] + k;
2083           if (zeroed[col]) {
2084             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2085             if (vecs) bb[i] -= aa[0]*xx[col];
2086             aa[0] = 0.0;
2087           }
2088         }
2089       }
2090     } else if (vecs) bb[i] = diag*xx[i];
2091   }
2092   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2093   if (vecs) {
2094     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2095     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2096   }
2097 
2098   /* zero the rows */
2099   for (i=0; i<is_n; i++) {
2100     row   = is_idx[i];
2101     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2102     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2103     for (k=0; k<count; k++) {
2104       aa[0] =  zero;
2105       aa   += bs;
2106     }
2107     if (diag != (PetscScalar)0.0) {
2108       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2109     }
2110   }
2111   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2112   PetscFunctionReturn(0);
2113 }
2114 
2115 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2116 {
2117   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2118   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2119   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2120   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2121   PetscErrorCode ierr;
2122   PetscInt       ridx,cidx,bs2=a->bs2;
2123   PetscBool      roworiented=a->roworiented;
2124   MatScalar      *ap,value,*aa=a->a,*bap;
2125 
2126   PetscFunctionBegin;
2127   for (k=0; k<m; k++) { /* loop over added rows */
2128     row  = im[k];
2129     brow = row/bs;
2130     if (row < 0) continue;
2131 #if defined(PETSC_USE_DEBUG)
2132     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2133 #endif
2134     rp   = aj + ai[brow];
2135     ap   = aa + bs2*ai[brow];
2136     rmax = imax[brow];
2137     nrow = ailen[brow];
2138     low  = 0;
2139     high = nrow;
2140     for (l=0; l<n; l++) { /* loop over added columns */
2141       if (in[l] < 0) continue;
2142 #if defined(PETSC_USE_DEBUG)
2143       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2144 #endif
2145       col  = in[l]; bcol = col/bs;
2146       ridx = row % bs; cidx = col % bs;
2147       if (roworiented) {
2148         value = v[l + k*n];
2149       } else {
2150         value = v[k + l*m];
2151       }
2152       if (col <= lastcol) low = 0; else high = nrow;
2153       lastcol = col;
2154       while (high-low > 7) {
2155         t = (low+high)/2;
2156         if (rp[t] > bcol) high = t;
2157         else              low  = t;
2158       }
2159       for (i=low; i<high; i++) {
2160         if (rp[i] > bcol) break;
2161         if (rp[i] == bcol) {
2162           bap = ap +  bs2*i + bs*cidx + ridx;
2163           if (is == ADD_VALUES) *bap += value;
2164           else                  *bap  = value;
2165           goto noinsert1;
2166         }
2167       }
2168       if (nonew == 1) goto noinsert1;
2169       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2170       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2171       N = nrow++ - 1; high++;
2172       /* shift up all the later entries in this row */
2173       for (ii=N; ii>=i; ii--) {
2174         rp[ii+1] = rp[ii];
2175         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2176       }
2177       if (N>=i) {
2178         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2179       }
2180       rp[i]                      = bcol;
2181       ap[bs2*i + bs*cidx + ridx] = value;
2182       a->nz++;
2183       A->nonzerostate++;
2184 noinsert1:;
2185       low = i;
2186     }
2187     ailen[brow] = nrow;
2188   }
2189   PetscFunctionReturn(0);
2190 }
2191 
2192 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2193 {
2194   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2195   Mat            outA;
2196   PetscErrorCode ierr;
2197   PetscBool      row_identity,col_identity;
2198 
2199   PetscFunctionBegin;
2200   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2201   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2202   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2203   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2204 
2205   outA            = inA;
2206   inA->factortype = MAT_FACTOR_LU;
2207   ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2208   ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
2209 
2210   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2211 
2212   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2213   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2214   a->row = row;
2215   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2216   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2217   a->col = col;
2218 
2219   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2220   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2221   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2222   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2223 
2224   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2225   if (!a->solve_work) {
2226     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2227     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2228   }
2229   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2230   PetscFunctionReturn(0);
2231 }
2232 
2233 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2234 {
2235   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2236   PetscInt    i,nz,mbs;
2237 
2238   PetscFunctionBegin;
2239   nz  = baij->maxnz;
2240   mbs = baij->mbs;
2241   for (i=0; i<nz; i++) {
2242     baij->j[i] = indices[i];
2243   }
2244   baij->nz = nz;
2245   for (i=0; i<mbs; i++) {
2246     baij->ilen[i] = baij->imax[i];
2247   }
2248   PetscFunctionReturn(0);
2249 }
2250 
2251 /*@
2252     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2253        in the matrix.
2254 
2255   Input Parameters:
2256 +  mat - the SeqBAIJ matrix
2257 -  indices - the column indices
2258 
2259   Level: advanced
2260 
2261   Notes:
2262     This can be called if you have precomputed the nonzero structure of the
2263   matrix and want to provide it to the matrix object to improve the performance
2264   of the MatSetValues() operation.
2265 
2266     You MUST have set the correct numbers of nonzeros per row in the call to
2267   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2268 
2269     MUST be called before any calls to MatSetValues();
2270 
2271 @*/
2272 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2273 {
2274   PetscErrorCode ierr;
2275 
2276   PetscFunctionBegin;
2277   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2278   PetscValidPointer(indices,2);
2279   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2280   PetscFunctionReturn(0);
2281 }
2282 
2283 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2284 {
2285   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2286   PetscErrorCode ierr;
2287   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2288   PetscReal      atmp;
2289   PetscScalar    *x,zero = 0.0;
2290   MatScalar      *aa;
2291   PetscInt       ncols,brow,krow,kcol;
2292 
2293   PetscFunctionBegin;
2294   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2295   bs  = A->rmap->bs;
2296   aa  = a->a;
2297   ai  = a->i;
2298   aj  = a->j;
2299   mbs = a->mbs;
2300 
2301   ierr = VecSet(v,zero);CHKERRQ(ierr);
2302   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2303   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2304   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2305   for (i=0; i<mbs; i++) {
2306     ncols = ai[1] - ai[0]; ai++;
2307     brow  = bs*i;
2308     for (j=0; j<ncols; j++) {
2309       for (kcol=0; kcol<bs; kcol++) {
2310         for (krow=0; krow<bs; krow++) {
2311           atmp = PetscAbsScalar(*aa);aa++;
2312           row  = brow + krow;   /* row index */
2313           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2314         }
2315       }
2316       aj++;
2317     }
2318   }
2319   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2320   PetscFunctionReturn(0);
2321 }
2322 
2323 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2324 {
2325   PetscErrorCode ierr;
2326 
2327   PetscFunctionBegin;
2328   /* If the two matrices have the same copy implementation, use fast copy. */
2329   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2330     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2331     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2332     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2333 
2334     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2335     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2336     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2337   } else {
2338     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2339   }
2340   PetscFunctionReturn(0);
2341 }
2342 
2343 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2344 {
2345   PetscErrorCode ierr;
2346 
2347   PetscFunctionBegin;
2348   ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2349   PetscFunctionReturn(0);
2350 }
2351 
2352 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2353 {
2354   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2355 
2356   PetscFunctionBegin;
2357   *array = a->a;
2358   PetscFunctionReturn(0);
2359 }
2360 
2361 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2362 {
2363   PetscFunctionBegin;
2364   PetscFunctionReturn(0);
2365 }
2366 
2367 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2368 {
2369   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2370   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2371   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2372   PetscErrorCode ierr;
2373 
2374   PetscFunctionBegin;
2375   /* Set the number of nonzeros in the new matrix */
2376   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2377   PetscFunctionReturn(0);
2378 }
2379 
2380 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2381 {
2382   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2383   PetscErrorCode ierr;
2384   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2385   PetscBLASInt   one=1;
2386 
2387   PetscFunctionBegin;
2388   if (str == SAME_NONZERO_PATTERN) {
2389     PetscScalar  alpha = a;
2390     PetscBLASInt bnz;
2391     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2392     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2393     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2394   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2395     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2396   } else {
2397     Mat      B;
2398     PetscInt *nnz;
2399     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2400     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2401     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2402     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2403     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2404     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2405     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2406     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2407     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2408     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2409     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2410     ierr = PetscFree(nnz);CHKERRQ(ierr);
2411   }
2412   PetscFunctionReturn(0);
2413 }
2414 
2415 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2416 {
2417   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2418   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2419   MatScalar   *aa = a->a;
2420 
2421   PetscFunctionBegin;
2422   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2423   PetscFunctionReturn(0);
2424 }
2425 
2426 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2427 {
2428   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2429   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2430   MatScalar   *aa = a->a;
2431 
2432   PetscFunctionBegin;
2433   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2434   PetscFunctionReturn(0);
2435 }
2436 
2437 /*
2438     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2439 */
2440 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2441 {
2442   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2443   PetscErrorCode ierr;
2444   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2445   PetscInt       nz = a->i[m],row,*jj,mr,col;
2446 
2447   PetscFunctionBegin;
2448   *nn = n;
2449   if (!ia) PetscFunctionReturn(0);
2450   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2451   else {
2452     ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2453     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2454     ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2455     jj   = a->j;
2456     for (i=0; i<nz; i++) {
2457       collengths[jj[i]]++;
2458     }
2459     cia[0] = oshift;
2460     for (i=0; i<n; i++) {
2461       cia[i+1] = cia[i] + collengths[i];
2462     }
2463     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2464     jj   = a->j;
2465     for (row=0; row<m; row++) {
2466       mr = a->i[row+1] - a->i[row];
2467       for (i=0; i<mr; i++) {
2468         col = *jj++;
2469 
2470         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2471       }
2472     }
2473     ierr = PetscFree(collengths);CHKERRQ(ierr);
2474     *ia  = cia; *ja = cja;
2475   }
2476   PetscFunctionReturn(0);
2477 }
2478 
2479 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2480 {
2481   PetscErrorCode ierr;
2482 
2483   PetscFunctionBegin;
2484   if (!ia) PetscFunctionReturn(0);
2485   ierr = PetscFree(*ia);CHKERRQ(ierr);
2486   ierr = PetscFree(*ja);CHKERRQ(ierr);
2487   PetscFunctionReturn(0);
2488 }
2489 
2490 /*
2491  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2492  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2493  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2494  */
2495 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2496 {
2497   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2498   PetscErrorCode ierr;
2499   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2500   PetscInt       nz = a->i[m],row,*jj,mr,col;
2501   PetscInt       *cspidx;
2502 
2503   PetscFunctionBegin;
2504   *nn = n;
2505   if (!ia) PetscFunctionReturn(0);
2506 
2507   ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2508   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2509   ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2510   ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr);
2511   jj   = a->j;
2512   for (i=0; i<nz; i++) {
2513     collengths[jj[i]]++;
2514   }
2515   cia[0] = oshift;
2516   for (i=0; i<n; i++) {
2517     cia[i+1] = cia[i] + collengths[i];
2518   }
2519   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2520   jj   = a->j;
2521   for (row=0; row<m; row++) {
2522     mr = a->i[row+1] - a->i[row];
2523     for (i=0; i<mr; i++) {
2524       col = *jj++;
2525       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2526       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2527     }
2528   }
2529   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2530   *ia    = cia; *ja = cja;
2531   *spidx = cspidx;
2532   PetscFunctionReturn(0);
2533 }
2534 
2535 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2536 {
2537   PetscErrorCode ierr;
2538 
2539   PetscFunctionBegin;
2540   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2541   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2542   PetscFunctionReturn(0);
2543 }
2544 
2545 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2546 {
2547   PetscErrorCode ierr;
2548   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2549 
2550   PetscFunctionBegin;
2551   if (!Y->preallocated || !aij->nz) {
2552     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2553   }
2554   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2555   PetscFunctionReturn(0);
2556 }
2557 
2558 /* -------------------------------------------------------------------*/
2559 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2560                                        MatGetRow_SeqBAIJ,
2561                                        MatRestoreRow_SeqBAIJ,
2562                                        MatMult_SeqBAIJ_N,
2563                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2564                                        MatMultTranspose_SeqBAIJ,
2565                                        MatMultTransposeAdd_SeqBAIJ,
2566                                        0,
2567                                        0,
2568                                        0,
2569                                /* 10*/ 0,
2570                                        MatLUFactor_SeqBAIJ,
2571                                        0,
2572                                        0,
2573                                        MatTranspose_SeqBAIJ,
2574                                /* 15*/ MatGetInfo_SeqBAIJ,
2575                                        MatEqual_SeqBAIJ,
2576                                        MatGetDiagonal_SeqBAIJ,
2577                                        MatDiagonalScale_SeqBAIJ,
2578                                        MatNorm_SeqBAIJ,
2579                                /* 20*/ 0,
2580                                        MatAssemblyEnd_SeqBAIJ,
2581                                        MatSetOption_SeqBAIJ,
2582                                        MatZeroEntries_SeqBAIJ,
2583                                /* 24*/ MatZeroRows_SeqBAIJ,
2584                                        0,
2585                                        0,
2586                                        0,
2587                                        0,
2588                                /* 29*/ MatSetUp_SeqBAIJ,
2589                                        0,
2590                                        0,
2591                                        0,
2592                                        0,
2593                                /* 34*/ MatDuplicate_SeqBAIJ,
2594                                        0,
2595                                        0,
2596                                        MatILUFactor_SeqBAIJ,
2597                                        0,
2598                                /* 39*/ MatAXPY_SeqBAIJ,
2599                                        MatCreateSubMatrices_SeqBAIJ,
2600                                        MatIncreaseOverlap_SeqBAIJ,
2601                                        MatGetValues_SeqBAIJ,
2602                                        MatCopy_SeqBAIJ,
2603                                /* 44*/ 0,
2604                                        MatScale_SeqBAIJ,
2605                                        MatShift_SeqBAIJ,
2606                                        0,
2607                                        MatZeroRowsColumns_SeqBAIJ,
2608                                /* 49*/ 0,
2609                                        MatGetRowIJ_SeqBAIJ,
2610                                        MatRestoreRowIJ_SeqBAIJ,
2611                                        MatGetColumnIJ_SeqBAIJ,
2612                                        MatRestoreColumnIJ_SeqBAIJ,
2613                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2614                                        0,
2615                                        0,
2616                                        0,
2617                                        MatSetValuesBlocked_SeqBAIJ,
2618                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2619                                        MatDestroy_SeqBAIJ,
2620                                        MatView_SeqBAIJ,
2621                                        0,
2622                                        0,
2623                                /* 64*/ 0,
2624                                        0,
2625                                        0,
2626                                        0,
2627                                        0,
2628                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2629                                        0,
2630                                        MatConvert_Basic,
2631                                        0,
2632                                        0,
2633                                /* 74*/ 0,
2634                                        MatFDColoringApply_BAIJ,
2635                                        0,
2636                                        0,
2637                                        0,
2638                                /* 79*/ 0,
2639                                        0,
2640                                        0,
2641                                        0,
2642                                        MatLoad_SeqBAIJ,
2643                                /* 84*/ 0,
2644                                        0,
2645                                        0,
2646                                        0,
2647                                        0,
2648                                /* 89*/ 0,
2649                                        0,
2650                                        0,
2651                                        0,
2652                                        0,
2653                                /* 94*/ 0,
2654                                        0,
2655                                        0,
2656                                        0,
2657                                        0,
2658                                /* 99*/ 0,
2659                                        0,
2660                                        0,
2661                                        0,
2662                                        0,
2663                                /*104*/ 0,
2664                                        MatRealPart_SeqBAIJ,
2665                                        MatImaginaryPart_SeqBAIJ,
2666                                        0,
2667                                        0,
2668                                /*109*/ 0,
2669                                        0,
2670                                        0,
2671                                        0,
2672                                        MatMissingDiagonal_SeqBAIJ,
2673                                /*114*/ 0,
2674                                        0,
2675                                        0,
2676                                        0,
2677                                        0,
2678                                /*119*/ 0,
2679                                        0,
2680                                        MatMultHermitianTranspose_SeqBAIJ,
2681                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2682                                        0,
2683                                /*124*/ 0,
2684                                        0,
2685                                        MatInvertBlockDiagonal_SeqBAIJ,
2686                                        0,
2687                                        0,
2688                                /*129*/ 0,
2689                                        0,
2690                                        0,
2691                                        0,
2692                                        0,
2693                                /*134*/ 0,
2694                                        0,
2695                                        0,
2696                                        0,
2697                                        0,
2698                                /*139*/ MatSetBlockSizes_Default,
2699                                        0,
2700                                        0,
2701                                        MatFDColoringSetUp_SeqXAIJ,
2702                                        0,
2703                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ
2704 };
2705 
2706 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2707 {
2708   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2709   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2710   PetscErrorCode ierr;
2711 
2712   PetscFunctionBegin;
2713   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2714 
2715   /* allocate space for values if not already there */
2716   if (!aij->saved_values) {
2717     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2718     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2719   }
2720 
2721   /* copy values over */
2722   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2723   PetscFunctionReturn(0);
2724 }
2725 
2726 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2727 {
2728   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2729   PetscErrorCode ierr;
2730   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2731 
2732   PetscFunctionBegin;
2733   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2734   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2735 
2736   /* copy values over */
2737   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2738   PetscFunctionReturn(0);
2739 }
2740 
2741 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2742 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2743 
2744 static PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2745 {
2746   Mat_SeqBAIJ    *b;
2747   PetscErrorCode ierr;
2748   PetscInt       i,mbs,nbs,bs2;
2749   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2750 
2751   PetscFunctionBegin;
2752   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2753   if (nz == MAT_SKIP_ALLOCATION) {
2754     skipallocation = PETSC_TRUE;
2755     nz             = 0;
2756   }
2757 
2758   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2759   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2760   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2761   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2762 
2763   B->preallocated = PETSC_TRUE;
2764 
2765   mbs = B->rmap->n/bs;
2766   nbs = B->cmap->n/bs;
2767   bs2 = bs*bs;
2768 
2769   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2770 
2771   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2772   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2773   if (nnz) {
2774     for (i=0; i<mbs; i++) {
2775       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2776       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2777     }
2778   }
2779 
2780   b    = (Mat_SeqBAIJ*)B->data;
2781   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2782   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2783   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2784 
2785   if (!flg) {
2786     switch (bs) {
2787     case 1:
2788       B->ops->mult    = MatMult_SeqBAIJ_1;
2789       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2790       break;
2791     case 2:
2792       B->ops->mult    = MatMult_SeqBAIJ_2;
2793       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2794       break;
2795     case 3:
2796       B->ops->mult    = MatMult_SeqBAIJ_3;
2797       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2798       break;
2799     case 4:
2800       B->ops->mult    = MatMult_SeqBAIJ_4;
2801       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2802       break;
2803     case 5:
2804       B->ops->mult    = MatMult_SeqBAIJ_5;
2805       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2806       break;
2807     case 6:
2808       B->ops->mult    = MatMult_SeqBAIJ_6;
2809       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2810       break;
2811     case 7:
2812       B->ops->mult    = MatMult_SeqBAIJ_7;
2813       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2814       break;
2815     case 11:
2816       B->ops->mult    = MatMult_SeqBAIJ_11;
2817       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2818       break;
2819     case 15:
2820       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2821       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2822       break;
2823     default:
2824       B->ops->mult    = MatMult_SeqBAIJ_N;
2825       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2826       break;
2827     }
2828   }
2829   B->ops->sor = MatSOR_SeqBAIJ;
2830   b->mbs = mbs;
2831   b->nbs = nbs;
2832   if (!skipallocation) {
2833     if (!b->imax) {
2834       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2835       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2836 
2837       b->free_imax_ilen = PETSC_TRUE;
2838     }
2839     /* b->ilen will count nonzeros in each block row so far. */
2840     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2841     if (!nnz) {
2842       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2843       else if (nz < 0) nz = 1;
2844       for (i=0; i<mbs; i++) b->imax[i] = nz;
2845       nz = nz*mbs;
2846     } else {
2847       nz = 0;
2848       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2849     }
2850 
2851     /* allocate the matrix space */
2852     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2853     ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2854     ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2855     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2856     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2857 
2858     b->singlemalloc = PETSC_TRUE;
2859     b->i[0]         = 0;
2860     for (i=1; i<mbs+1; i++) {
2861       b->i[i] = b->i[i-1] + b->imax[i-1];
2862     }
2863     b->free_a  = PETSC_TRUE;
2864     b->free_ij = PETSC_TRUE;
2865   } else {
2866     b->free_a  = PETSC_FALSE;
2867     b->free_ij = PETSC_FALSE;
2868   }
2869 
2870   b->bs2              = bs2;
2871   b->mbs              = mbs;
2872   b->nz               = 0;
2873   b->maxnz            = nz;
2874   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2875   B->was_assembled    = PETSC_FALSE;
2876   B->assembled        = PETSC_FALSE;
2877   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2878   PetscFunctionReturn(0);
2879 }
2880 
2881 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2882 {
2883   PetscInt       i,m,nz,nz_max=0,*nnz;
2884   PetscScalar    *values=0;
2885   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2886   PetscErrorCode ierr;
2887 
2888   PetscFunctionBegin;
2889   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2890   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2891   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2892   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2893   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2894   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2895   m    = B->rmap->n/bs;
2896 
2897   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2898   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2899   for (i=0; i<m; i++) {
2900     nz = ii[i+1]- ii[i];
2901     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2902     nz_max = PetscMax(nz_max, nz);
2903     nnz[i] = nz;
2904   }
2905   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2906   ierr = PetscFree(nnz);CHKERRQ(ierr);
2907 
2908   values = (PetscScalar*)V;
2909   if (!values) {
2910     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2911   }
2912   for (i=0; i<m; i++) {
2913     PetscInt          ncols  = ii[i+1] - ii[i];
2914     const PetscInt    *icols = jj + ii[i];
2915     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2916     if (!roworiented) {
2917       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2918     } else {
2919       PetscInt j;
2920       for (j=0; j<ncols; j++) {
2921         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2922         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2923       }
2924     }
2925   }
2926   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2927   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2928   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2929   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2930   PetscFunctionReturn(0);
2931 }
2932 
2933 /*MC
2934    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2935    block sparse compressed row format.
2936 
2937    Options Database Keys:
2938 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2939 
2940   Level: beginner
2941 
2942 .seealso: MatCreateSeqBAIJ()
2943 M*/
2944 
2945 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
2946 
2947 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
2948 {
2949   PetscErrorCode ierr;
2950   PetscMPIInt    size;
2951   Mat_SeqBAIJ    *b;
2952 
2953   PetscFunctionBegin;
2954   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2955   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2956 
2957   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2958   B->data = (void*)b;
2959   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2960 
2961   b->row          = 0;
2962   b->col          = 0;
2963   b->icol         = 0;
2964   b->reallocs     = 0;
2965   b->saved_values = 0;
2966 
2967   b->roworiented        = PETSC_TRUE;
2968   b->nonew              = 0;
2969   b->diag               = 0;
2970   B->spptr              = 0;
2971   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
2972   b->keepnonzeropattern = PETSC_FALSE;
2973 
2974   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2975   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2976   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2977   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2978   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2979   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2980   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2981   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
2982   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
2983 #if defined(PETSC_HAVE_HYPRE)
2984   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_baij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
2985 #endif
2986   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
2987   PetscFunctionReturn(0);
2988 }
2989 
2990 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
2991 {
2992   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
2993   PetscErrorCode ierr;
2994   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2995 
2996   PetscFunctionBegin;
2997   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
2998 
2999   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3000     c->imax           = a->imax;
3001     c->ilen           = a->ilen;
3002     c->free_imax_ilen = PETSC_FALSE;
3003   } else {
3004     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3005     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3006     for (i=0; i<mbs; i++) {
3007       c->imax[i] = a->imax[i];
3008       c->ilen[i] = a->ilen[i];
3009     }
3010     c->free_imax_ilen = PETSC_TRUE;
3011   }
3012 
3013   /* allocate the matrix space */
3014   if (mallocmatspace) {
3015     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3016       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3017       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3018 
3019       c->i            = a->i;
3020       c->j            = a->j;
3021       c->singlemalloc = PETSC_FALSE;
3022       c->free_a       = PETSC_TRUE;
3023       c->free_ij      = PETSC_FALSE;
3024       c->parent       = A;
3025       C->preallocated = PETSC_TRUE;
3026       C->assembled    = PETSC_TRUE;
3027 
3028       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3029       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3030       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3031     } else {
3032       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3033       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3034 
3035       c->singlemalloc = PETSC_TRUE;
3036       c->free_a       = PETSC_TRUE;
3037       c->free_ij      = PETSC_TRUE;
3038 
3039       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3040       if (mbs > 0) {
3041         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3042         if (cpvalues == MAT_COPY_VALUES) {
3043           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3044         } else {
3045           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3046         }
3047       }
3048       C->preallocated = PETSC_TRUE;
3049       C->assembled    = PETSC_TRUE;
3050     }
3051   }
3052 
3053   c->roworiented = a->roworiented;
3054   c->nonew       = a->nonew;
3055 
3056   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3057   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3058 
3059   c->bs2         = a->bs2;
3060   c->mbs         = a->mbs;
3061   c->nbs         = a->nbs;
3062 
3063   if (a->diag) {
3064     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3065       c->diag      = a->diag;
3066       c->free_diag = PETSC_FALSE;
3067     } else {
3068       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3069       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3070       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3071       c->free_diag = PETSC_TRUE;
3072     }
3073   } else c->diag = 0;
3074 
3075   c->nz         = a->nz;
3076   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3077   c->solve_work = NULL;
3078   c->mult_work  = NULL;
3079   c->sor_workt  = NULL;
3080   c->sor_work   = NULL;
3081 
3082   c->compressedrow.use   = a->compressedrow.use;
3083   c->compressedrow.nrows = a->compressedrow.nrows;
3084   if (a->compressedrow.use) {
3085     i    = a->compressedrow.nrows;
3086     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3087     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3088     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3089     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3090   } else {
3091     c->compressedrow.use    = PETSC_FALSE;
3092     c->compressedrow.i      = NULL;
3093     c->compressedrow.rindex = NULL;
3094   }
3095   C->nonzerostate = A->nonzerostate;
3096 
3097   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3098   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3099   PetscFunctionReturn(0);
3100 }
3101 
3102 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3103 {
3104   PetscErrorCode ierr;
3105 
3106   PetscFunctionBegin;
3107   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3108   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3109   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3110   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3111   PetscFunctionReturn(0);
3112 }
3113 
3114 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3115 {
3116   Mat_SeqBAIJ    *a;
3117   PetscErrorCode ierr;
3118   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3119   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3120   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3121   PetscInt       *masked,nmask,tmp,bs2,ishift;
3122   PetscMPIInt    size;
3123   int            fd;
3124   PetscScalar    *aa;
3125   MPI_Comm       comm;
3126 
3127   PetscFunctionBegin;
3128   /* force binary viewer to load .info file if it has not yet done so */
3129   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3130   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3131   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3132   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3133   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3134   if (bs < 0) bs = 1;
3135   bs2  = bs*bs;
3136 
3137   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3138   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3139   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3140   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3141   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3142   M = header[1]; N = header[2]; nz = header[3];
3143 
3144   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3145   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3146 
3147   /*
3148      This code adds extra rows to make sure the number of rows is
3149     divisible by the blocksize
3150   */
3151   mbs        = M/bs;
3152   extra_rows = bs - M + bs*(mbs);
3153   if (extra_rows == bs) extra_rows = 0;
3154   else mbs++;
3155   if (extra_rows) {
3156     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3157   }
3158 
3159   /* Set global sizes if not already set */
3160   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3161     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3162   } else { /* Check if the matrix global sizes are correct */
3163     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3164     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3165       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3166     }
3167     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3168   }
3169 
3170   /* read in row lengths */
3171   ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
3172   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3173   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3174 
3175   /* read in column indices */
3176   ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr);
3177   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3178   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3179 
3180   /* loop over row lengths determining block row lengths */
3181   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3182   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3183   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3184   rowcount = 0;
3185   nzcount  = 0;
3186   for (i=0; i<mbs; i++) {
3187     nmask = 0;
3188     for (j=0; j<bs; j++) {
3189       kmax = rowlengths[rowcount];
3190       for (k=0; k<kmax; k++) {
3191         tmp = jj[nzcount++]/bs;
3192         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3193       }
3194       rowcount++;
3195     }
3196     browlengths[i] += nmask;
3197     /* zero out the mask elements we set */
3198     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3199   }
3200 
3201   /* Do preallocation  */
3202   ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr);
3203   a    = (Mat_SeqBAIJ*)newmat->data;
3204 
3205   /* set matrix "i" values */
3206   a->i[0] = 0;
3207   for (i=1; i<= mbs; i++) {
3208     a->i[i]      = a->i[i-1] + browlengths[i-1];
3209     a->ilen[i-1] = browlengths[i-1];
3210   }
3211   a->nz = 0;
3212   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3213 
3214   /* read in nonzero values */
3215   ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr);
3216   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3217   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3218 
3219   /* set "a" and "j" values into matrix */
3220   nzcount = 0; jcount = 0;
3221   for (i=0; i<mbs; i++) {
3222     nzcountb = nzcount;
3223     nmask    = 0;
3224     for (j=0; j<bs; j++) {
3225       kmax = rowlengths[i*bs+j];
3226       for (k=0; k<kmax; k++) {
3227         tmp = jj[nzcount++]/bs;
3228         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3229       }
3230     }
3231     /* sort the masked values */
3232     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3233 
3234     /* set "j" values into matrix */
3235     maskcount = 1;
3236     for (j=0; j<nmask; j++) {
3237       a->j[jcount++]  = masked[j];
3238       mask[masked[j]] = maskcount++;
3239     }
3240     /* set "a" values into matrix */
3241     ishift = bs2*a->i[i];
3242     for (j=0; j<bs; j++) {
3243       kmax = rowlengths[i*bs+j];
3244       for (k=0; k<kmax; k++) {
3245         tmp       = jj[nzcountb]/bs;
3246         block     = mask[tmp] - 1;
3247         point     = jj[nzcountb] - bs*tmp;
3248         idx       = ishift + bs2*block + j + bs*point;
3249         a->a[idx] = (MatScalar)aa[nzcountb++];
3250       }
3251     }
3252     /* zero out the mask elements we set */
3253     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3254   }
3255   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3256 
3257   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3258   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3259   ierr = PetscFree(aa);CHKERRQ(ierr);
3260   ierr = PetscFree(jj);CHKERRQ(ierr);
3261   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3262 
3263   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3264   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3265   PetscFunctionReturn(0);
3266 }
3267 
3268 /*@C
3269    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3270    compressed row) format.  For good matrix assembly performance the
3271    user should preallocate the matrix storage by setting the parameter nz
3272    (or the array nnz).  By setting these parameters accurately, performance
3273    during matrix assembly can be increased by more than a factor of 50.
3274 
3275    Collective on MPI_Comm
3276 
3277    Input Parameters:
3278 +  comm - MPI communicator, set to PETSC_COMM_SELF
3279 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3280           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3281 .  m - number of rows
3282 .  n - number of columns
3283 .  nz - number of nonzero blocks  per block row (same for all rows)
3284 -  nnz - array containing the number of nonzero blocks in the various block rows
3285          (possibly different for each block row) or NULL
3286 
3287    Output Parameter:
3288 .  A - the matrix
3289 
3290    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3291    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3292    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3293 
3294    Options Database Keys:
3295 .   -mat_no_unroll - uses code that does not unroll the loops in the
3296                      block calculations (much slower)
3297 .    -mat_block_size - size of the blocks to use
3298 
3299    Level: intermediate
3300 
3301    Notes:
3302    The number of rows and columns must be divisible by blocksize.
3303 
3304    If the nnz parameter is given then the nz parameter is ignored
3305 
3306    A nonzero block is any block that as 1 or more nonzeros in it
3307 
3308    The block AIJ format is fully compatible with standard Fortran 77
3309    storage.  That is, the stored row and column indices can begin at
3310    either one (as in Fortran) or zero.  See the users' manual for details.
3311 
3312    Specify the preallocated storage with either nz or nnz (not both).
3313    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3314    allocation.  See Users-Manual: ch_mat for details.
3315    matrices.
3316 
3317 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3318 @*/
3319 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3320 {
3321   PetscErrorCode ierr;
3322 
3323   PetscFunctionBegin;
3324   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3325   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3326   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3327   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3328   PetscFunctionReturn(0);
3329 }
3330 
3331 /*@C
3332    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3333    per row in the matrix. For good matrix assembly performance the
3334    user should preallocate the matrix storage by setting the parameter nz
3335    (or the array nnz).  By setting these parameters accurately, performance
3336    during matrix assembly can be increased by more than a factor of 50.
3337 
3338    Collective on MPI_Comm
3339 
3340    Input Parameters:
3341 +  B - the matrix
3342 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3343           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3344 .  nz - number of block nonzeros per block row (same for all rows)
3345 -  nnz - array containing the number of block nonzeros in the various block rows
3346          (possibly different for each block row) or NULL
3347 
3348    Options Database Keys:
3349 .   -mat_no_unroll - uses code that does not unroll the loops in the
3350                      block calculations (much slower)
3351 .   -mat_block_size - size of the blocks to use
3352 
3353    Level: intermediate
3354 
3355    Notes:
3356    If the nnz parameter is given then the nz parameter is ignored
3357 
3358    You can call MatGetInfo() to get information on how effective the preallocation was;
3359    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3360    You can also run with the option -info and look for messages with the string
3361    malloc in them to see if additional memory allocation was needed.
3362 
3363    The block AIJ format is fully compatible with standard Fortran 77
3364    storage.  That is, the stored row and column indices can begin at
3365    either one (as in Fortran) or zero.  See the users' manual for details.
3366 
3367    Specify the preallocated storage with either nz or nnz (not both).
3368    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3369    allocation.  See Users-Manual: ch_mat for details.
3370 
3371 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3372 @*/
3373 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3374 {
3375   PetscErrorCode ierr;
3376 
3377   PetscFunctionBegin;
3378   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3379   PetscValidType(B,1);
3380   PetscValidLogicalCollectiveInt(B,bs,2);
3381   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3382   PetscFunctionReturn(0);
3383 }
3384 
3385 /*@C
3386    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3387    (the default sequential PETSc format).
3388 
3389    Collective on MPI_Comm
3390 
3391    Input Parameters:
3392 +  B - the matrix
3393 .  i - the indices into j for the start of each local row (starts with zero)
3394 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3395 -  v - optional values in the matrix
3396 
3397    Level: developer
3398 
3399    Notes:
3400    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3401    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3402    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3403    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3404    block column and the second index is over columns within a block.
3405 
3406 .keywords: matrix, aij, compressed row, sparse
3407 
3408 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3409 @*/
3410 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3411 {
3412   PetscErrorCode ierr;
3413 
3414   PetscFunctionBegin;
3415   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3416   PetscValidType(B,1);
3417   PetscValidLogicalCollectiveInt(B,bs,2);
3418   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3419   PetscFunctionReturn(0);
3420 }
3421 
3422 
3423 /*@
3424      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3425 
3426      Collective on MPI_Comm
3427 
3428    Input Parameters:
3429 +  comm - must be an MPI communicator of size 1
3430 .  bs - size of block
3431 .  m - number of rows
3432 .  n - number of columns
3433 .  i - row indices
3434 .  j - column indices
3435 -  a - matrix values
3436 
3437    Output Parameter:
3438 .  mat - the matrix
3439 
3440    Level: advanced
3441 
3442    Notes:
3443        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3444     once the matrix is destroyed
3445 
3446        You cannot set new nonzero locations into this matrix, that will generate an error.
3447 
3448        The i and j indices are 0 based
3449 
3450        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3451 
3452       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3453       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3454       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3455       with column-major ordering within blocks.
3456 
3457 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3458 
3459 @*/
3460 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3461 {
3462   PetscErrorCode ierr;
3463   PetscInt       ii;
3464   Mat_SeqBAIJ    *baij;
3465 
3466   PetscFunctionBegin;
3467   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3468   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3469 
3470   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3471   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3472   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3473   ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3474   baij = (Mat_SeqBAIJ*)(*mat)->data;
3475   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3476   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3477 
3478   baij->i = i;
3479   baij->j = j;
3480   baij->a = a;
3481 
3482   baij->singlemalloc = PETSC_FALSE;
3483   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3484   baij->free_a       = PETSC_FALSE;
3485   baij->free_ij      = PETSC_FALSE;
3486 
3487   for (ii=0; ii<m; ii++) {
3488     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3489 #if defined(PETSC_USE_DEBUG)
3490     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3491 #endif
3492   }
3493 #if defined(PETSC_USE_DEBUG)
3494   for (ii=0; ii<baij->i[m]; ii++) {
3495     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3496     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3497   }
3498 #endif
3499 
3500   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3501   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3502   PetscFunctionReturn(0);
3503 }
3504 
3505 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3506 {
3507   PetscErrorCode ierr;
3508   PetscMPIInt    size;
3509 
3510   PetscFunctionBegin;
3511   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3512   if (size == 1 && scall == MAT_REUSE_MATRIX) {
3513     ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
3514   } else {
3515     ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3516   }
3517   PetscFunctionReturn(0);
3518 }
3519