xref: /petsc/src/mat/impls/baij/seq/baij.c (revision a8f87f1dd689f82917d46bbc2fdc0da7b40626d1)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13 #endif
14 
15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
17 #endif
18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
19 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
20 
21 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
22 {
23   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
24   PetscErrorCode ierr;
25   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
26   MatScalar      *v    = a->a,*odiag,*diag,work[25],*v_work;
27   PetscReal      shift = 0.0;
28   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
29 
30   PetscFunctionBegin;
31   allowzeropivot = PetscNot(A->erroriffailure);
32 
33   if (a->idiagvalid) {
34     if (values) *values = a->idiag;
35     PetscFunctionReturn(0);
36   }
37   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
38   diag_offset = a->diag;
39   if (!a->idiag) {
40     ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr);
41     ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
42   }
43   diag  = a->idiag;
44   if (values) *values = a->idiag;
45   /* factor and invert each block */
46   switch (bs) {
47   case 1:
48     for (i=0; i<mbs; i++) {
49       odiag    = v + 1*diag_offset[i];
50       diag[0]  = odiag[0];
51 
52       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
53         if (allowzeropivot) {
54           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
55           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
56           A->factorerror_zeropivot_row   = i;
57           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
58         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
59       }
60 
61       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
62       diag    += 1;
63     }
64     break;
65   case 2:
66     for (i=0; i<mbs; i++) {
67       odiag    = v + 4*diag_offset[i];
68       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
69       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
70       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
71       diag    += 4;
72     }
73     break;
74   case 3:
75     for (i=0; i<mbs; i++) {
76       odiag    = v + 9*diag_offset[i];
77       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79       diag[8]  = odiag[8];
80       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
81       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
82       diag    += 9;
83     }
84     break;
85   case 4:
86     for (i=0; i<mbs; i++) {
87       odiag  = v + 16*diag_offset[i];
88       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
89       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
90       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
91       diag  += 16;
92     }
93     break;
94   case 5:
95     for (i=0; i<mbs; i++) {
96       odiag  = v + 25*diag_offset[i];
97       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
98       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
99       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
100       diag  += 25;
101     }
102     break;
103   case 6:
104     for (i=0; i<mbs; i++) {
105       odiag  = v + 36*diag_offset[i];
106       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
107       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
108       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
109       diag  += 36;
110     }
111     break;
112   case 7:
113     for (i=0; i<mbs; i++) {
114       odiag  = v + 49*diag_offset[i];
115       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
116       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
117       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
118       diag  += 49;
119     }
120     break;
121   default:
122     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
123     for (i=0; i<mbs; i++) {
124       odiag  = v + bs2*diag_offset[i];
125       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
126       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
127       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
128       diag  += bs2;
129     }
130     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
131   }
132   a->idiagvalid = PETSC_TRUE;
133   PetscFunctionReturn(0);
134 }
135 
136 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
137 {
138   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
139   PetscScalar       *x,*work,*w,*workt,*t;
140   const MatScalar   *v,*aa = a->a, *idiag;
141   const PetscScalar *b,*xb;
142   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
143   PetscErrorCode    ierr;
144   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
145   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
146 
147   PetscFunctionBegin;
148   if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */
149   its = its*lits;
150   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
151   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
152   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
153   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
154   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
155 
156   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
157 
158   if (!m) PetscFunctionReturn(0);
159   diag  = a->diag;
160   idiag = a->idiag;
161   k    = PetscMax(A->rmap->n,A->cmap->n);
162   if (!a->mult_work) {
163     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
164   }
165   if (!a->sor_workt) {
166     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
167   }
168   if (!a->sor_work) {
169     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
170   }
171   work = a->mult_work;
172   t    = a->sor_workt;
173   w    = a->sor_work;
174 
175   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
176   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
177 
178   if (flag & SOR_ZERO_INITIAL_GUESS) {
179     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
180       switch (bs) {
181       case 1:
182         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
183         t[0] = b[0];
184         i2     = 1;
185         idiag += 1;
186         for (i=1; i<m; i++) {
187           v  = aa + ai[i];
188           vi = aj + ai[i];
189           nz = diag[i] - ai[i];
190           s[0] = b[i2];
191           for (j=0; j<nz; j++) {
192             xw[0] = x[vi[j]];
193             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
194           }
195           t[i2] = s[0];
196           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
197           x[i2]  = xw[0];
198           idiag += 1;
199           i2    += 1;
200         }
201         break;
202       case 2:
203         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
204         t[0] = b[0]; t[1] = b[1];
205         i2     = 2;
206         idiag += 4;
207         for (i=1; i<m; i++) {
208           v  = aa + 4*ai[i];
209           vi = aj + ai[i];
210           nz = diag[i] - ai[i];
211           s[0] = b[i2]; s[1] = b[i2+1];
212           for (j=0; j<nz; j++) {
213             idx = 2*vi[j];
214             it  = 4*j;
215             xw[0] = x[idx]; xw[1] = x[1+idx];
216             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
217           }
218           t[i2] = s[0]; t[i2+1] = s[1];
219           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
220           x[i2]   = xw[0]; x[i2+1] = xw[1];
221           idiag  += 4;
222           i2     += 2;
223         }
224         break;
225       case 3:
226         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
227         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
228         i2     = 3;
229         idiag += 9;
230         for (i=1; i<m; i++) {
231           v  = aa + 9*ai[i];
232           vi = aj + ai[i];
233           nz = diag[i] - ai[i];
234           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
235           while (nz--) {
236             idx = 3*(*vi++);
237             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
238             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
239             v  += 9;
240           }
241           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
242           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
243           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
244           idiag  += 9;
245           i2     += 3;
246         }
247         break;
248       case 4:
249         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
250         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
251         i2     = 4;
252         idiag += 16;
253         for (i=1; i<m; i++) {
254           v  = aa + 16*ai[i];
255           vi = aj + ai[i];
256           nz = diag[i] - ai[i];
257           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
258           while (nz--) {
259             idx = 4*(*vi++);
260             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
261             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
262             v  += 16;
263           }
264           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
265           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
266           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
267           idiag  += 16;
268           i2     += 4;
269         }
270         break;
271       case 5:
272         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
273         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
274         i2     = 5;
275         idiag += 25;
276         for (i=1; i<m; i++) {
277           v  = aa + 25*ai[i];
278           vi = aj + ai[i];
279           nz = diag[i] - ai[i];
280           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
281           while (nz--) {
282             idx = 5*(*vi++);
283             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
284             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
285             v  += 25;
286           }
287           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
288           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
289           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
290           idiag  += 25;
291           i2     += 5;
292         }
293         break;
294       case 6:
295         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
296         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
297         i2     = 6;
298         idiag += 36;
299         for (i=1; i<m; i++) {
300           v  = aa + 36*ai[i];
301           vi = aj + ai[i];
302           nz = diag[i] - ai[i];
303           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
304           while (nz--) {
305             idx = 6*(*vi++);
306             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
307             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
308             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
309             v  += 36;
310           }
311           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
312           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
313           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
314           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
315           idiag  += 36;
316           i2     += 6;
317         }
318         break;
319       case 7:
320         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
321         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
322         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
323         i2     = 7;
324         idiag += 49;
325         for (i=1; i<m; i++) {
326           v  = aa + 49*ai[i];
327           vi = aj + ai[i];
328           nz = diag[i] - ai[i];
329           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
330           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
331           while (nz--) {
332             idx = 7*(*vi++);
333             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
334             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
335             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
336             v  += 49;
337           }
338           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
339           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
340           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
341           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
342           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
343           idiag  += 49;
344           i2     += 7;
345         }
346         break;
347       default:
348         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
349         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
350         i2     = bs;
351         idiag += bs2;
352         for (i=1; i<m; i++) {
353           v  = aa + bs2*ai[i];
354           vi = aj + ai[i];
355           nz = diag[i] - ai[i];
356 
357           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
358           /* copy all rows of x that are needed into contiguous space */
359           workt = work;
360           for (j=0; j<nz; j++) {
361             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
362             workt += bs;
363           }
364           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
365           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
366           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
367 
368           idiag += bs2;
369           i2    += bs;
370         }
371         break;
372       }
373       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
374       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
375       xb = t;
376     }
377     else xb = b;
378     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
379       idiag = a->idiag+bs2*(a->mbs-1);
380       i2 = bs * (m-1);
381       switch (bs) {
382       case 1:
383         s[0]  = xb[i2];
384         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
385         x[i2] = xw[0];
386         i2   -= 1;
387         for (i=m-2; i>=0; i--) {
388           v  = aa + (diag[i]+1);
389           vi = aj + diag[i] + 1;
390           nz = ai[i+1] - diag[i] - 1;
391           s[0] = xb[i2];
392           for (j=0; j<nz; j++) {
393             xw[0] = x[vi[j]];
394             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
395           }
396           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
397           x[i2]  = xw[0];
398           idiag -= 1;
399           i2    -= 1;
400         }
401         break;
402       case 2:
403         s[0]  = xb[i2]; s[1] = xb[i2+1];
404         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
405         x[i2] = xw[0]; x[i2+1] = xw[1];
406         i2    -= 2;
407         idiag -= 4;
408         for (i=m-2; i>=0; i--) {
409           v  = aa + 4*(diag[i] + 1);
410           vi = aj + diag[i] + 1;
411           nz = ai[i+1] - diag[i] - 1;
412           s[0] = xb[i2]; s[1] = xb[i2+1];
413           for (j=0; j<nz; j++) {
414             idx = 2*vi[j];
415             it  = 4*j;
416             xw[0] = x[idx]; xw[1] = x[1+idx];
417             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
418           }
419           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
420           x[i2]   = xw[0]; x[i2+1] = xw[1];
421           idiag  -= 4;
422           i2     -= 2;
423         }
424         break;
425       case 3:
426         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
427         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
428         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
429         i2    -= 3;
430         idiag -= 9;
431         for (i=m-2; i>=0; i--) {
432           v  = aa + 9*(diag[i]+1);
433           vi = aj + diag[i] + 1;
434           nz = ai[i+1] - diag[i] - 1;
435           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
436           while (nz--) {
437             idx = 3*(*vi++);
438             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
439             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
440             v  += 9;
441           }
442           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
443           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
444           idiag  -= 9;
445           i2     -= 3;
446         }
447         break;
448       case 4:
449         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
450         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
451         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
452         i2    -= 4;
453         idiag -= 16;
454         for (i=m-2; i>=0; i--) {
455           v  = aa + 16*(diag[i]+1);
456           vi = aj + diag[i] + 1;
457           nz = ai[i+1] - diag[i] - 1;
458           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
459           while (nz--) {
460             idx = 4*(*vi++);
461             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
462             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
463             v  += 16;
464           }
465           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
466           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
467           idiag  -= 16;
468           i2     -= 4;
469         }
470         break;
471       case 5:
472         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
473         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
474         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
475         i2    -= 5;
476         idiag -= 25;
477         for (i=m-2; i>=0; i--) {
478           v  = aa + 25*(diag[i]+1);
479           vi = aj + diag[i] + 1;
480           nz = ai[i+1] - diag[i] - 1;
481           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
482           while (nz--) {
483             idx = 5*(*vi++);
484             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
485             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
486             v  += 25;
487           }
488           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
489           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
490           idiag  -= 25;
491           i2     -= 5;
492         }
493         break;
494       case 6:
495         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
496         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
497         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
498         i2    -= 6;
499         idiag -= 36;
500         for (i=m-2; i>=0; i--) {
501           v  = aa + 36*(diag[i]+1);
502           vi = aj + diag[i] + 1;
503           nz = ai[i+1] - diag[i] - 1;
504           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
505           while (nz--) {
506             idx = 6*(*vi++);
507             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
508             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
509             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
510             v  += 36;
511           }
512           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
513           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
514           idiag  -= 36;
515           i2     -= 6;
516         }
517         break;
518       case 7:
519         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
520         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
521         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
522         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
523         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
524         i2    -= 7;
525         idiag -= 49;
526         for (i=m-2; i>=0; i--) {
527           v  = aa + 49*(diag[i]+1);
528           vi = aj + diag[i] + 1;
529           nz = ai[i+1] - diag[i] - 1;
530           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
531           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
532           while (nz--) {
533             idx = 7*(*vi++);
534             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
535             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
536             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
537             v  += 49;
538           }
539           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
540           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
541           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
542           idiag  -= 49;
543           i2     -= 7;
544         }
545         break;
546       default:
547         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
548         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
549         i2    -= bs;
550         idiag -= bs2;
551         for (i=m-2; i>=0; i--) {
552           v  = aa + bs2*(diag[i]+1);
553           vi = aj + diag[i] + 1;
554           nz = ai[i+1] - diag[i] - 1;
555 
556           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
557           /* copy all rows of x that are needed into contiguous space */
558           workt = work;
559           for (j=0; j<nz; j++) {
560             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
561             workt += bs;
562           }
563           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
564           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
565 
566           idiag -= bs2;
567           i2    -= bs;
568         }
569         break;
570       }
571       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
572     }
573     its--;
574   }
575   while (its--) {
576     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
577       idiag = a->idiag;
578       i2 = 0;
579       switch (bs) {
580       case 1:
581         for (i=0; i<m; i++) {
582           v  = aa + ai[i];
583           vi = aj + ai[i];
584           nz = ai[i+1] - ai[i];
585           s[0] = b[i2];
586           for (j=0; j<nz; j++) {
587             xw[0] = x[vi[j]];
588             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
589           }
590           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
591           x[i2] += xw[0];
592           idiag += 1;
593           i2    += 1;
594         }
595         break;
596       case 2:
597         for (i=0; i<m; i++) {
598           v  = aa + 4*ai[i];
599           vi = aj + ai[i];
600           nz = ai[i+1] - ai[i];
601           s[0] = b[i2]; s[1] = b[i2+1];
602           for (j=0; j<nz; j++) {
603             idx = 2*vi[j];
604             it  = 4*j;
605             xw[0] = x[idx]; xw[1] = x[1+idx];
606             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
607           }
608           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
609           x[i2]  += xw[0]; x[i2+1] += xw[1];
610           idiag  += 4;
611           i2     += 2;
612         }
613         break;
614       case 3:
615         for (i=0; i<m; i++) {
616           v  = aa + 9*ai[i];
617           vi = aj + ai[i];
618           nz = ai[i+1] - ai[i];
619           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
620           while (nz--) {
621             idx = 3*(*vi++);
622             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
623             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
624             v  += 9;
625           }
626           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
627           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
628           idiag  += 9;
629           i2     += 3;
630         }
631         break;
632       case 4:
633         for (i=0; i<m; i++) {
634           v  = aa + 16*ai[i];
635           vi = aj + ai[i];
636           nz = ai[i+1] - ai[i];
637           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
638           while (nz--) {
639             idx = 4*(*vi++);
640             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
641             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
642             v  += 16;
643           }
644           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
645           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
646           idiag  += 16;
647           i2     += 4;
648         }
649         break;
650       case 5:
651         for (i=0; i<m; i++) {
652           v  = aa + 25*ai[i];
653           vi = aj + ai[i];
654           nz = ai[i+1] - ai[i];
655           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
656           while (nz--) {
657             idx = 5*(*vi++);
658             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
659             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
660             v  += 25;
661           }
662           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
663           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
664           idiag  += 25;
665           i2     += 5;
666         }
667         break;
668       case 6:
669         for (i=0; i<m; i++) {
670           v  = aa + 36*ai[i];
671           vi = aj + ai[i];
672           nz = ai[i+1] - ai[i];
673           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
674           while (nz--) {
675             idx = 6*(*vi++);
676             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
677             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
678             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
679             v  += 36;
680           }
681           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
682           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
683           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
684           idiag  += 36;
685           i2     += 6;
686         }
687         break;
688       case 7:
689         for (i=0; i<m; i++) {
690           v  = aa + 49*ai[i];
691           vi = aj + ai[i];
692           nz = ai[i+1] - ai[i];
693           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
694           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
695           while (nz--) {
696             idx = 7*(*vi++);
697             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
698             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
699             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
700             v  += 49;
701           }
702           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
703           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
704           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
705           idiag  += 49;
706           i2     += 7;
707         }
708         break;
709       default:
710         for (i=0; i<m; i++) {
711           v  = aa + bs2*ai[i];
712           vi = aj + ai[i];
713           nz = ai[i+1] - ai[i];
714 
715           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
716           /* copy all rows of x that are needed into contiguous space */
717           workt = work;
718           for (j=0; j<nz; j++) {
719             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
720             workt += bs;
721           }
722           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
723           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
724 
725           idiag += bs2;
726           i2    += bs;
727         }
728         break;
729       }
730       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
731     }
732     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
733       idiag = a->idiag+bs2*(a->mbs-1);
734       i2 = bs * (m-1);
735       switch (bs) {
736       case 1:
737         for (i=m-1; i>=0; i--) {
738           v  = aa + ai[i];
739           vi = aj + ai[i];
740           nz = ai[i+1] - ai[i];
741           s[0] = b[i2];
742           for (j=0; j<nz; j++) {
743             xw[0] = x[vi[j]];
744             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
745           }
746           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
747           x[i2] += xw[0];
748           idiag -= 1;
749           i2    -= 1;
750         }
751         break;
752       case 2:
753         for (i=m-1; i>=0; i--) {
754           v  = aa + 4*ai[i];
755           vi = aj + ai[i];
756           nz = ai[i+1] - ai[i];
757           s[0] = b[i2]; s[1] = b[i2+1];
758           for (j=0; j<nz; j++) {
759             idx = 2*vi[j];
760             it  = 4*j;
761             xw[0] = x[idx]; xw[1] = x[1+idx];
762             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
763           }
764           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
765           x[i2]  += xw[0]; x[i2+1] += xw[1];
766           idiag  -= 4;
767           i2     -= 2;
768         }
769         break;
770       case 3:
771         for (i=m-1; i>=0; i--) {
772           v  = aa + 9*ai[i];
773           vi = aj + ai[i];
774           nz = ai[i+1] - ai[i];
775           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
776           while (nz--) {
777             idx = 3*(*vi++);
778             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
779             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
780             v  += 9;
781           }
782           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
783           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
784           idiag  -= 9;
785           i2     -= 3;
786         }
787         break;
788       case 4:
789         for (i=m-1; i>=0; i--) {
790           v  = aa + 16*ai[i];
791           vi = aj + ai[i];
792           nz = ai[i+1] - ai[i];
793           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
794           while (nz--) {
795             idx = 4*(*vi++);
796             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
797             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
798             v  += 16;
799           }
800           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
801           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
802           idiag  -= 16;
803           i2     -= 4;
804         }
805         break;
806       case 5:
807         for (i=m-1; i>=0; i--) {
808           v  = aa + 25*ai[i];
809           vi = aj + ai[i];
810           nz = ai[i+1] - ai[i];
811           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
812           while (nz--) {
813             idx = 5*(*vi++);
814             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
815             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
816             v  += 25;
817           }
818           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
819           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
820           idiag  -= 25;
821           i2     -= 5;
822         }
823         break;
824       case 6:
825         for (i=m-1; i>=0; i--) {
826           v  = aa + 36*ai[i];
827           vi = aj + ai[i];
828           nz = ai[i+1] - ai[i];
829           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
830           while (nz--) {
831             idx = 6*(*vi++);
832             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
833             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
834             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
835             v  += 36;
836           }
837           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
838           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
839           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
840           idiag  -= 36;
841           i2     -= 6;
842         }
843         break;
844       case 7:
845         for (i=m-1; i>=0; i--) {
846           v  = aa + 49*ai[i];
847           vi = aj + ai[i];
848           nz = ai[i+1] - ai[i];
849           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
850           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
851           while (nz--) {
852             idx = 7*(*vi++);
853             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
854             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
855             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
856             v  += 49;
857           }
858           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
859           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
860           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
861           idiag  -= 49;
862           i2     -= 7;
863         }
864         break;
865       default:
866         for (i=m-1; i>=0; i--) {
867           v  = aa + bs2*ai[i];
868           vi = aj + ai[i];
869           nz = ai[i+1] - ai[i];
870 
871           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
872           /* copy all rows of x that are needed into contiguous space */
873           workt = work;
874           for (j=0; j<nz; j++) {
875             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
876             workt += bs;
877           }
878           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
879           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
880 
881           idiag -= bs2;
882           i2    -= bs;
883         }
884         break;
885       }
886       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
887     }
888   }
889   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
890   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
891   PetscFunctionReturn(0);
892 }
893 
894 
895 /*
896     Special version for direct calls from Fortran (Used in PETSc-fun3d)
897 */
898 #if defined(PETSC_HAVE_FORTRAN_CAPS)
899 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
900 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
901 #define matsetvaluesblocked4_ matsetvaluesblocked4
902 #endif
903 
904 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
905 {
906   Mat               A  = *AA;
907   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
908   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
909   PetscInt          *ai    =a->i,*ailen=a->ilen;
910   PetscInt          *aj    =a->j,stepval,lastcol = -1;
911   const PetscScalar *value = v;
912   MatScalar         *ap,*aa = a->a,*bap;
913 
914   PetscFunctionBegin;
915   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
916   stepval = (n-1)*4;
917   for (k=0; k<m; k++) { /* loop over added rows */
918     row  = im[k];
919     rp   = aj + ai[row];
920     ap   = aa + 16*ai[row];
921     nrow = ailen[row];
922     low  = 0;
923     high = nrow;
924     for (l=0; l<n; l++) { /* loop over added columns */
925       col = in[l];
926       if (col <= lastcol)  low = 0;
927       else                high = nrow;
928       lastcol = col;
929       value   = v + k*(stepval+4 + l)*4;
930       while (high-low > 7) {
931         t = (low+high)/2;
932         if (rp[t] > col) high = t;
933         else             low  = t;
934       }
935       for (i=low; i<high; i++) {
936         if (rp[i] > col) break;
937         if (rp[i] == col) {
938           bap = ap +  16*i;
939           for (ii=0; ii<4; ii++,value+=stepval) {
940             for (jj=ii; jj<16; jj+=4) {
941               bap[jj] += *value++;
942             }
943           }
944           goto noinsert2;
945         }
946       }
947       N = nrow++ - 1;
948       high++; /* added new column index thus must search to one higher than before */
949       /* shift up all the later entries in this row */
950       for (ii=N; ii>=i; ii--) {
951         rp[ii+1] = rp[ii];
952         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
953       }
954       if (N >= i) {
955         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
956       }
957       rp[i] = col;
958       bap   = ap +  16*i;
959       for (ii=0; ii<4; ii++,value+=stepval) {
960         for (jj=ii; jj<16; jj+=4) {
961           bap[jj] = *value++;
962         }
963       }
964       noinsert2:;
965       low = i;
966     }
967     ailen[row] = nrow;
968   }
969   PetscFunctionReturnVoid();
970 }
971 
972 #if defined(PETSC_HAVE_FORTRAN_CAPS)
973 #define matsetvalues4_ MATSETVALUES4
974 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
975 #define matsetvalues4_ matsetvalues4
976 #endif
977 
978 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
979 {
980   Mat         A  = *AA;
981   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
982   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
983   PetscInt    *ai=a->i,*ailen=a->ilen;
984   PetscInt    *aj=a->j,brow,bcol;
985   PetscInt    ridx,cidx,lastcol = -1;
986   MatScalar   *ap,value,*aa=a->a,*bap;
987 
988   PetscFunctionBegin;
989   for (k=0; k<m; k++) { /* loop over added rows */
990     row  = im[k]; brow = row/4;
991     rp   = aj + ai[brow];
992     ap   = aa + 16*ai[brow];
993     nrow = ailen[brow];
994     low  = 0;
995     high = nrow;
996     for (l=0; l<n; l++) { /* loop over added columns */
997       col   = in[l]; bcol = col/4;
998       ridx  = row % 4; cidx = col % 4;
999       value = v[l + k*n];
1000       if (col <= lastcol)  low = 0;
1001       else                high = nrow;
1002       lastcol = col;
1003       while (high-low > 7) {
1004         t = (low+high)/2;
1005         if (rp[t] > bcol) high = t;
1006         else              low  = t;
1007       }
1008       for (i=low; i<high; i++) {
1009         if (rp[i] > bcol) break;
1010         if (rp[i] == bcol) {
1011           bap   = ap +  16*i + 4*cidx + ridx;
1012           *bap += value;
1013           goto noinsert1;
1014         }
1015       }
1016       N = nrow++ - 1;
1017       high++; /* added new column thus must search to one higher than before */
1018       /* shift up all the later entries in this row */
1019       for (ii=N; ii>=i; ii--) {
1020         rp[ii+1] = rp[ii];
1021         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1022       }
1023       if (N>=i) {
1024         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1025       }
1026       rp[i]                    = bcol;
1027       ap[16*i + 4*cidx + ridx] = value;
1028 noinsert1:;
1029       low = i;
1030     }
1031     ailen[brow] = nrow;
1032   }
1033   PetscFunctionReturnVoid();
1034 }
1035 
1036 /*
1037      Checks for missing diagonals
1038 */
1039 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1040 {
1041   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1042   PetscErrorCode ierr;
1043   PetscInt       *diag,*ii = a->i,i;
1044 
1045   PetscFunctionBegin;
1046   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1047   *missing = PETSC_FALSE;
1048   if (A->rmap->n > 0 && !ii) {
1049     *missing = PETSC_TRUE;
1050     if (d) *d = 0;
1051     ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr);
1052   } else {
1053     diag = a->diag;
1054     for (i=0; i<a->mbs; i++) {
1055       if (diag[i] >= ii[i+1]) {
1056         *missing = PETSC_TRUE;
1057         if (d) *d = i;
1058         ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr);
1059         break;
1060       }
1061     }
1062   }
1063   PetscFunctionReturn(0);
1064 }
1065 
1066 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1067 {
1068   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1069   PetscErrorCode ierr;
1070   PetscInt       i,j,m = a->mbs;
1071 
1072   PetscFunctionBegin;
1073   if (!a->diag) {
1074     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1075     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1076     a->free_diag = PETSC_TRUE;
1077   }
1078   for (i=0; i<m; i++) {
1079     a->diag[i] = a->i[i+1];
1080     for (j=a->i[i]; j<a->i[i+1]; j++) {
1081       if (a->j[j] == i) {
1082         a->diag[i] = j;
1083         break;
1084       }
1085     }
1086   }
1087   PetscFunctionReturn(0);
1088 }
1089 
1090 
1091 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1092 {
1093   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1094   PetscErrorCode ierr;
1095   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1096   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1097 
1098   PetscFunctionBegin;
1099   *nn = n;
1100   if (!ia) PetscFunctionReturn(0);
1101   if (symmetric) {
1102     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1103     nz   = tia[n];
1104   } else {
1105     tia = a->i; tja = a->j;
1106   }
1107 
1108   if (!blockcompressed && bs > 1) {
1109     (*nn) *= bs;
1110     /* malloc & create the natural set of indices */
1111     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1112     if (n) {
1113       (*ia)[0] = oshift;
1114       for (j=1; j<bs; j++) {
1115         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1116       }
1117     }
1118 
1119     for (i=1; i<n; i++) {
1120       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1121       for (j=1; j<bs; j++) {
1122         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1123       }
1124     }
1125     if (n) {
1126       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1127     }
1128 
1129     if (inja) {
1130       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1131       cnt = 0;
1132       for (i=0; i<n; i++) {
1133         for (j=0; j<bs; j++) {
1134           for (k=tia[i]; k<tia[i+1]; k++) {
1135             for (l=0; l<bs; l++) {
1136               (*ja)[cnt++] = bs*tja[k] + l;
1137             }
1138           }
1139         }
1140       }
1141     }
1142 
1143     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1144       ierr = PetscFree(tia);CHKERRQ(ierr);
1145       ierr = PetscFree(tja);CHKERRQ(ierr);
1146     }
1147   } else if (oshift == 1) {
1148     if (symmetric) {
1149       nz = tia[A->rmap->n/bs];
1150       /*  add 1 to i and j indices */
1151       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1152       *ia = tia;
1153       if (ja) {
1154         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1155         *ja = tja;
1156       }
1157     } else {
1158       nz = a->i[A->rmap->n/bs];
1159       /* malloc space and  add 1 to i and j indices */
1160       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1161       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1162       if (ja) {
1163         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1164         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1165       }
1166     }
1167   } else {
1168     *ia = tia;
1169     if (ja) *ja = tja;
1170   }
1171   PetscFunctionReturn(0);
1172 }
1173 
1174 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1175 {
1176   PetscErrorCode ierr;
1177 
1178   PetscFunctionBegin;
1179   if (!ia) PetscFunctionReturn(0);
1180   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1181     ierr = PetscFree(*ia);CHKERRQ(ierr);
1182     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1183   }
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1188 {
1189   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1190   PetscErrorCode ierr;
1191 
1192   PetscFunctionBegin;
1193 #if defined(PETSC_USE_LOG)
1194   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1195 #endif
1196   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1197   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1198   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1199   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1200   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1201   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1202   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1203   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1204   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1205   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1206   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1207   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1208   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1209 
1210   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1211   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1212   ierr = PetscFree(A->data);CHKERRQ(ierr);
1213 
1214   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1215   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1216   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1217   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1220   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1222   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1223   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1224   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1225 #if defined(PETSC_HAVE_HYPRE)
1226   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr);
1227 #endif
1228   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr);
1229   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_is_seqaij_C",NULL);CHKERRQ(ierr);
1230   PetscFunctionReturn(0);
1231 }
1232 
1233 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1234 {
1235   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1236   PetscErrorCode ierr;
1237 
1238   PetscFunctionBegin;
1239   switch (op) {
1240   case MAT_ROW_ORIENTED:
1241     a->roworiented = flg;
1242     break;
1243   case MAT_KEEP_NONZERO_PATTERN:
1244     a->keepnonzeropattern = flg;
1245     break;
1246   case MAT_NEW_NONZERO_LOCATIONS:
1247     a->nonew = (flg ? 0 : 1);
1248     break;
1249   case MAT_NEW_NONZERO_LOCATION_ERR:
1250     a->nonew = (flg ? -1 : 0);
1251     break;
1252   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1253     a->nonew = (flg ? -2 : 0);
1254     break;
1255   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1256     a->nounused = (flg ? -1 : 0);
1257     break;
1258   case MAT_NEW_DIAGONALS:
1259   case MAT_IGNORE_OFF_PROC_ENTRIES:
1260   case MAT_USE_HASH_TABLE:
1261     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1262     break;
1263   case MAT_SPD:
1264   case MAT_SYMMETRIC:
1265   case MAT_STRUCTURALLY_SYMMETRIC:
1266   case MAT_HERMITIAN:
1267   case MAT_SYMMETRY_ETERNAL:
1268   case MAT_SUBMAT_SINGLEIS:
1269   case MAT_STRUCTURE_ONLY:
1270     /* These options are handled directly by MatSetOption() */
1271     break;
1272   default:
1273     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1274   }
1275   PetscFunctionReturn(0);
1276 }
1277 
1278 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1279 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1280 {
1281   PetscErrorCode ierr;
1282   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1283   MatScalar      *aa_i;
1284   PetscScalar    *v_i;
1285 
1286   PetscFunctionBegin;
1287   bs  = A->rmap->bs;
1288   bs2 = bs*bs;
1289   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1290 
1291   bn  = row/bs;   /* Block number */
1292   bp  = row % bs; /* Block Position */
1293   M   = ai[bn+1] - ai[bn];
1294   *nz = bs*M;
1295 
1296   if (v) {
1297     *v = 0;
1298     if (*nz) {
1299       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1300       for (i=0; i<M; i++) { /* for each block in the block row */
1301         v_i  = *v + i*bs;
1302         aa_i = aa + bs2*(ai[bn] + i);
1303         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1304       }
1305     }
1306   }
1307 
1308   if (idx) {
1309     *idx = 0;
1310     if (*nz) {
1311       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1312       for (i=0; i<M; i++) { /* for each block in the block row */
1313         idx_i = *idx + i*bs;
1314         itmp  = bs*aj[ai[bn] + i];
1315         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1316       }
1317     }
1318   }
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1323 {
1324   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1325   PetscErrorCode ierr;
1326 
1327   PetscFunctionBegin;
1328   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1329   PetscFunctionReturn(0);
1330 }
1331 
1332 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1333 {
1334   PetscErrorCode ierr;
1335 
1336   PetscFunctionBegin;
1337   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1338   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1339   PetscFunctionReturn(0);
1340 }
1341 
1342 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1343 {
1344   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1345   Mat            C;
1346   PetscErrorCode ierr;
1347   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1348   PetscInt       *rows,*cols,bs2=a->bs2;
1349   MatScalar      *array;
1350 
1351   PetscFunctionBegin;
1352   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1353     ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr);
1354 
1355     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1356     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1357     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1358     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1359     ierr = MatSeqBAIJSetPreallocation(C,bs,0,col);CHKERRQ(ierr);
1360     ierr = PetscFree(col);CHKERRQ(ierr);
1361   } else {
1362     C = *B;
1363   }
1364 
1365   array = a->a;
1366   ierr  = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr);
1367   for (i=0; i<mbs; i++) {
1368     cols[0] = i*bs;
1369     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1370     len = ai[i+1] - ai[i];
1371     for (j=0; j<len; j++) {
1372       rows[0] = (*aj++)*bs;
1373       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1374       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1375       array += bs2;
1376     }
1377   }
1378   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1379 
1380   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1381   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1382 
1383   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1384     *B = C;
1385   } else {
1386     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1387   }
1388   PetscFunctionReturn(0);
1389 }
1390 
1391 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1392 {
1393   PetscErrorCode ierr;
1394   Mat            Btrans;
1395 
1396   PetscFunctionBegin;
1397   *f   = PETSC_FALSE;
1398   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1399   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1400   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1401   PetscFunctionReturn(0);
1402 }
1403 
1404 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1405 {
1406   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1407   PetscErrorCode ierr;
1408   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1409   int            fd;
1410   PetscScalar    *aa;
1411   FILE           *file;
1412 
1413   PetscFunctionBegin;
1414   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1415   ierr        = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr);
1416   col_lens[0] = MAT_FILE_CLASSID;
1417 
1418   col_lens[1] = A->rmap->N;
1419   col_lens[2] = A->cmap->n;
1420   col_lens[3] = a->nz*bs2;
1421 
1422   /* store lengths of each row and write (including header) to file */
1423   count = 0;
1424   for (i=0; i<a->mbs; i++) {
1425     for (j=0; j<bs; j++) {
1426       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1427     }
1428   }
1429   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1430   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1431 
1432   /* store column indices (zero start index) */
1433   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1434   count = 0;
1435   for (i=0; i<a->mbs; i++) {
1436     for (j=0; j<bs; j++) {
1437       for (k=a->i[i]; k<a->i[i+1]; k++) {
1438         for (l=0; l<bs; l++) {
1439           jj[count++] = bs*a->j[k] + l;
1440         }
1441       }
1442     }
1443   }
1444   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1445   ierr = PetscFree(jj);CHKERRQ(ierr);
1446 
1447   /* store nonzero values */
1448   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1449   count = 0;
1450   for (i=0; i<a->mbs; i++) {
1451     for (j=0; j<bs; j++) {
1452       for (k=a->i[i]; k<a->i[i+1]; k++) {
1453         for (l=0; l<bs; l++) {
1454           aa[count++] = a->a[bs2*k + l*bs + j];
1455         }
1456       }
1457     }
1458   }
1459   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1460   ierr = PetscFree(aa);CHKERRQ(ierr);
1461 
1462   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1463   if (file) {
1464     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1465   }
1466   PetscFunctionReturn(0);
1467 }
1468 
1469 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1470 {
1471   PetscErrorCode ierr;
1472   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1473   PetscInt       i,bs = A->rmap->bs,k;
1474 
1475   PetscFunctionBegin;
1476   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1477   for (i=0; i<a->mbs; i++) {
1478     ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr);
1479     for (k=a->i[i]; k<a->i[i+1]; k++) {
1480       ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr);
1481     }
1482     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1483   }
1484   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1485   PetscFunctionReturn(0);
1486 }
1487 
1488 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1489 {
1490   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1491   PetscErrorCode    ierr;
1492   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1493   PetscViewerFormat format;
1494 
1495   PetscFunctionBegin;
1496   if (A->structure_only) {
1497     ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr);
1498     PetscFunctionReturn(0);
1499   }
1500 
1501   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1502   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1503     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1504   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1505     const char *matname;
1506     Mat        aij;
1507     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1508     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1509     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1510     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1511     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1512   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1513       PetscFunctionReturn(0);
1514   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1515     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1516     for (i=0; i<a->mbs; i++) {
1517       for (j=0; j<bs; j++) {
1518         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1519         for (k=a->i[i]; k<a->i[i+1]; k++) {
1520           for (l=0; l<bs; l++) {
1521 #if defined(PETSC_USE_COMPLEX)
1522             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1523               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1524                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1525             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1526               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1527                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1528             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1529               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1530             }
1531 #else
1532             if (a->a[bs2*k + l*bs + j] != 0.0) {
1533               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1534             }
1535 #endif
1536           }
1537         }
1538         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1539       }
1540     }
1541     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1542   } else {
1543     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1544     for (i=0; i<a->mbs; i++) {
1545       for (j=0; j<bs; j++) {
1546         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1547         for (k=a->i[i]; k<a->i[i+1]; k++) {
1548           for (l=0; l<bs; l++) {
1549 #if defined(PETSC_USE_COMPLEX)
1550             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1551               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1552                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1553             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1554               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1555                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1556             } else {
1557               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1558             }
1559 #else
1560             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1561 #endif
1562           }
1563         }
1564         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1565       }
1566     }
1567     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1568   }
1569   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1570   PetscFunctionReturn(0);
1571 }
1572 
1573 #include <petscdraw.h>
1574 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1575 {
1576   Mat               A = (Mat) Aa;
1577   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1578   PetscErrorCode    ierr;
1579   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1580   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1581   MatScalar         *aa;
1582   PetscViewer       viewer;
1583   PetscViewerFormat format;
1584 
1585   PetscFunctionBegin;
1586   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1587   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1588   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1589 
1590   /* loop over matrix elements drawing boxes */
1591 
1592   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1593     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1594     /* Blue for negative, Cyan for zero and  Red for positive */
1595     color = PETSC_DRAW_BLUE;
1596     for (i=0,row=0; i<mbs; i++,row+=bs) {
1597       for (j=a->i[i]; j<a->i[i+1]; j++) {
1598         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1599         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1600         aa  = a->a + j*bs2;
1601         for (k=0; k<bs; k++) {
1602           for (l=0; l<bs; l++) {
1603             if (PetscRealPart(*aa++) >=  0.) continue;
1604             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1605           }
1606         }
1607       }
1608     }
1609     color = PETSC_DRAW_CYAN;
1610     for (i=0,row=0; i<mbs; i++,row+=bs) {
1611       for (j=a->i[i]; j<a->i[i+1]; j++) {
1612         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1613         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1614         aa  = a->a + j*bs2;
1615         for (k=0; k<bs; k++) {
1616           for (l=0; l<bs; l++) {
1617             if (PetscRealPart(*aa++) != 0.) continue;
1618             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1619           }
1620         }
1621       }
1622     }
1623     color = PETSC_DRAW_RED;
1624     for (i=0,row=0; i<mbs; i++,row+=bs) {
1625       for (j=a->i[i]; j<a->i[i+1]; j++) {
1626         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1627         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1628         aa  = a->a + j*bs2;
1629         for (k=0; k<bs; k++) {
1630           for (l=0; l<bs; l++) {
1631             if (PetscRealPart(*aa++) <= 0.) continue;
1632             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1633           }
1634         }
1635       }
1636     }
1637     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1638   } else {
1639     /* use contour shading to indicate magnitude of values */
1640     /* first determine max of all nonzero values */
1641     PetscReal minv = 0.0, maxv = 0.0;
1642     PetscDraw popup;
1643 
1644     for (i=0; i<a->nz*a->bs2; i++) {
1645       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1646     }
1647     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1648     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1649     ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1650 
1651     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1652     for (i=0,row=0; i<mbs; i++,row+=bs) {
1653       for (j=a->i[i]; j<a->i[i+1]; j++) {
1654         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1655         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1656         aa  = a->a + j*bs2;
1657         for (k=0; k<bs; k++) {
1658           for (l=0; l<bs; l++) {
1659             MatScalar v = *aa++;
1660             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1661             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1662           }
1663         }
1664       }
1665     }
1666     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1667   }
1668   PetscFunctionReturn(0);
1669 }
1670 
1671 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1672 {
1673   PetscErrorCode ierr;
1674   PetscReal      xl,yl,xr,yr,w,h;
1675   PetscDraw      draw;
1676   PetscBool      isnull;
1677 
1678   PetscFunctionBegin;
1679   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1680   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1681   if (isnull) PetscFunctionReturn(0);
1682 
1683   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1684   xr  += w;          yr += h;        xl = -w;     yl = -h;
1685   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1686   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1687   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1688   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1689   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1690   PetscFunctionReturn(0);
1691 }
1692 
1693 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1694 {
1695   PetscErrorCode ierr;
1696   PetscBool      iascii,isbinary,isdraw;
1697 
1698   PetscFunctionBegin;
1699   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1700   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1701   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1702   if (iascii) {
1703     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1704   } else if (isbinary) {
1705     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1706   } else if (isdraw) {
1707     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1708   } else {
1709     Mat B;
1710     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1711     ierr = MatView(B,viewer);CHKERRQ(ierr);
1712     ierr = MatDestroy(&B);CHKERRQ(ierr);
1713   }
1714   PetscFunctionReturn(0);
1715 }
1716 
1717 
1718 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1719 {
1720   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1721   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1722   PetscInt    *ai = a->i,*ailen = a->ilen;
1723   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1724   MatScalar   *ap,*aa = a->a;
1725 
1726   PetscFunctionBegin;
1727   for (k=0; k<m; k++) { /* loop over rows */
1728     row = im[k]; brow = row/bs;
1729     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1730     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1731     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1732     nrow = ailen[brow];
1733     for (l=0; l<n; l++) { /* loop over columns */
1734       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1735       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1736       col  = in[l];
1737       bcol = col/bs;
1738       cidx = col%bs;
1739       ridx = row%bs;
1740       high = nrow;
1741       low  = 0; /* assume unsorted */
1742       while (high-low > 5) {
1743         t = (low+high)/2;
1744         if (rp[t] > bcol) high = t;
1745         else             low  = t;
1746       }
1747       for (i=low; i<high; i++) {
1748         if (rp[i] > bcol) break;
1749         if (rp[i] == bcol) {
1750           *v++ = ap[bs2*i+bs*cidx+ridx];
1751           goto finished;
1752         }
1753       }
1754       *v++ = 0.0;
1755 finished:;
1756     }
1757   }
1758   PetscFunctionReturn(0);
1759 }
1760 
1761 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1762 {
1763   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1764   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1765   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1766   PetscErrorCode    ierr;
1767   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1768   PetscBool         roworiented=a->roworiented;
1769   const PetscScalar *value     = v;
1770   MatScalar         *ap=NULL,*aa = a->a,*bap;
1771 
1772   PetscFunctionBegin;
1773   if (roworiented) {
1774     stepval = (n-1)*bs;
1775   } else {
1776     stepval = (m-1)*bs;
1777   }
1778   for (k=0; k<m; k++) { /* loop over added rows */
1779     row = im[k];
1780     if (row < 0) continue;
1781 #if defined(PETSC_USE_DEBUG)
1782     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1783 #endif
1784     rp   = aj + ai[row];
1785     if (!A->structure_only) ap = aa + bs2*ai[row];
1786     rmax = imax[row];
1787     nrow = ailen[row];
1788     low  = 0;
1789     high = nrow;
1790     for (l=0; l<n; l++) { /* loop over added columns */
1791       if (in[l] < 0) continue;
1792 #if defined(PETSC_USE_DEBUG)
1793       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1794 #endif
1795       col = in[l];
1796       if (!A->structure_only) {
1797         if (roworiented) {
1798           value = v + (k*(stepval+bs) + l)*bs;
1799         } else {
1800           value = v + (l*(stepval+bs) + k)*bs;
1801         }
1802       }
1803       if (col <= lastcol) low = 0;
1804       else high = nrow;
1805       lastcol = col;
1806       while (high-low > 7) {
1807         t = (low+high)/2;
1808         if (rp[t] > col) high = t;
1809         else             low  = t;
1810       }
1811       for (i=low; i<high; i++) {
1812         if (rp[i] > col) break;
1813         if (rp[i] == col) {
1814           if (A->structure_only) goto noinsert2;
1815           bap = ap +  bs2*i;
1816           if (roworiented) {
1817             if (is == ADD_VALUES) {
1818               for (ii=0; ii<bs; ii++,value+=stepval) {
1819                 for (jj=ii; jj<bs2; jj+=bs) {
1820                   bap[jj] += *value++;
1821                 }
1822               }
1823             } else {
1824               for (ii=0; ii<bs; ii++,value+=stepval) {
1825                 for (jj=ii; jj<bs2; jj+=bs) {
1826                   bap[jj] = *value++;
1827                 }
1828               }
1829             }
1830           } else {
1831             if (is == ADD_VALUES) {
1832               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1833                 for (jj=0; jj<bs; jj++) {
1834                   bap[jj] += value[jj];
1835                 }
1836                 bap += bs;
1837               }
1838             } else {
1839               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1840                 for (jj=0; jj<bs; jj++) {
1841                   bap[jj]  = value[jj];
1842                 }
1843                 bap += bs;
1844               }
1845             }
1846           }
1847           goto noinsert2;
1848         }
1849       }
1850       if (nonew == 1) goto noinsert2;
1851       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1852       if (A->structure_only) {
1853         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1854       } else {
1855         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1856       }
1857       N = nrow++ - 1; high++;
1858       /* shift up all the later entries in this row */
1859       for (ii=N; ii>=i; ii--) {
1860         rp[ii+1] = rp[ii];
1861         if (!A->structure_only) {
1862           ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1863         }
1864       }
1865       if (N >= i && !A->structure_only) {
1866         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1867       }
1868 
1869       rp[i] = col;
1870       if (!A->structure_only) {
1871         bap   = ap +  bs2*i;
1872         if (roworiented) {
1873           for (ii=0; ii<bs; ii++,value+=stepval) {
1874             for (jj=ii; jj<bs2; jj+=bs) {
1875               bap[jj] = *value++;
1876             }
1877           }
1878         } else {
1879           for (ii=0; ii<bs; ii++,value+=stepval) {
1880             for (jj=0; jj<bs; jj++) {
1881               *bap++ = *value++;
1882             }
1883           }
1884         }
1885       }
1886 noinsert2:;
1887       low = i;
1888     }
1889     ailen[row] = nrow;
1890   }
1891   PetscFunctionReturn(0);
1892 }
1893 
1894 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1895 {
1896   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1897   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1898   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1899   PetscErrorCode ierr;
1900   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1901   MatScalar      *aa  = a->a,*ap;
1902   PetscReal      ratio=0.6;
1903 
1904   PetscFunctionBegin;
1905   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1906 
1907   if (m) rmax = ailen[0];
1908   for (i=1; i<mbs; i++) {
1909     /* move each row back by the amount of empty slots (fshift) before it*/
1910     fshift += imax[i-1] - ailen[i-1];
1911     rmax    = PetscMax(rmax,ailen[i]);
1912     if (fshift) {
1913       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1914       N  = ailen[i];
1915       for (j=0; j<N; j++) {
1916         ip[j-fshift] = ip[j];
1917         if (!A->structure_only) {
1918           ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1919         }
1920       }
1921     }
1922     ai[i] = ai[i-1] + ailen[i-1];
1923   }
1924   if (mbs) {
1925     fshift += imax[mbs-1] - ailen[mbs-1];
1926     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1927   }
1928 
1929   /* reset ilen and imax for each row */
1930   a->nonzerorowcnt = 0;
1931   if (A->structure_only) {
1932     ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1933   } else { /* !A->structure_only */
1934     for (i=0; i<mbs; i++) {
1935       ailen[i] = imax[i] = ai[i+1] - ai[i];
1936       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1937     }
1938   }
1939   a->nz = ai[mbs];
1940 
1941   /* diagonals may have moved, so kill the diagonal pointers */
1942   a->idiagvalid = PETSC_FALSE;
1943   if (fshift && a->diag) {
1944     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1945     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1946     a->diag = 0;
1947   }
1948   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1949   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1950   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1951   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1952 
1953   A->info.mallocs    += a->reallocs;
1954   a->reallocs         = 0;
1955   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1956   a->rmax             = rmax;
1957 
1958   if (!A->structure_only) {
1959     ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1960   }
1961   PetscFunctionReturn(0);
1962 }
1963 
1964 /*
1965    This function returns an array of flags which indicate the locations of contiguous
1966    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1967    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1968    Assume: sizes should be long enough to hold all the values.
1969 */
1970 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1971 {
1972   PetscInt  i,j,k,row;
1973   PetscBool flg;
1974 
1975   PetscFunctionBegin;
1976   for (i=0,j=0; i<n; j++) {
1977     row = idx[i];
1978     if (row%bs!=0) { /* Not the begining of a block */
1979       sizes[j] = 1;
1980       i++;
1981     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1982       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1983       i++;
1984     } else { /* Begining of the block, so check if the complete block exists */
1985       flg = PETSC_TRUE;
1986       for (k=1; k<bs; k++) {
1987         if (row+k != idx[i+k]) { /* break in the block */
1988           flg = PETSC_FALSE;
1989           break;
1990         }
1991       }
1992       if (flg) { /* No break in the bs */
1993         sizes[j] = bs;
1994         i       += bs;
1995       } else {
1996         sizes[j] = 1;
1997         i++;
1998       }
1999     }
2000   }
2001   *bs_max = j;
2002   PetscFunctionReturn(0);
2003 }
2004 
2005 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2006 {
2007   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2008   PetscErrorCode    ierr;
2009   PetscInt          i,j,k,count,*rows;
2010   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2011   PetscScalar       zero = 0.0;
2012   MatScalar         *aa;
2013   const PetscScalar *xx;
2014   PetscScalar       *bb;
2015 
2016   PetscFunctionBegin;
2017   /* fix right hand side if needed */
2018   if (x && b) {
2019     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2020     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2021     for (i=0; i<is_n; i++) {
2022       bb[is_idx[i]] = diag*xx[is_idx[i]];
2023     }
2024     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2025     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2026   }
2027 
2028   /* Make a copy of the IS and  sort it */
2029   /* allocate memory for rows,sizes */
2030   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2031 
2032   /* copy IS values to rows, and sort them */
2033   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2034   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2035 
2036   if (baij->keepnonzeropattern) {
2037     for (i=0; i<is_n; i++) sizes[i] = 1;
2038     bs_max          = is_n;
2039   } else {
2040     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2041     A->nonzerostate++;
2042   }
2043 
2044   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2045     row = rows[j];
2046     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2047     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2048     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2049     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2050       if (diag != (PetscScalar)0.0) {
2051         if (baij->ilen[row/bs] > 0) {
2052           baij->ilen[row/bs]       = 1;
2053           baij->j[baij->i[row/bs]] = row/bs;
2054 
2055           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2056         }
2057         /* Now insert all the diagonal values for this bs */
2058         for (k=0; k<bs; k++) {
2059           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2060         }
2061       } else { /* (diag == 0.0) */
2062         baij->ilen[row/bs] = 0;
2063       } /* end (diag == 0.0) */
2064     } else { /* (sizes[i] != bs) */
2065 #if defined(PETSC_USE_DEBUG)
2066       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2067 #endif
2068       for (k=0; k<count; k++) {
2069         aa[0] =  zero;
2070         aa   += bs;
2071       }
2072       if (diag != (PetscScalar)0.0) {
2073         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2074       }
2075     }
2076   }
2077 
2078   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2079   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2080   PetscFunctionReturn(0);
2081 }
2082 
2083 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2084 {
2085   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2086   PetscErrorCode    ierr;
2087   PetscInt          i,j,k,count;
2088   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2089   PetscScalar       zero = 0.0;
2090   MatScalar         *aa;
2091   const PetscScalar *xx;
2092   PetscScalar       *bb;
2093   PetscBool         *zeroed,vecs = PETSC_FALSE;
2094 
2095   PetscFunctionBegin;
2096   /* fix right hand side if needed */
2097   if (x && b) {
2098     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2099     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2100     vecs = PETSC_TRUE;
2101   }
2102 
2103   /* zero the columns */
2104   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2105   for (i=0; i<is_n; i++) {
2106     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2107     zeroed[is_idx[i]] = PETSC_TRUE;
2108   }
2109   for (i=0; i<A->rmap->N; i++) {
2110     if (!zeroed[i]) {
2111       row = i/bs;
2112       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2113         for (k=0; k<bs; k++) {
2114           col = bs*baij->j[j] + k;
2115           if (zeroed[col]) {
2116             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2117             if (vecs) bb[i] -= aa[0]*xx[col];
2118             aa[0] = 0.0;
2119           }
2120         }
2121       }
2122     } else if (vecs) bb[i] = diag*xx[i];
2123   }
2124   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2125   if (vecs) {
2126     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2127     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2128   }
2129 
2130   /* zero the rows */
2131   for (i=0; i<is_n; i++) {
2132     row   = is_idx[i];
2133     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2134     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2135     for (k=0; k<count; k++) {
2136       aa[0] =  zero;
2137       aa   += bs;
2138     }
2139     if (diag != (PetscScalar)0.0) {
2140       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2141     }
2142   }
2143   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2144   PetscFunctionReturn(0);
2145 }
2146 
2147 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2148 {
2149   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2150   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2151   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2152   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2153   PetscErrorCode ierr;
2154   PetscInt       ridx,cidx,bs2=a->bs2;
2155   PetscBool      roworiented=a->roworiented;
2156   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;
2157 
2158   PetscFunctionBegin;
2159   for (k=0; k<m; k++) { /* loop over added rows */
2160     row  = im[k];
2161     brow = row/bs;
2162     if (row < 0) continue;
2163 #if defined(PETSC_USE_DEBUG)
2164     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2165 #endif
2166     rp   = aj + ai[brow];
2167     if (!A->structure_only) ap = aa + bs2*ai[brow];
2168     rmax = imax[brow];
2169     nrow = ailen[brow];
2170     low  = 0;
2171     high = nrow;
2172     for (l=0; l<n; l++) { /* loop over added columns */
2173       if (in[l] < 0) continue;
2174 #if defined(PETSC_USE_DEBUG)
2175       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2176 #endif
2177       col  = in[l]; bcol = col/bs;
2178       ridx = row % bs; cidx = col % bs;
2179       if (!A->structure_only) {
2180         if (roworiented) {
2181           value = v[l + k*n];
2182         } else {
2183           value = v[k + l*m];
2184         }
2185       }
2186       if (col <= lastcol) low = 0; else high = nrow;
2187       lastcol = col;
2188       while (high-low > 7) {
2189         t = (low+high)/2;
2190         if (rp[t] > bcol) high = t;
2191         else              low  = t;
2192       }
2193       for (i=low; i<high; i++) {
2194         if (rp[i] > bcol) break;
2195         if (rp[i] == bcol) {
2196           bap = ap +  bs2*i + bs*cidx + ridx;
2197           if (!A->structure_only) {
2198             if (is == ADD_VALUES) *bap += value;
2199             else                  *bap  = value;
2200           }
2201           goto noinsert1;
2202         }
2203       }
2204       if (nonew == 1) goto noinsert1;
2205       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2206       if (A->structure_only) {
2207         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2208       } else {
2209         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2210       }
2211       N = nrow++ - 1; high++;
2212       /* shift up all the later entries in this row */
2213       for (ii=N; ii>=i; ii--) {
2214         rp[ii+1] = rp[ii];
2215         if (!A->structure_only) {
2216           ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2217         }
2218       }
2219       if (N>=i && !A->structure_only) {
2220         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2221       }
2222       rp[i]                      = bcol;
2223       if (!A->structure_only) ap[bs2*i + bs*cidx + ridx] = value;
2224       a->nz++;
2225       A->nonzerostate++;
2226 noinsert1:;
2227       low = i;
2228     }
2229     ailen[brow] = nrow;
2230   }
2231   PetscFunctionReturn(0);
2232 }
2233 
2234 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2235 {
2236   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2237   Mat            outA;
2238   PetscErrorCode ierr;
2239   PetscBool      row_identity,col_identity;
2240 
2241   PetscFunctionBegin;
2242   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2243   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2244   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2245   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2246 
2247   outA            = inA;
2248   inA->factortype = MAT_FACTOR_LU;
2249   ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2250   ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
2251 
2252   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2253 
2254   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2255   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2256   a->row = row;
2257   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2258   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2259   a->col = col;
2260 
2261   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2262   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2263   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2264   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2265 
2266   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2267   if (!a->solve_work) {
2268     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2269     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2270   }
2271   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2272   PetscFunctionReturn(0);
2273 }
2274 
2275 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2276 {
2277   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2278   PetscInt    i,nz,mbs;
2279 
2280   PetscFunctionBegin;
2281   nz  = baij->maxnz;
2282   mbs = baij->mbs;
2283   for (i=0; i<nz; i++) {
2284     baij->j[i] = indices[i];
2285   }
2286   baij->nz = nz;
2287   for (i=0; i<mbs; i++) {
2288     baij->ilen[i] = baij->imax[i];
2289   }
2290   PetscFunctionReturn(0);
2291 }
2292 
2293 /*@
2294     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2295        in the matrix.
2296 
2297   Input Parameters:
2298 +  mat - the SeqBAIJ matrix
2299 -  indices - the column indices
2300 
2301   Level: advanced
2302 
2303   Notes:
2304     This can be called if you have precomputed the nonzero structure of the
2305   matrix and want to provide it to the matrix object to improve the performance
2306   of the MatSetValues() operation.
2307 
2308     You MUST have set the correct numbers of nonzeros per row in the call to
2309   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2310 
2311     MUST be called before any calls to MatSetValues();
2312 
2313 @*/
2314 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2315 {
2316   PetscErrorCode ierr;
2317 
2318   PetscFunctionBegin;
2319   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2320   PetscValidPointer(indices,2);
2321   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2322   PetscFunctionReturn(0);
2323 }
2324 
2325 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2326 {
2327   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2328   PetscErrorCode ierr;
2329   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2330   PetscReal      atmp;
2331   PetscScalar    *x,zero = 0.0;
2332   MatScalar      *aa;
2333   PetscInt       ncols,brow,krow,kcol;
2334 
2335   PetscFunctionBegin;
2336   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2337   bs  = A->rmap->bs;
2338   aa  = a->a;
2339   ai  = a->i;
2340   aj  = a->j;
2341   mbs = a->mbs;
2342 
2343   ierr = VecSet(v,zero);CHKERRQ(ierr);
2344   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2345   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2346   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2347   for (i=0; i<mbs; i++) {
2348     ncols = ai[1] - ai[0]; ai++;
2349     brow  = bs*i;
2350     for (j=0; j<ncols; j++) {
2351       for (kcol=0; kcol<bs; kcol++) {
2352         for (krow=0; krow<bs; krow++) {
2353           atmp = PetscAbsScalar(*aa);aa++;
2354           row  = brow + krow;   /* row index */
2355           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2356         }
2357       }
2358       aj++;
2359     }
2360   }
2361   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2362   PetscFunctionReturn(0);
2363 }
2364 
2365 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2366 {
2367   PetscErrorCode ierr;
2368 
2369   PetscFunctionBegin;
2370   /* If the two matrices have the same copy implementation, use fast copy. */
2371   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2372     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2373     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2374     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2375 
2376     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2377     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2378     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2379     ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2380   } else {
2381     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2382   }
2383   PetscFunctionReturn(0);
2384 }
2385 
2386 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2387 {
2388   PetscErrorCode ierr;
2389 
2390   PetscFunctionBegin;
2391   ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2392   PetscFunctionReturn(0);
2393 }
2394 
2395 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2396 {
2397   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2398 
2399   PetscFunctionBegin;
2400   *array = a->a;
2401   PetscFunctionReturn(0);
2402 }
2403 
2404 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2405 {
2406   PetscFunctionBegin;
2407   PetscFunctionReturn(0);
2408 }
2409 
2410 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2411 {
2412   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2413   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2414   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2415   PetscErrorCode ierr;
2416 
2417   PetscFunctionBegin;
2418   /* Set the number of nonzeros in the new matrix */
2419   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2420   PetscFunctionReturn(0);
2421 }
2422 
2423 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2424 {
2425   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2426   PetscErrorCode ierr;
2427   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2428   PetscBLASInt   one=1;
2429 
2430   PetscFunctionBegin;
2431   if (str == SAME_NONZERO_PATTERN) {
2432     PetscScalar  alpha = a;
2433     PetscBLASInt bnz;
2434     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2435     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2436     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2437   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2438     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2439   } else {
2440     Mat      B;
2441     PetscInt *nnz;
2442     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2443     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2444     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2445     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2446     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2447     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2448     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2449     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2450     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2451     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2452     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2453     ierr = PetscFree(nnz);CHKERRQ(ierr);
2454   }
2455   PetscFunctionReturn(0);
2456 }
2457 
2458 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2459 {
2460   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2461   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2462   MatScalar   *aa = a->a;
2463 
2464   PetscFunctionBegin;
2465   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2466   PetscFunctionReturn(0);
2467 }
2468 
2469 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2470 {
2471   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2472   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2473   MatScalar   *aa = a->a;
2474 
2475   PetscFunctionBegin;
2476   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2477   PetscFunctionReturn(0);
2478 }
2479 
2480 /*
2481     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2482 */
2483 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2484 {
2485   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2486   PetscErrorCode ierr;
2487   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2488   PetscInt       nz = a->i[m],row,*jj,mr,col;
2489 
2490   PetscFunctionBegin;
2491   *nn = n;
2492   if (!ia) PetscFunctionReturn(0);
2493   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2494   else {
2495     ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2496     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2497     ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2498     jj   = a->j;
2499     for (i=0; i<nz; i++) {
2500       collengths[jj[i]]++;
2501     }
2502     cia[0] = oshift;
2503     for (i=0; i<n; i++) {
2504       cia[i+1] = cia[i] + collengths[i];
2505     }
2506     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2507     jj   = a->j;
2508     for (row=0; row<m; row++) {
2509       mr = a->i[row+1] - a->i[row];
2510       for (i=0; i<mr; i++) {
2511         col = *jj++;
2512 
2513         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2514       }
2515     }
2516     ierr = PetscFree(collengths);CHKERRQ(ierr);
2517     *ia  = cia; *ja = cja;
2518   }
2519   PetscFunctionReturn(0);
2520 }
2521 
2522 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2523 {
2524   PetscErrorCode ierr;
2525 
2526   PetscFunctionBegin;
2527   if (!ia) PetscFunctionReturn(0);
2528   ierr = PetscFree(*ia);CHKERRQ(ierr);
2529   ierr = PetscFree(*ja);CHKERRQ(ierr);
2530   PetscFunctionReturn(0);
2531 }
2532 
2533 /*
2534  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2535  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2536  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2537  */
2538 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2539 {
2540   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2541   PetscErrorCode ierr;
2542   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2543   PetscInt       nz = a->i[m],row,*jj,mr,col;
2544   PetscInt       *cspidx;
2545 
2546   PetscFunctionBegin;
2547   *nn = n;
2548   if (!ia) PetscFunctionReturn(0);
2549 
2550   ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2551   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2552   ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2553   ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr);
2554   jj   = a->j;
2555   for (i=0; i<nz; i++) {
2556     collengths[jj[i]]++;
2557   }
2558   cia[0] = oshift;
2559   for (i=0; i<n; i++) {
2560     cia[i+1] = cia[i] + collengths[i];
2561   }
2562   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2563   jj   = a->j;
2564   for (row=0; row<m; row++) {
2565     mr = a->i[row+1] - a->i[row];
2566     for (i=0; i<mr; i++) {
2567       col = *jj++;
2568       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2569       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2570     }
2571   }
2572   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2573   *ia    = cia; *ja = cja;
2574   *spidx = cspidx;
2575   PetscFunctionReturn(0);
2576 }
2577 
2578 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2579 {
2580   PetscErrorCode ierr;
2581 
2582   PetscFunctionBegin;
2583   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2584   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2585   PetscFunctionReturn(0);
2586 }
2587 
2588 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2589 {
2590   PetscErrorCode ierr;
2591   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2592 
2593   PetscFunctionBegin;
2594   if (!Y->preallocated || !aij->nz) {
2595     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2596   }
2597   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2598   PetscFunctionReturn(0);
2599 }
2600 
2601 /* -------------------------------------------------------------------*/
2602 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2603                                        MatGetRow_SeqBAIJ,
2604                                        MatRestoreRow_SeqBAIJ,
2605                                        MatMult_SeqBAIJ_N,
2606                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2607                                        MatMultTranspose_SeqBAIJ,
2608                                        MatMultTransposeAdd_SeqBAIJ,
2609                                        0,
2610                                        0,
2611                                        0,
2612                                /* 10*/ 0,
2613                                        MatLUFactor_SeqBAIJ,
2614                                        0,
2615                                        0,
2616                                        MatTranspose_SeqBAIJ,
2617                                /* 15*/ MatGetInfo_SeqBAIJ,
2618                                        MatEqual_SeqBAIJ,
2619                                        MatGetDiagonal_SeqBAIJ,
2620                                        MatDiagonalScale_SeqBAIJ,
2621                                        MatNorm_SeqBAIJ,
2622                                /* 20*/ 0,
2623                                        MatAssemblyEnd_SeqBAIJ,
2624                                        MatSetOption_SeqBAIJ,
2625                                        MatZeroEntries_SeqBAIJ,
2626                                /* 24*/ MatZeroRows_SeqBAIJ,
2627                                        0,
2628                                        0,
2629                                        0,
2630                                        0,
2631                                /* 29*/ MatSetUp_SeqBAIJ,
2632                                        0,
2633                                        0,
2634                                        0,
2635                                        0,
2636                                /* 34*/ MatDuplicate_SeqBAIJ,
2637                                        0,
2638                                        0,
2639                                        MatILUFactor_SeqBAIJ,
2640                                        0,
2641                                /* 39*/ MatAXPY_SeqBAIJ,
2642                                        MatCreateSubMatrices_SeqBAIJ,
2643                                        MatIncreaseOverlap_SeqBAIJ,
2644                                        MatGetValues_SeqBAIJ,
2645                                        MatCopy_SeqBAIJ,
2646                                /* 44*/ 0,
2647                                        MatScale_SeqBAIJ,
2648                                        MatShift_SeqBAIJ,
2649                                        0,
2650                                        MatZeroRowsColumns_SeqBAIJ,
2651                                /* 49*/ 0,
2652                                        MatGetRowIJ_SeqBAIJ,
2653                                        MatRestoreRowIJ_SeqBAIJ,
2654                                        MatGetColumnIJ_SeqBAIJ,
2655                                        MatRestoreColumnIJ_SeqBAIJ,
2656                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2657                                        0,
2658                                        0,
2659                                        0,
2660                                        MatSetValuesBlocked_SeqBAIJ,
2661                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2662                                        MatDestroy_SeqBAIJ,
2663                                        MatView_SeqBAIJ,
2664                                        0,
2665                                        0,
2666                                /* 64*/ 0,
2667                                        0,
2668                                        0,
2669                                        0,
2670                                        0,
2671                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2672                                        0,
2673                                        MatConvert_Basic,
2674                                        0,
2675                                        0,
2676                                /* 74*/ 0,
2677                                        MatFDColoringApply_BAIJ,
2678                                        0,
2679                                        0,
2680                                        0,
2681                                /* 79*/ 0,
2682                                        0,
2683                                        0,
2684                                        0,
2685                                        MatLoad_SeqBAIJ,
2686                                /* 84*/ 0,
2687                                        0,
2688                                        0,
2689                                        0,
2690                                        0,
2691                                /* 89*/ 0,
2692                                        0,
2693                                        0,
2694                                        0,
2695                                        0,
2696                                /* 94*/ 0,
2697                                        0,
2698                                        0,
2699                                        0,
2700                                        0,
2701                                /* 99*/ 0,
2702                                        0,
2703                                        0,
2704                                        0,
2705                                        0,
2706                                /*104*/ 0,
2707                                        MatRealPart_SeqBAIJ,
2708                                        MatImaginaryPart_SeqBAIJ,
2709                                        0,
2710                                        0,
2711                                /*109*/ 0,
2712                                        0,
2713                                        0,
2714                                        0,
2715                                        MatMissingDiagonal_SeqBAIJ,
2716                                /*114*/ 0,
2717                                        0,
2718                                        0,
2719                                        0,
2720                                        0,
2721                                /*119*/ 0,
2722                                        0,
2723                                        MatMultHermitianTranspose_SeqBAIJ,
2724                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2725                                        0,
2726                                /*124*/ 0,
2727                                        0,
2728                                        MatInvertBlockDiagonal_SeqBAIJ,
2729                                        0,
2730                                        0,
2731                                /*129*/ 0,
2732                                        0,
2733                                        0,
2734                                        0,
2735                                        0,
2736                                /*134*/ 0,
2737                                        0,
2738                                        0,
2739                                        0,
2740                                        0,
2741                                /*139*/ MatSetBlockSizes_Default,
2742                                        0,
2743                                        0,
2744                                        MatFDColoringSetUp_SeqXAIJ,
2745                                        0,
2746                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2747                                        MatDestroySubMatrices_SeqBAIJ
2748 };
2749 
2750 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2751 {
2752   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2753   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2754   PetscErrorCode ierr;
2755 
2756   PetscFunctionBegin;
2757   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2758 
2759   /* allocate space for values if not already there */
2760   if (!aij->saved_values) {
2761     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2762     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2763   }
2764 
2765   /* copy values over */
2766   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2767   PetscFunctionReturn(0);
2768 }
2769 
2770 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2771 {
2772   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2773   PetscErrorCode ierr;
2774   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2775 
2776   PetscFunctionBegin;
2777   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2778   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2779 
2780   /* copy values over */
2781   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2782   PetscFunctionReturn(0);
2783 }
2784 
2785 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2786 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2787 
2788 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2789 {
2790   Mat_SeqBAIJ    *b;
2791   PetscErrorCode ierr;
2792   PetscInt       i,mbs,nbs,bs2;
2793   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2794 
2795   PetscFunctionBegin;
2796   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2797   if (nz == MAT_SKIP_ALLOCATION) {
2798     skipallocation = PETSC_TRUE;
2799     nz             = 0;
2800   }
2801 
2802   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2803   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2804   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2805   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2806 
2807   B->preallocated = PETSC_TRUE;
2808 
2809   mbs = B->rmap->n/bs;
2810   nbs = B->cmap->n/bs;
2811   bs2 = bs*bs;
2812 
2813   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2814 
2815   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2816   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2817   if (nnz) {
2818     for (i=0; i<mbs; i++) {
2819       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2820       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2821     }
2822   }
2823 
2824   b    = (Mat_SeqBAIJ*)B->data;
2825   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2826   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2827   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2828 
2829   if (!flg) {
2830     switch (bs) {
2831     case 1:
2832       B->ops->mult    = MatMult_SeqBAIJ_1;
2833       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2834       break;
2835     case 2:
2836       B->ops->mult    = MatMult_SeqBAIJ_2;
2837       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2838       break;
2839     case 3:
2840       B->ops->mult    = MatMult_SeqBAIJ_3;
2841       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2842       break;
2843     case 4:
2844       B->ops->mult    = MatMult_SeqBAIJ_4;
2845       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2846       break;
2847     case 5:
2848       B->ops->mult    = MatMult_SeqBAIJ_5;
2849       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2850       break;
2851     case 6:
2852       B->ops->mult    = MatMult_SeqBAIJ_6;
2853       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2854       break;
2855     case 7:
2856       B->ops->mult    = MatMult_SeqBAIJ_7;
2857       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2858       break;
2859     case 9:
2860 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2861       B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
2862       B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
2863 #else
2864       B->ops->mult    = MatMult_SeqBAIJ_N;
2865       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2866 #endif
2867       break;
2868     case 11:
2869       B->ops->mult    = MatMult_SeqBAIJ_11;
2870       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2871       break;
2872     case 15:
2873       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2874       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2875       break;
2876     default:
2877       B->ops->mult    = MatMult_SeqBAIJ_N;
2878       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2879       break;
2880     }
2881   }
2882   B->ops->sor = MatSOR_SeqBAIJ;
2883   b->mbs = mbs;
2884   b->nbs = nbs;
2885   if (!skipallocation) {
2886     if (!b->imax) {
2887       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2888       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2889 
2890       b->free_imax_ilen = PETSC_TRUE;
2891     }
2892     /* b->ilen will count nonzeros in each block row so far. */
2893     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2894     if (!nnz) {
2895       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2896       else if (nz < 0) nz = 1;
2897       for (i=0; i<mbs; i++) b->imax[i] = nz;
2898       nz = nz*mbs;
2899     } else {
2900       nz = 0;
2901       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2902     }
2903 
2904     /* allocate the matrix space */
2905     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2906     if (B->structure_only) {
2907       ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr);
2908       ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr);
2909       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr);
2910     } else {
2911       ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2912       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2913       ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2914     }
2915     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2916 
2917     if (B->structure_only) {
2918       b->singlemalloc = PETSC_FALSE;
2919       b->free_a       = PETSC_FALSE;
2920     } else {
2921       b->singlemalloc = PETSC_TRUE;
2922       b->free_a       = PETSC_TRUE;
2923     }
2924     b->free_ij = PETSC_TRUE;
2925 
2926     b->i[0] = 0;
2927     for (i=1; i<mbs+1; i++) {
2928       b->i[i] = b->i[i-1] + b->imax[i-1];
2929     }
2930 
2931   } else {
2932     b->free_a  = PETSC_FALSE;
2933     b->free_ij = PETSC_FALSE;
2934   }
2935 
2936   b->bs2              = bs2;
2937   b->mbs              = mbs;
2938   b->nz               = 0;
2939   b->maxnz            = nz;
2940   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2941   B->was_assembled    = PETSC_FALSE;
2942   B->assembled        = PETSC_FALSE;
2943   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2944   PetscFunctionReturn(0);
2945 }
2946 
2947 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2948 {
2949   PetscInt       i,m,nz,nz_max=0,*nnz;
2950   PetscScalar    *values=0;
2951   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2952   PetscErrorCode ierr;
2953 
2954   PetscFunctionBegin;
2955   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2956   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2957   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2958   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2959   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2960   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2961   m    = B->rmap->n/bs;
2962 
2963   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2964   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2965   for (i=0; i<m; i++) {
2966     nz = ii[i+1]- ii[i];
2967     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2968     nz_max = PetscMax(nz_max, nz);
2969     nnz[i] = nz;
2970   }
2971   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2972   ierr = PetscFree(nnz);CHKERRQ(ierr);
2973 
2974   values = (PetscScalar*)V;
2975   if (!values) {
2976     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2977   }
2978   for (i=0; i<m; i++) {
2979     PetscInt          ncols  = ii[i+1] - ii[i];
2980     const PetscInt    *icols = jj + ii[i];
2981     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2982     if (!roworiented) {
2983       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2984     } else {
2985       PetscInt j;
2986       for (j=0; j<ncols; j++) {
2987         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2988         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2989       }
2990     }
2991   }
2992   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2993   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2994   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2995   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2996   PetscFunctionReturn(0);
2997 }
2998 
2999 /*MC
3000    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3001    block sparse compressed row format.
3002 
3003    Options Database Keys:
3004 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3005 
3006   Level: beginner
3007 
3008 .seealso: MatCreateSeqBAIJ()
3009 M*/
3010 
3011 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3012 
3013 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3014 {
3015   PetscErrorCode ierr;
3016   PetscMPIInt    size;
3017   Mat_SeqBAIJ    *b;
3018 
3019   PetscFunctionBegin;
3020   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3021   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3022 
3023   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3024   B->data = (void*)b;
3025   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3026 
3027   b->row          = 0;
3028   b->col          = 0;
3029   b->icol         = 0;
3030   b->reallocs     = 0;
3031   b->saved_values = 0;
3032 
3033   b->roworiented        = PETSC_TRUE;
3034   b->nonew              = 0;
3035   b->diag               = 0;
3036   B->spptr              = 0;
3037   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3038   b->keepnonzeropattern = PETSC_FALSE;
3039 
3040   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3041   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3042   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3043   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3044   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3045   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3046   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3047   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3048   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3049 #if defined(PETSC_HAVE_HYPRE)
3050   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
3051 #endif
3052   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
3053   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqbaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
3054   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3055   PetscFunctionReturn(0);
3056 }
3057 
3058 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3059 {
3060   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3061   PetscErrorCode ierr;
3062   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3063 
3064   PetscFunctionBegin;
3065   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3066 
3067   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3068     c->imax           = a->imax;
3069     c->ilen           = a->ilen;
3070     c->free_imax_ilen = PETSC_FALSE;
3071   } else {
3072     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3073     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3074     for (i=0; i<mbs; i++) {
3075       c->imax[i] = a->imax[i];
3076       c->ilen[i] = a->ilen[i];
3077     }
3078     c->free_imax_ilen = PETSC_TRUE;
3079   }
3080 
3081   /* allocate the matrix space */
3082   if (mallocmatspace) {
3083     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3084       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3085       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3086 
3087       c->i            = a->i;
3088       c->j            = a->j;
3089       c->singlemalloc = PETSC_FALSE;
3090       c->free_a       = PETSC_TRUE;
3091       c->free_ij      = PETSC_FALSE;
3092       c->parent       = A;
3093       C->preallocated = PETSC_TRUE;
3094       C->assembled    = PETSC_TRUE;
3095 
3096       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3097       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3098       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3099     } else {
3100       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3101       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3102 
3103       c->singlemalloc = PETSC_TRUE;
3104       c->free_a       = PETSC_TRUE;
3105       c->free_ij      = PETSC_TRUE;
3106 
3107       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3108       if (mbs > 0) {
3109         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3110         if (cpvalues == MAT_COPY_VALUES) {
3111           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3112         } else {
3113           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3114         }
3115       }
3116       C->preallocated = PETSC_TRUE;
3117       C->assembled    = PETSC_TRUE;
3118     }
3119   }
3120 
3121   c->roworiented = a->roworiented;
3122   c->nonew       = a->nonew;
3123 
3124   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3125   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3126 
3127   c->bs2         = a->bs2;
3128   c->mbs         = a->mbs;
3129   c->nbs         = a->nbs;
3130 
3131   if (a->diag) {
3132     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3133       c->diag      = a->diag;
3134       c->free_diag = PETSC_FALSE;
3135     } else {
3136       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3137       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3138       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3139       c->free_diag = PETSC_TRUE;
3140     }
3141   } else c->diag = 0;
3142 
3143   c->nz         = a->nz;
3144   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3145   c->solve_work = NULL;
3146   c->mult_work  = NULL;
3147   c->sor_workt  = NULL;
3148   c->sor_work   = NULL;
3149 
3150   c->compressedrow.use   = a->compressedrow.use;
3151   c->compressedrow.nrows = a->compressedrow.nrows;
3152   if (a->compressedrow.use) {
3153     i    = a->compressedrow.nrows;
3154     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3155     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3156     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3157     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3158   } else {
3159     c->compressedrow.use    = PETSC_FALSE;
3160     c->compressedrow.i      = NULL;
3161     c->compressedrow.rindex = NULL;
3162   }
3163   C->nonzerostate = A->nonzerostate;
3164 
3165   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3166   PetscFunctionReturn(0);
3167 }
3168 
3169 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3170 {
3171   PetscErrorCode ierr;
3172 
3173   PetscFunctionBegin;
3174   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3175   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3176   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3177   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3178   PetscFunctionReturn(0);
3179 }
3180 
3181 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3182 {
3183   Mat_SeqBAIJ    *a;
3184   PetscErrorCode ierr;
3185   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3186   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3187   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3188   PetscInt       *masked,nmask,tmp,bs2,ishift;
3189   PetscMPIInt    size;
3190   int            fd;
3191   PetscScalar    *aa;
3192   MPI_Comm       comm;
3193 
3194   PetscFunctionBegin;
3195   /* force binary viewer to load .info file if it has not yet done so */
3196   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3197   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3198   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3199   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3200   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3201   if (bs < 0) bs = 1;
3202   bs2  = bs*bs;
3203 
3204   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3205   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3206   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3207   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3208   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3209   M = header[1]; N = header[2]; nz = header[3];
3210 
3211   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3212   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3213 
3214   /*
3215      This code adds extra rows to make sure the number of rows is
3216     divisible by the blocksize
3217   */
3218   mbs        = M/bs;
3219   extra_rows = bs - M + bs*(mbs);
3220   if (extra_rows == bs) extra_rows = 0;
3221   else mbs++;
3222   if (extra_rows) {
3223     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3224   }
3225 
3226   /* Set global sizes if not already set */
3227   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3228     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3229   } else { /* Check if the matrix global sizes are correct */
3230     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3231     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3232       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3233     }
3234     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3235   }
3236 
3237   /* read in row lengths */
3238   ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
3239   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3240   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3241 
3242   /* read in column indices */
3243   ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr);
3244   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3245   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3246 
3247   /* loop over row lengths determining block row lengths */
3248   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3249   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3250   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3251   rowcount = 0;
3252   nzcount  = 0;
3253   for (i=0; i<mbs; i++) {
3254     nmask = 0;
3255     for (j=0; j<bs; j++) {
3256       kmax = rowlengths[rowcount];
3257       for (k=0; k<kmax; k++) {
3258         tmp = jj[nzcount++]/bs;
3259         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3260       }
3261       rowcount++;
3262     }
3263     browlengths[i] += nmask;
3264     /* zero out the mask elements we set */
3265     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3266   }
3267 
3268   /* Do preallocation  */
3269   ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr);
3270   a    = (Mat_SeqBAIJ*)newmat->data;
3271 
3272   /* set matrix "i" values */
3273   a->i[0] = 0;
3274   for (i=1; i<= mbs; i++) {
3275     a->i[i]      = a->i[i-1] + browlengths[i-1];
3276     a->ilen[i-1] = browlengths[i-1];
3277   }
3278   a->nz = 0;
3279   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3280 
3281   /* read in nonzero values */
3282   ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr);
3283   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3284   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3285 
3286   /* set "a" and "j" values into matrix */
3287   nzcount = 0; jcount = 0;
3288   for (i=0; i<mbs; i++) {
3289     nzcountb = nzcount;
3290     nmask    = 0;
3291     for (j=0; j<bs; j++) {
3292       kmax = rowlengths[i*bs+j];
3293       for (k=0; k<kmax; k++) {
3294         tmp = jj[nzcount++]/bs;
3295         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3296       }
3297     }
3298     /* sort the masked values */
3299     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3300 
3301     /* set "j" values into matrix */
3302     maskcount = 1;
3303     for (j=0; j<nmask; j++) {
3304       a->j[jcount++]  = masked[j];
3305       mask[masked[j]] = maskcount++;
3306     }
3307     /* set "a" values into matrix */
3308     ishift = bs2*a->i[i];
3309     for (j=0; j<bs; j++) {
3310       kmax = rowlengths[i*bs+j];
3311       for (k=0; k<kmax; k++) {
3312         tmp       = jj[nzcountb]/bs;
3313         block     = mask[tmp] - 1;
3314         point     = jj[nzcountb] - bs*tmp;
3315         idx       = ishift + bs2*block + j + bs*point;
3316         a->a[idx] = (MatScalar)aa[nzcountb++];
3317       }
3318     }
3319     /* zero out the mask elements we set */
3320     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3321   }
3322   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3323 
3324   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3325   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3326   ierr = PetscFree(aa);CHKERRQ(ierr);
3327   ierr = PetscFree(jj);CHKERRQ(ierr);
3328   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3329 
3330   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3331   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3332   PetscFunctionReturn(0);
3333 }
3334 
3335 /*@C
3336    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3337    compressed row) format.  For good matrix assembly performance the
3338    user should preallocate the matrix storage by setting the parameter nz
3339    (or the array nnz).  By setting these parameters accurately, performance
3340    during matrix assembly can be increased by more than a factor of 50.
3341 
3342    Collective on MPI_Comm
3343 
3344    Input Parameters:
3345 +  comm - MPI communicator, set to PETSC_COMM_SELF
3346 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3347           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3348 .  m - number of rows
3349 .  n - number of columns
3350 .  nz - number of nonzero blocks  per block row (same for all rows)
3351 -  nnz - array containing the number of nonzero blocks in the various block rows
3352          (possibly different for each block row) or NULL
3353 
3354    Output Parameter:
3355 .  A - the matrix
3356 
3357    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3358    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3359    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3360 
3361    Options Database Keys:
3362 .   -mat_no_unroll - uses code that does not unroll the loops in the
3363                      block calculations (much slower)
3364 .    -mat_block_size - size of the blocks to use
3365 
3366    Level: intermediate
3367 
3368    Notes:
3369    The number of rows and columns must be divisible by blocksize.
3370 
3371    If the nnz parameter is given then the nz parameter is ignored
3372 
3373    A nonzero block is any block that as 1 or more nonzeros in it
3374 
3375    The block AIJ format is fully compatible with standard Fortran 77
3376    storage.  That is, the stored row and column indices can begin at
3377    either one (as in Fortran) or zero.  See the users' manual for details.
3378 
3379    Specify the preallocated storage with either nz or nnz (not both).
3380    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3381    allocation.  See Users-Manual: ch_mat for details.
3382    matrices.
3383 
3384 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3385 @*/
3386 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3387 {
3388   PetscErrorCode ierr;
3389 
3390   PetscFunctionBegin;
3391   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3392   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3393   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3394   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3395   PetscFunctionReturn(0);
3396 }
3397 
3398 /*@C
3399    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3400    per row in the matrix. For good matrix assembly performance the
3401    user should preallocate the matrix storage by setting the parameter nz
3402    (or the array nnz).  By setting these parameters accurately, performance
3403    during matrix assembly can be increased by more than a factor of 50.
3404 
3405    Collective on MPI_Comm
3406 
3407    Input Parameters:
3408 +  B - the matrix
3409 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3410           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3411 .  nz - number of block nonzeros per block row (same for all rows)
3412 -  nnz - array containing the number of block nonzeros in the various block rows
3413          (possibly different for each block row) or NULL
3414 
3415    Options Database Keys:
3416 .   -mat_no_unroll - uses code that does not unroll the loops in the
3417                      block calculations (much slower)
3418 .   -mat_block_size - size of the blocks to use
3419 
3420    Level: intermediate
3421 
3422    Notes:
3423    If the nnz parameter is given then the nz parameter is ignored
3424 
3425    You can call MatGetInfo() to get information on how effective the preallocation was;
3426    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3427    You can also run with the option -info and look for messages with the string
3428    malloc in them to see if additional memory allocation was needed.
3429 
3430    The block AIJ format is fully compatible with standard Fortran 77
3431    storage.  That is, the stored row and column indices can begin at
3432    either one (as in Fortran) or zero.  See the users' manual for details.
3433 
3434    Specify the preallocated storage with either nz or nnz (not both).
3435    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3436    allocation.  See Users-Manual: ch_mat for details.
3437 
3438 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3439 @*/
3440 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3441 {
3442   PetscErrorCode ierr;
3443 
3444   PetscFunctionBegin;
3445   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3446   PetscValidType(B,1);
3447   PetscValidLogicalCollectiveInt(B,bs,2);
3448   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3449   PetscFunctionReturn(0);
3450 }
3451 
3452 /*@C
3453    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3454    (the default sequential PETSc format).
3455 
3456    Collective on MPI_Comm
3457 
3458    Input Parameters:
3459 +  B - the matrix
3460 .  i - the indices into j for the start of each local row (starts with zero)
3461 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3462 -  v - optional values in the matrix
3463 
3464    Level: developer
3465 
3466    Notes:
3467    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3468    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3469    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3470    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3471    block column and the second index is over columns within a block.
3472 
3473 .keywords: matrix, aij, compressed row, sparse
3474 
3475 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3476 @*/
3477 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3478 {
3479   PetscErrorCode ierr;
3480 
3481   PetscFunctionBegin;
3482   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3483   PetscValidType(B,1);
3484   PetscValidLogicalCollectiveInt(B,bs,2);
3485   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3486   PetscFunctionReturn(0);
3487 }
3488 
3489 
3490 /*@
3491      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3492 
3493      Collective on MPI_Comm
3494 
3495    Input Parameters:
3496 +  comm - must be an MPI communicator of size 1
3497 .  bs - size of block
3498 .  m - number of rows
3499 .  n - number of columns
3500 .  i - row indices
3501 .  j - column indices
3502 -  a - matrix values
3503 
3504    Output Parameter:
3505 .  mat - the matrix
3506 
3507    Level: advanced
3508 
3509    Notes:
3510        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3511     once the matrix is destroyed
3512 
3513        You cannot set new nonzero locations into this matrix, that will generate an error.
3514 
3515        The i and j indices are 0 based
3516 
3517        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3518 
3519       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3520       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3521       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3522       with column-major ordering within blocks.
3523 
3524 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3525 
3526 @*/
3527 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3528 {
3529   PetscErrorCode ierr;
3530   PetscInt       ii;
3531   Mat_SeqBAIJ    *baij;
3532 
3533   PetscFunctionBegin;
3534   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3535   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3536 
3537   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3538   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3539   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3540   ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3541   baij = (Mat_SeqBAIJ*)(*mat)->data;
3542   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3543   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3544 
3545   baij->i = i;
3546   baij->j = j;
3547   baij->a = a;
3548 
3549   baij->singlemalloc = PETSC_FALSE;
3550   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3551   baij->free_a       = PETSC_FALSE;
3552   baij->free_ij      = PETSC_FALSE;
3553 
3554   for (ii=0; ii<m; ii++) {
3555     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3556 #if defined(PETSC_USE_DEBUG)
3557     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3558 #endif
3559   }
3560 #if defined(PETSC_USE_DEBUG)
3561   for (ii=0; ii<baij->i[m]; ii++) {
3562     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3563     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3564   }
3565 #endif
3566 
3567   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3568   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3569   PetscFunctionReturn(0);
3570 }
3571 
3572 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3573 {
3574   PetscErrorCode ierr;
3575   PetscMPIInt    size;
3576 
3577   PetscFunctionBegin;
3578   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3579   if (size == 1 && scall == MAT_REUSE_MATRIX) {
3580     ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
3581   } else {
3582     ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3583   }
3584   PetscFunctionReturn(0);
3585 }
3586