xref: /petsc/src/mat/impls/baij/seq/baij.c (revision cd545bacbb74b28ee04a97aa2a6bca79b7d5aa2e)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) {
23     if (values) *values = a->idiag;
24     PetscFunctionReturn(0);
25   }
26   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
27   diag_offset = a->diag;
28   if (!a->idiag) {
29     ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr);
30     ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
31   }
32   diag  = a->idiag;
33   mdiag = a->idiag+bs2*mbs;
34   if (values) *values = a->idiag;
35   /* factor and invert each block */
36   switch (bs) {
37   case 1:
38     for (i=0; i<mbs; i++) {
39       odiag    = v + 1*diag_offset[i];
40       diag[0]  = odiag[0];
41       mdiag[0] = odiag[0];
42       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
43       diag    += 1;
44       mdiag   += 1;
45     }
46     break;
47   case 2:
48     for (i=0; i<mbs; i++) {
49       odiag    = v + 4*diag_offset[i];
50       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
53       diag    += 4;
54       mdiag   += 4;
55     }
56     break;
57   case 3:
58     for (i=0; i<mbs; i++) {
59       odiag    = v + 9*diag_offset[i];
60       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62       diag[8]  = odiag[8];
63       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65       mdiag[8] = odiag[8];
66       PetscBool wouldcrash;
67       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,A->erroriffailure,&wouldcrash);CHKERRQ(ierr);
68       diag    += 9;
69       mdiag   += 9;
70     }
71     break;
72   case 4:
73     for (i=0; i<mbs; i++) {
74       odiag  = v + 16*diag_offset[i];
75       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
76       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
77       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
78       diag  += 16;
79       mdiag += 16;
80     }
81     break;
82   case 5:
83     for (i=0; i<mbs; i++) {
84       odiag  = v + 25*diag_offset[i];
85       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
86       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
87       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
88       diag  += 25;
89       mdiag += 25;
90     }
91     break;
92   case 6:
93     for (i=0; i<mbs; i++) {
94       odiag  = v + 36*diag_offset[i];
95       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
96       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
97       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
98       diag  += 36;
99       mdiag += 36;
100     }
101     break;
102   case 7:
103     for (i=0; i<mbs; i++) {
104       odiag  = v + 49*diag_offset[i];
105       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
106       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
107       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
108       diag  += 49;
109       mdiag += 49;
110     }
111     break;
112   default:
113     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
114     for (i=0; i<mbs; i++) {
115       odiag  = v + bs2*diag_offset[i];
116       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
117       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
118       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
119       diag  += bs2;
120       mdiag += bs2;
121     }
122     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
123   }
124   a->idiagvalid = PETSC_TRUE;
125   PetscFunctionReturn(0);
126 }
127 
128 #undef __FUNCT__
129 #define __FUNCT__ "MatSOR_SeqBAIJ"
130 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
131 {
132   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
133   PetscScalar       *x,*work,*w,*workt,*t;
134   const MatScalar   *v,*aa = a->a, *idiag;
135   const PetscScalar *b,*xb;
136   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
137   PetscErrorCode    ierr;
138   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
139   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
140 
141   PetscFunctionBegin;
142   if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */
143   its = its*lits;
144   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
145   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
146   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
147   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
148   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
149 
150   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
151 
152   if (!m) PetscFunctionReturn(0);
153   diag  = a->diag;
154   idiag = a->idiag;
155   k    = PetscMax(A->rmap->n,A->cmap->n);
156   if (!a->mult_work) {
157     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
158   }
159   if (!a->sor_workt) {
160     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
161   }
162   if (!a->sor_work) {
163     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
164   }
165   work = a->mult_work;
166   t    = a->sor_workt;
167   w    = a->sor_work;
168 
169   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
170   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
171 
172   if (flag & SOR_ZERO_INITIAL_GUESS) {
173     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
174       switch (bs) {
175       case 1:
176         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
177         t[0] = b[0];
178         i2     = 1;
179         idiag += 1;
180         for (i=1; i<m; i++) {
181           v  = aa + ai[i];
182           vi = aj + ai[i];
183           nz = diag[i] - ai[i];
184           s[0] = b[i2];
185           for (j=0; j<nz; j++) {
186             xw[0] = x[vi[j]];
187             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
188           }
189           t[i2] = s[0];
190           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
191           x[i2]  = xw[0];
192           idiag += 1;
193           i2    += 1;
194         }
195         break;
196       case 2:
197         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
198         t[0] = b[0]; t[1] = b[1];
199         i2     = 2;
200         idiag += 4;
201         for (i=1; i<m; i++) {
202           v  = aa + 4*ai[i];
203           vi = aj + ai[i];
204           nz = diag[i] - ai[i];
205           s[0] = b[i2]; s[1] = b[i2+1];
206           for (j=0; j<nz; j++) {
207             idx = 2*vi[j];
208             it  = 4*j;
209             xw[0] = x[idx]; xw[1] = x[1+idx];
210             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
211           }
212           t[i2] = s[0]; t[i2+1] = s[1];
213           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
214           x[i2]   = xw[0]; x[i2+1] = xw[1];
215           idiag  += 4;
216           i2     += 2;
217         }
218         break;
219       case 3:
220         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
221         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
222         i2     = 3;
223         idiag += 9;
224         for (i=1; i<m; i++) {
225           v  = aa + 9*ai[i];
226           vi = aj + ai[i];
227           nz = diag[i] - ai[i];
228           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
229           while (nz--) {
230             idx = 3*(*vi++);
231             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
232             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
233             v  += 9;
234           }
235           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
236           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
237           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
238           idiag  += 9;
239           i2     += 3;
240         }
241         break;
242       case 4:
243         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
244         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
245         i2     = 4;
246         idiag += 16;
247         for (i=1; i<m; i++) {
248           v  = aa + 16*ai[i];
249           vi = aj + ai[i];
250           nz = diag[i] - ai[i];
251           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
252           while (nz--) {
253             idx = 4*(*vi++);
254             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
255             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
256             v  += 16;
257           }
258           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
259           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
260           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
261           idiag  += 16;
262           i2     += 4;
263         }
264         break;
265       case 5:
266         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
267         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
268         i2     = 5;
269         idiag += 25;
270         for (i=1; i<m; i++) {
271           v  = aa + 25*ai[i];
272           vi = aj + ai[i];
273           nz = diag[i] - ai[i];
274           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
275           while (nz--) {
276             idx = 5*(*vi++);
277             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
278             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
279             v  += 25;
280           }
281           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
282           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
283           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
284           idiag  += 25;
285           i2     += 5;
286         }
287         break;
288       case 6:
289         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
290         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
291         i2     = 6;
292         idiag += 36;
293         for (i=1; i<m; i++) {
294           v  = aa + 36*ai[i];
295           vi = aj + ai[i];
296           nz = diag[i] - ai[i];
297           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
298           while (nz--) {
299             idx = 6*(*vi++);
300             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
301             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
302             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
303             v  += 36;
304           }
305           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
306           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
307           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
308           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
309           idiag  += 36;
310           i2     += 6;
311         }
312         break;
313       case 7:
314         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
315         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
316         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
317         i2     = 7;
318         idiag += 49;
319         for (i=1; i<m; i++) {
320           v  = aa + 49*ai[i];
321           vi = aj + ai[i];
322           nz = diag[i] - ai[i];
323           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
324           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
325           while (nz--) {
326             idx = 7*(*vi++);
327             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
328             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
329             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
330             v  += 49;
331           }
332           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
333           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
334           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
335           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
336           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
337           idiag  += 49;
338           i2     += 7;
339         }
340         break;
341       default:
342         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
343         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
344         i2     = bs;
345         idiag += bs2;
346         for (i=1; i<m; i++) {
347           v  = aa + bs2*ai[i];
348           vi = aj + ai[i];
349           nz = diag[i] - ai[i];
350 
351           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
352           /* copy all rows of x that are needed into contiguous space */
353           workt = work;
354           for (j=0; j<nz; j++) {
355             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
356             workt += bs;
357           }
358           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
359           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
360           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
361 
362           idiag += bs2;
363           i2    += bs;
364         }
365         break;
366       }
367       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
368       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
369       xb = t;
370     }
371     else xb = b;
372     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
373       idiag = a->idiag+bs2*(a->mbs-1);
374       i2 = bs * (m-1);
375       switch (bs) {
376       case 1:
377         s[0]  = xb[i2];
378         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
379         x[i2] = xw[0];
380         i2   -= 1;
381         for (i=m-2; i>=0; i--) {
382           v  = aa + (diag[i]+1);
383           vi = aj + diag[i] + 1;
384           nz = ai[i+1] - diag[i] - 1;
385           s[0] = xb[i2];
386           for (j=0; j<nz; j++) {
387             xw[0] = x[vi[j]];
388             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
389           }
390           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
391           x[i2]  = xw[0];
392           idiag -= 1;
393           i2    -= 1;
394         }
395         break;
396       case 2:
397         s[0]  = xb[i2]; s[1] = xb[i2+1];
398         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
399         x[i2] = xw[0]; x[i2+1] = xw[1];
400         i2    -= 2;
401         idiag -= 4;
402         for (i=m-2; i>=0; i--) {
403           v  = aa + 4*(diag[i] + 1);
404           vi = aj + diag[i] + 1;
405           nz = ai[i+1] - diag[i] - 1;
406           s[0] = xb[i2]; s[1] = xb[i2+1];
407           for (j=0; j<nz; j++) {
408             idx = 2*vi[j];
409             it  = 4*j;
410             xw[0] = x[idx]; xw[1] = x[1+idx];
411             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
412           }
413           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
414           x[i2]   = xw[0]; x[i2+1] = xw[1];
415           idiag  -= 4;
416           i2     -= 2;
417         }
418         break;
419       case 3:
420         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
421         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
422         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
423         i2    -= 3;
424         idiag -= 9;
425         for (i=m-2; i>=0; i--) {
426           v  = aa + 9*(diag[i]+1);
427           vi = aj + diag[i] + 1;
428           nz = ai[i+1] - diag[i] - 1;
429           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
430           while (nz--) {
431             idx = 3*(*vi++);
432             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
433             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
434             v  += 9;
435           }
436           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
437           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
438           idiag  -= 9;
439           i2     -= 3;
440         }
441         break;
442       case 4:
443         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
444         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
445         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
446         i2    -= 4;
447         idiag -= 16;
448         for (i=m-2; i>=0; i--) {
449           v  = aa + 16*(diag[i]+1);
450           vi = aj + diag[i] + 1;
451           nz = ai[i+1] - diag[i] - 1;
452           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
453           while (nz--) {
454             idx = 4*(*vi++);
455             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
456             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
457             v  += 16;
458           }
459           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
460           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
461           idiag  -= 16;
462           i2     -= 4;
463         }
464         break;
465       case 5:
466         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
467         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
468         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
469         i2    -= 5;
470         idiag -= 25;
471         for (i=m-2; i>=0; i--) {
472           v  = aa + 25*(diag[i]+1);
473           vi = aj + diag[i] + 1;
474           nz = ai[i+1] - diag[i] - 1;
475           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
476           while (nz--) {
477             idx = 5*(*vi++);
478             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
479             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
480             v  += 25;
481           }
482           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
483           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
484           idiag  -= 25;
485           i2     -= 5;
486         }
487         break;
488       case 6:
489         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
490         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
491         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
492         i2    -= 6;
493         idiag -= 36;
494         for (i=m-2; i>=0; i--) {
495           v  = aa + 36*(diag[i]+1);
496           vi = aj + diag[i] + 1;
497           nz = ai[i+1] - diag[i] - 1;
498           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
499           while (nz--) {
500             idx = 6*(*vi++);
501             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
502             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
503             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
504             v  += 36;
505           }
506           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
507           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
508           idiag  -= 36;
509           i2     -= 6;
510         }
511         break;
512       case 7:
513         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
514         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
515         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
516         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
517         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
518         i2    -= 7;
519         idiag -= 49;
520         for (i=m-2; i>=0; i--) {
521           v  = aa + 49*(diag[i]+1);
522           vi = aj + diag[i] + 1;
523           nz = ai[i+1] - diag[i] - 1;
524           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
525           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
526           while (nz--) {
527             idx = 7*(*vi++);
528             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
529             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
530             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
531             v  += 49;
532           }
533           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
534           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
535           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
536           idiag  -= 49;
537           i2     -= 7;
538         }
539         break;
540       default:
541         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
542         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
543         i2    -= bs;
544         idiag -= bs2;
545         for (i=m-2; i>=0; i--) {
546           v  = aa + bs2*(diag[i]+1);
547           vi = aj + diag[i] + 1;
548           nz = ai[i+1] - diag[i] - 1;
549 
550           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
551           /* copy all rows of x that are needed into contiguous space */
552           workt = work;
553           for (j=0; j<nz; j++) {
554             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
555             workt += bs;
556           }
557           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
558           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
559 
560           idiag -= bs2;
561           i2    -= bs;
562         }
563         break;
564       }
565       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
566     }
567     its--;
568   }
569   while (its--) {
570     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
571       idiag = a->idiag;
572       i2 = 0;
573       switch (bs) {
574       case 1:
575         for (i=0; i<m; i++) {
576           v  = aa + ai[i];
577           vi = aj + ai[i];
578           nz = ai[i+1] - ai[i];
579           s[0] = b[i2];
580           for (j=0; j<nz; j++) {
581             xw[0] = x[vi[j]];
582             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
583           }
584           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
585           x[i2] += xw[0];
586           idiag += 1;
587           i2    += 1;
588         }
589         break;
590       case 2:
591         for (i=0; i<m; i++) {
592           v  = aa + 4*ai[i];
593           vi = aj + ai[i];
594           nz = ai[i+1] - ai[i];
595           s[0] = b[i2]; s[1] = b[i2+1];
596           for (j=0; j<nz; j++) {
597             idx = 2*vi[j];
598             it  = 4*j;
599             xw[0] = x[idx]; xw[1] = x[1+idx];
600             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
601           }
602           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
603           x[i2]  += xw[0]; x[i2+1] += xw[1];
604           idiag  += 4;
605           i2     += 2;
606         }
607         break;
608       case 3:
609         for (i=0; i<m; i++) {
610           v  = aa + 9*ai[i];
611           vi = aj + ai[i];
612           nz = ai[i+1] - ai[i];
613           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
614           while (nz--) {
615             idx = 3*(*vi++);
616             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
617             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
618             v  += 9;
619           }
620           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
621           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
622           idiag  += 9;
623           i2     += 3;
624         }
625         break;
626       case 4:
627         for (i=0; i<m; i++) {
628           v  = aa + 16*ai[i];
629           vi = aj + ai[i];
630           nz = ai[i+1] - ai[i];
631           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
632           while (nz--) {
633             idx = 4*(*vi++);
634             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
635             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
636             v  += 16;
637           }
638           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
639           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
640           idiag  += 16;
641           i2     += 4;
642         }
643         break;
644       case 5:
645         for (i=0; i<m; i++) {
646           v  = aa + 25*ai[i];
647           vi = aj + ai[i];
648           nz = ai[i+1] - ai[i];
649           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
650           while (nz--) {
651             idx = 5*(*vi++);
652             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
653             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
654             v  += 25;
655           }
656           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
657           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
658           idiag  += 25;
659           i2     += 5;
660         }
661         break;
662       case 6:
663         for (i=0; i<m; i++) {
664           v  = aa + 36*ai[i];
665           vi = aj + ai[i];
666           nz = ai[i+1] - ai[i];
667           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
668           while (nz--) {
669             idx = 6*(*vi++);
670             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
671             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
672             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
673             v  += 36;
674           }
675           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
676           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
677           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
678           idiag  += 36;
679           i2     += 6;
680         }
681         break;
682       case 7:
683         for (i=0; i<m; i++) {
684           v  = aa + 49*ai[i];
685           vi = aj + ai[i];
686           nz = ai[i+1] - ai[i];
687           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
688           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
689           while (nz--) {
690             idx = 7*(*vi++);
691             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
692             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
693             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
694             v  += 49;
695           }
696           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
697           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
698           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
699           idiag  += 49;
700           i2     += 7;
701         }
702         break;
703       default:
704         for (i=0; i<m; i++) {
705           v  = aa + bs2*ai[i];
706           vi = aj + ai[i];
707           nz = ai[i+1] - ai[i];
708 
709           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
710           /* copy all rows of x that are needed into contiguous space */
711           workt = work;
712           for (j=0; j<nz; j++) {
713             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
714             workt += bs;
715           }
716           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
717           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
718 
719           idiag += bs2;
720           i2    += bs;
721         }
722         break;
723       }
724       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
725     }
726     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
727       idiag = a->idiag+bs2*(a->mbs-1);
728       i2 = bs * (m-1);
729       switch (bs) {
730       case 1:
731         for (i=m-1; i>=0; i--) {
732           v  = aa + ai[i];
733           vi = aj + ai[i];
734           nz = ai[i+1] - ai[i];
735           s[0] = b[i2];
736           for (j=0; j<nz; j++) {
737             xw[0] = x[vi[j]];
738             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
739           }
740           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
741           x[i2] += xw[0];
742           idiag -= 1;
743           i2    -= 1;
744         }
745         break;
746       case 2:
747         for (i=m-1; i>=0; i--) {
748           v  = aa + 4*ai[i];
749           vi = aj + ai[i];
750           nz = ai[i+1] - ai[i];
751           s[0] = b[i2]; s[1] = b[i2+1];
752           for (j=0; j<nz; j++) {
753             idx = 2*vi[j];
754             it  = 4*j;
755             xw[0] = x[idx]; xw[1] = x[1+idx];
756             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
757           }
758           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
759           x[i2]  += xw[0]; x[i2+1] += xw[1];
760           idiag  -= 4;
761           i2     -= 2;
762         }
763         break;
764       case 3:
765         for (i=m-1; i>=0; i--) {
766           v  = aa + 9*ai[i];
767           vi = aj + ai[i];
768           nz = ai[i+1] - ai[i];
769           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
770           while (nz--) {
771             idx = 3*(*vi++);
772             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
773             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
774             v  += 9;
775           }
776           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
777           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
778           idiag  -= 9;
779           i2     -= 3;
780         }
781         break;
782       case 4:
783         for (i=m-1; i>=0; i--) {
784           v  = aa + 16*ai[i];
785           vi = aj + ai[i];
786           nz = ai[i+1] - ai[i];
787           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
788           while (nz--) {
789             idx = 4*(*vi++);
790             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
791             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
792             v  += 16;
793           }
794           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
795           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
796           idiag  -= 16;
797           i2     -= 4;
798         }
799         break;
800       case 5:
801         for (i=m-1; i>=0; i--) {
802           v  = aa + 25*ai[i];
803           vi = aj + ai[i];
804           nz = ai[i+1] - ai[i];
805           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
806           while (nz--) {
807             idx = 5*(*vi++);
808             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
809             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
810             v  += 25;
811           }
812           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
813           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
814           idiag  -= 25;
815           i2     -= 5;
816         }
817         break;
818       case 6:
819         for (i=m-1; i>=0; i--) {
820           v  = aa + 36*ai[i];
821           vi = aj + ai[i];
822           nz = ai[i+1] - ai[i];
823           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
824           while (nz--) {
825             idx = 6*(*vi++);
826             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
827             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
828             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
829             v  += 36;
830           }
831           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
832           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
833           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
834           idiag  -= 36;
835           i2     -= 6;
836         }
837         break;
838       case 7:
839         for (i=m-1; i>=0; i--) {
840           v  = aa + 49*ai[i];
841           vi = aj + ai[i];
842           nz = ai[i+1] - ai[i];
843           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
844           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
845           while (nz--) {
846             idx = 7*(*vi++);
847             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
848             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
849             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
850             v  += 49;
851           }
852           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
853           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
854           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
855           idiag  -= 49;
856           i2     -= 7;
857         }
858         break;
859       default:
860         for (i=m-1; i>=0; i--) {
861           v  = aa + bs2*ai[i];
862           vi = aj + ai[i];
863           nz = ai[i+1] - ai[i];
864 
865           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
866           /* copy all rows of x that are needed into contiguous space */
867           workt = work;
868           for (j=0; j<nz; j++) {
869             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
870             workt += bs;
871           }
872           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
873           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
874 
875           idiag -= bs2;
876           i2    -= bs;
877         }
878         break;
879       }
880       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
881     }
882   }
883   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
884   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
885   PetscFunctionReturn(0);
886 }
887 
888 
889 /*
890     Special version for direct calls from Fortran (Used in PETSc-fun3d)
891 */
892 #if defined(PETSC_HAVE_FORTRAN_CAPS)
893 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
894 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
895 #define matsetvaluesblocked4_ matsetvaluesblocked4
896 #endif
897 
898 #undef __FUNCT__
899 #define __FUNCT__ "matsetvaluesblocked4_"
900 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
901 {
902   Mat               A  = *AA;
903   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
904   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
905   PetscInt          *ai    =a->i,*ailen=a->ilen;
906   PetscInt          *aj    =a->j,stepval,lastcol = -1;
907   const PetscScalar *value = v;
908   MatScalar         *ap,*aa = a->a,*bap;
909 
910   PetscFunctionBegin;
911   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
912   stepval = (n-1)*4;
913   for (k=0; k<m; k++) { /* loop over added rows */
914     row  = im[k];
915     rp   = aj + ai[row];
916     ap   = aa + 16*ai[row];
917     nrow = ailen[row];
918     low  = 0;
919     high = nrow;
920     for (l=0; l<n; l++) { /* loop over added columns */
921       col = in[l];
922       if (col <= lastcol)  low = 0;
923       else                high = nrow;
924       lastcol = col;
925       value   = v + k*(stepval+4 + l)*4;
926       while (high-low > 7) {
927         t = (low+high)/2;
928         if (rp[t] > col) high = t;
929         else             low  = t;
930       }
931       for (i=low; i<high; i++) {
932         if (rp[i] > col) break;
933         if (rp[i] == col) {
934           bap = ap +  16*i;
935           for (ii=0; ii<4; ii++,value+=stepval) {
936             for (jj=ii; jj<16; jj+=4) {
937               bap[jj] += *value++;
938             }
939           }
940           goto noinsert2;
941         }
942       }
943       N = nrow++ - 1;
944       high++; /* added new column index thus must search to one higher than before */
945       /* shift up all the later entries in this row */
946       for (ii=N; ii>=i; ii--) {
947         rp[ii+1] = rp[ii];
948         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
949       }
950       if (N >= i) {
951         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
952       }
953       rp[i] = col;
954       bap   = ap +  16*i;
955       for (ii=0; ii<4; ii++,value+=stepval) {
956         for (jj=ii; jj<16; jj+=4) {
957           bap[jj] = *value++;
958         }
959       }
960       noinsert2:;
961       low = i;
962     }
963     ailen[row] = nrow;
964   }
965   PetscFunctionReturnVoid();
966 }
967 
968 #if defined(PETSC_HAVE_FORTRAN_CAPS)
969 #define matsetvalues4_ MATSETVALUES4
970 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
971 #define matsetvalues4_ matsetvalues4
972 #endif
973 
974 #undef __FUNCT__
975 #define __FUNCT__ "MatSetValues4_"
976 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
977 {
978   Mat         A  = *AA;
979   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
980   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
981   PetscInt    *ai=a->i,*ailen=a->ilen;
982   PetscInt    *aj=a->j,brow,bcol;
983   PetscInt    ridx,cidx,lastcol = -1;
984   MatScalar   *ap,value,*aa=a->a,*bap;
985 
986   PetscFunctionBegin;
987   for (k=0; k<m; k++) { /* loop over added rows */
988     row  = im[k]; brow = row/4;
989     rp   = aj + ai[brow];
990     ap   = aa + 16*ai[brow];
991     nrow = ailen[brow];
992     low  = 0;
993     high = nrow;
994     for (l=0; l<n; l++) { /* loop over added columns */
995       col   = in[l]; bcol = col/4;
996       ridx  = row % 4; cidx = col % 4;
997       value = v[l + k*n];
998       if (col <= lastcol)  low = 0;
999       else                high = nrow;
1000       lastcol = col;
1001       while (high-low > 7) {
1002         t = (low+high)/2;
1003         if (rp[t] > bcol) high = t;
1004         else              low  = t;
1005       }
1006       for (i=low; i<high; i++) {
1007         if (rp[i] > bcol) break;
1008         if (rp[i] == bcol) {
1009           bap   = ap +  16*i + 4*cidx + ridx;
1010           *bap += value;
1011           goto noinsert1;
1012         }
1013       }
1014       N = nrow++ - 1;
1015       high++; /* added new column thus must search to one higher than before */
1016       /* shift up all the later entries in this row */
1017       for (ii=N; ii>=i; ii--) {
1018         rp[ii+1] = rp[ii];
1019         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1020       }
1021       if (N>=i) {
1022         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1023       }
1024       rp[i]                    = bcol;
1025       ap[16*i + 4*cidx + ridx] = value;
1026 noinsert1:;
1027       low = i;
1028     }
1029     ailen[brow] = nrow;
1030   }
1031   PetscFunctionReturnVoid();
1032 }
1033 
1034 /*
1035      Checks for missing diagonals
1036 */
1037 #undef __FUNCT__
1038 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1039 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1040 {
1041   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1042   PetscErrorCode ierr;
1043   PetscInt       *diag,*ii = a->i,i;
1044 
1045   PetscFunctionBegin;
1046   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1047   *missing = PETSC_FALSE;
1048   if (A->rmap->n > 0 && !ii) {
1049     *missing = PETSC_TRUE;
1050     if (d) *d = 0;
1051     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1052   } else {
1053     diag = a->diag;
1054     for (i=0; i<a->mbs; i++) {
1055       if (diag[i] >= ii[i+1]) {
1056         *missing = PETSC_TRUE;
1057         if (d) *d = i;
1058         PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);
1059         break;
1060       }
1061     }
1062   }
1063   PetscFunctionReturn(0);
1064 }
1065 
1066 #undef __FUNCT__
1067 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1068 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1069 {
1070   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1071   PetscErrorCode ierr;
1072   PetscInt       i,j,m = a->mbs;
1073 
1074   PetscFunctionBegin;
1075   if (!a->diag) {
1076     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1077     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1078     a->free_diag = PETSC_TRUE;
1079   }
1080   for (i=0; i<m; i++) {
1081     a->diag[i] = a->i[i+1];
1082     for (j=a->i[i]; j<a->i[i+1]; j++) {
1083       if (a->j[j] == i) {
1084         a->diag[i] = j;
1085         break;
1086       }
1087     }
1088   }
1089   PetscFunctionReturn(0);
1090 }
1091 
1092 
1093 #undef __FUNCT__
1094 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1095 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1096 {
1097   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1098   PetscErrorCode ierr;
1099   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1100   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1101 
1102   PetscFunctionBegin;
1103   *nn = n;
1104   if (!ia) PetscFunctionReturn(0);
1105   if (symmetric) {
1106     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1107     nz   = tia[n];
1108   } else {
1109     tia = a->i; tja = a->j;
1110   }
1111 
1112   if (!blockcompressed && bs > 1) {
1113     (*nn) *= bs;
1114     /* malloc & create the natural set of indices */
1115     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1116     if (n) {
1117       (*ia)[0] = 0;
1118       for (j=1; j<bs; j++) {
1119         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1120       }
1121     }
1122 
1123     for (i=1; i<n; i++) {
1124       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1125       for (j=1; j<bs; j++) {
1126         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1127       }
1128     }
1129     if (n) {
1130       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1131     }
1132 
1133     if (inja) {
1134       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1135       cnt = 0;
1136       for (i=0; i<n; i++) {
1137         for (j=0; j<bs; j++) {
1138           for (k=tia[i]; k<tia[i+1]; k++) {
1139             for (l=0; l<bs; l++) {
1140               (*ja)[cnt++] = bs*tja[k] + l;
1141             }
1142           }
1143         }
1144       }
1145     }
1146 
1147     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1148       ierr = PetscFree(tia);CHKERRQ(ierr);
1149       ierr = PetscFree(tja);CHKERRQ(ierr);
1150     }
1151   } else if (oshift == 1) {
1152     if (symmetric) {
1153       nz = tia[A->rmap->n/bs];
1154       /*  add 1 to i and j indices */
1155       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1156       *ia = tia;
1157       if (ja) {
1158         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1159         *ja = tja;
1160       }
1161     } else {
1162       nz = a->i[A->rmap->n/bs];
1163       /* malloc space and  add 1 to i and j indices */
1164       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1165       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1166       if (ja) {
1167         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1168         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1169       }
1170     }
1171   } else {
1172     *ia = tia;
1173     if (ja) *ja = tja;
1174   }
1175   PetscFunctionReturn(0);
1176 }
1177 
1178 #undef __FUNCT__
1179 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1180 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1181 {
1182   PetscErrorCode ierr;
1183 
1184   PetscFunctionBegin;
1185   if (!ia) PetscFunctionReturn(0);
1186   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1187     ierr = PetscFree(*ia);CHKERRQ(ierr);
1188     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1189   }
1190   PetscFunctionReturn(0);
1191 }
1192 
1193 #undef __FUNCT__
1194 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1195 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1196 {
1197   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1198   PetscErrorCode ierr;
1199 
1200   PetscFunctionBegin;
1201 #if defined(PETSC_USE_LOG)
1202   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1203 #endif
1204   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1205   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1206   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1207   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1208   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1209   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1210   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1211   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1212   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1213   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1214   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1215   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1216   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1217 
1218   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1219   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1220   ierr = PetscFree(A->data);CHKERRQ(ierr);
1221 
1222   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1223   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1224   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1225   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1226   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1227   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1228   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1229   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1230   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1231   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1232   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 #undef __FUNCT__
1237 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1238 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1239 {
1240   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1241   PetscErrorCode ierr;
1242 
1243   PetscFunctionBegin;
1244   switch (op) {
1245   case MAT_ROW_ORIENTED:
1246     a->roworiented = flg;
1247     break;
1248   case MAT_KEEP_NONZERO_PATTERN:
1249     a->keepnonzeropattern = flg;
1250     break;
1251   case MAT_NEW_NONZERO_LOCATIONS:
1252     a->nonew = (flg ? 0 : 1);
1253     break;
1254   case MAT_NEW_NONZERO_LOCATION_ERR:
1255     a->nonew = (flg ? -1 : 0);
1256     break;
1257   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1258     a->nonew = (flg ? -2 : 0);
1259     break;
1260   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1261     a->nounused = (flg ? -1 : 0);
1262     break;
1263   case MAT_NEW_DIAGONALS:
1264   case MAT_IGNORE_OFF_PROC_ENTRIES:
1265   case MAT_USE_HASH_TABLE:
1266     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1267     break;
1268   case MAT_SPD:
1269   case MAT_SYMMETRIC:
1270   case MAT_STRUCTURALLY_SYMMETRIC:
1271   case MAT_HERMITIAN:
1272   case MAT_SYMMETRY_ETERNAL:
1273     /* These options are handled directly by MatSetOption() */
1274     break;
1275   default:
1276     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1277   }
1278   PetscFunctionReturn(0);
1279 }
1280 
1281 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1282 #undef __FUNCT__
1283 #define __FUNCT__ "MatGetRow_SeqBAIJ_private"
1284 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1285 {
1286   PetscErrorCode ierr;
1287   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1288   MatScalar      *aa_i;
1289   PetscScalar    *v_i;
1290 
1291   PetscFunctionBegin;
1292   bs  = A->rmap->bs;
1293   bs2 = bs*bs;
1294   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1295 
1296   bn  = row/bs;   /* Block number */
1297   bp  = row % bs; /* Block Position */
1298   M   = ai[bn+1] - ai[bn];
1299   *nz = bs*M;
1300 
1301   if (v) {
1302     *v = 0;
1303     if (*nz) {
1304       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1305       for (i=0; i<M; i++) { /* for each block in the block row */
1306         v_i  = *v + i*bs;
1307         aa_i = aa + bs2*(ai[bn] + i);
1308         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1309       }
1310     }
1311   }
1312 
1313   if (idx) {
1314     *idx = 0;
1315     if (*nz) {
1316       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1317       for (i=0; i<M; i++) { /* for each block in the block row */
1318         idx_i = *idx + i*bs;
1319         itmp  = bs*aj[ai[bn] + i];
1320         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1321       }
1322     }
1323   }
1324   PetscFunctionReturn(0);
1325 }
1326 
1327 #undef __FUNCT__
1328 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1329 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1330 {
1331   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1332   PetscErrorCode ierr;
1333 
1334   PetscFunctionBegin;
1335   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1336   PetscFunctionReturn(0);
1337 }
1338 
1339 #undef __FUNCT__
1340 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1341 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1342 {
1343   PetscErrorCode ierr;
1344 
1345   PetscFunctionBegin;
1346   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1347   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1348   PetscFunctionReturn(0);
1349 }
1350 
1351 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1352 
1353 #undef __FUNCT__
1354 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1355 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1356 {
1357   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1358   Mat            C;
1359   PetscErrorCode ierr;
1360   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1361   PetscInt       *rows,*cols,bs2=a->bs2;
1362   MatScalar      *array;
1363 
1364   PetscFunctionBegin;
1365   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1366   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1367     ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr);
1368 
1369     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1370     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1371     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1372     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1373     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1374     ierr = PetscFree(col);CHKERRQ(ierr);
1375   } else {
1376     C = *B;
1377   }
1378 
1379   array = a->a;
1380   ierr  = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr);
1381   for (i=0; i<mbs; i++) {
1382     cols[0] = i*bs;
1383     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1384     len = ai[i+1] - ai[i];
1385     for (j=0; j<len; j++) {
1386       rows[0] = (*aj++)*bs;
1387       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1388       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1389       array += bs2;
1390     }
1391   }
1392   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1393 
1394   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1395   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1396 
1397   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1398     *B = C;
1399   } else {
1400     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1401   }
1402   PetscFunctionReturn(0);
1403 }
1404 
1405 #undef __FUNCT__
1406 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1407 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1408 {
1409   PetscErrorCode ierr;
1410   Mat            Btrans;
1411 
1412   PetscFunctionBegin;
1413   *f   = PETSC_FALSE;
1414   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1415   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1416   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1417   PetscFunctionReturn(0);
1418 }
1419 
1420 #undef __FUNCT__
1421 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1422 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1423 {
1424   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1425   PetscErrorCode ierr;
1426   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1427   int            fd;
1428   PetscScalar    *aa;
1429   FILE           *file;
1430 
1431   PetscFunctionBegin;
1432   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1433   ierr        = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr);
1434   col_lens[0] = MAT_FILE_CLASSID;
1435 
1436   col_lens[1] = A->rmap->N;
1437   col_lens[2] = A->cmap->n;
1438   col_lens[3] = a->nz*bs2;
1439 
1440   /* store lengths of each row and write (including header) to file */
1441   count = 0;
1442   for (i=0; i<a->mbs; i++) {
1443     for (j=0; j<bs; j++) {
1444       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1445     }
1446   }
1447   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1448   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1449 
1450   /* store column indices (zero start index) */
1451   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1452   count = 0;
1453   for (i=0; i<a->mbs; i++) {
1454     for (j=0; j<bs; j++) {
1455       for (k=a->i[i]; k<a->i[i+1]; k++) {
1456         for (l=0; l<bs; l++) {
1457           jj[count++] = bs*a->j[k] + l;
1458         }
1459       }
1460     }
1461   }
1462   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1463   ierr = PetscFree(jj);CHKERRQ(ierr);
1464 
1465   /* store nonzero values */
1466   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1467   count = 0;
1468   for (i=0; i<a->mbs; i++) {
1469     for (j=0; j<bs; j++) {
1470       for (k=a->i[i]; k<a->i[i+1]; k++) {
1471         for (l=0; l<bs; l++) {
1472           aa[count++] = a->a[bs2*k + l*bs + j];
1473         }
1474       }
1475     }
1476   }
1477   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1478   ierr = PetscFree(aa);CHKERRQ(ierr);
1479 
1480   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1481   if (file) {
1482     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1483   }
1484   PetscFunctionReturn(0);
1485 }
1486 
1487 #undef __FUNCT__
1488 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1489 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1490 {
1491   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1492   PetscErrorCode    ierr;
1493   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1494   PetscViewerFormat format;
1495 
1496   PetscFunctionBegin;
1497   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1498   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1499     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1500   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1501     const char *matname;
1502     Mat        aij;
1503     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1504     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1505     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1506     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1507     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1508   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1509       PetscFunctionReturn(0);
1510   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1511     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1512     for (i=0; i<a->mbs; i++) {
1513       for (j=0; j<bs; j++) {
1514         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1515         for (k=a->i[i]; k<a->i[i+1]; k++) {
1516           for (l=0; l<bs; l++) {
1517 #if defined(PETSC_USE_COMPLEX)
1518             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1519               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1520                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1521             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1522               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1523                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1524             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1525               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1526             }
1527 #else
1528             if (a->a[bs2*k + l*bs + j] != 0.0) {
1529               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1530             }
1531 #endif
1532           }
1533         }
1534         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1535       }
1536     }
1537     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1538   } else {
1539     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1540     for (i=0; i<a->mbs; i++) {
1541       for (j=0; j<bs; j++) {
1542         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1543         for (k=a->i[i]; k<a->i[i+1]; k++) {
1544           for (l=0; l<bs; l++) {
1545 #if defined(PETSC_USE_COMPLEX)
1546             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1547               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1548                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1549             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1550               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1551                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1552             } else {
1553               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1554             }
1555 #else
1556             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1557 #endif
1558           }
1559         }
1560         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1561       }
1562     }
1563     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1564   }
1565   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1566   PetscFunctionReturn(0);
1567 }
1568 
1569 #include <petscdraw.h>
1570 #undef __FUNCT__
1571 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1572 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1573 {
1574   Mat               A = (Mat) Aa;
1575   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1576   PetscErrorCode    ierr;
1577   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1578   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1579   MatScalar         *aa;
1580   PetscViewer       viewer;
1581   PetscViewerFormat format;
1582 
1583   PetscFunctionBegin;
1584   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1585   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1586 
1587   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1588 
1589   /* loop over matrix elements drawing boxes */
1590 
1591   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1592     color = PETSC_DRAW_BLUE;
1593     for (i=0,row=0; i<mbs; i++,row+=bs) {
1594       for (j=a->i[i]; j<a->i[i+1]; j++) {
1595         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1596         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1597         aa  = a->a + j*bs2;
1598         for (k=0; k<bs; k++) {
1599           for (l=0; l<bs; l++) {
1600             if (PetscRealPart(*aa++) >=  0.) continue;
1601             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1602           }
1603         }
1604       }
1605     }
1606     color = PETSC_DRAW_CYAN;
1607     for (i=0,row=0; i<mbs; i++,row+=bs) {
1608       for (j=a->i[i]; j<a->i[i+1]; j++) {
1609         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1610         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1611         aa  = a->a + j*bs2;
1612         for (k=0; k<bs; k++) {
1613           for (l=0; l<bs; l++) {
1614             if (PetscRealPart(*aa++) != 0.) continue;
1615             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1616           }
1617         }
1618       }
1619     }
1620     color = PETSC_DRAW_RED;
1621     for (i=0,row=0; i<mbs; i++,row+=bs) {
1622       for (j=a->i[i]; j<a->i[i+1]; j++) {
1623         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1624         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1625         aa  = a->a + j*bs2;
1626         for (k=0; k<bs; k++) {
1627           for (l=0; l<bs; l++) {
1628             if (PetscRealPart(*aa++) <= 0.) continue;
1629             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1630           }
1631         }
1632       }
1633     }
1634   } else {
1635     /* use contour shading to indicate magnitude of values */
1636     /* first determine max of all nonzero values */
1637     PetscDraw popup;
1638     PetscReal scale,maxv = 0.0;
1639 
1640     for (i=0; i<a->nz*a->bs2; i++) {
1641       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1642     }
1643     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1644     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1645     if (popup) {
1646       ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1647     }
1648     for (i=0,row=0; i<mbs; i++,row+=bs) {
1649       for (j=a->i[i]; j<a->i[i+1]; j++) {
1650         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1651         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1652         aa  = a->a + j*bs2;
1653         for (k=0; k<bs; k++) {
1654           for (l=0; l<bs; l++) {
1655             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1656             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1657           }
1658         }
1659       }
1660     }
1661   }
1662   PetscFunctionReturn(0);
1663 }
1664 
1665 #undef __FUNCT__
1666 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1667 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1668 {
1669   PetscErrorCode ierr;
1670   PetscReal      xl,yl,xr,yr,w,h;
1671   PetscDraw      draw;
1672   PetscBool      isnull;
1673 
1674   PetscFunctionBegin;
1675   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1676   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1677 
1678   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1679   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1680   xr  += w;    yr += h;  xl = -w;     yl = -h;
1681   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1682   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1683   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1684   PetscFunctionReturn(0);
1685 }
1686 
1687 #undef __FUNCT__
1688 #define __FUNCT__ "MatView_SeqBAIJ"
1689 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1690 {
1691   PetscErrorCode ierr;
1692   PetscBool      iascii,isbinary,isdraw;
1693 
1694   PetscFunctionBegin;
1695   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1696   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1697   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1698   if (iascii) {
1699     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1700   } else if (isbinary) {
1701     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1702   } else if (isdraw) {
1703     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1704   } else {
1705     Mat B;
1706     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1707     ierr = MatView(B,viewer);CHKERRQ(ierr);
1708     ierr = MatDestroy(&B);CHKERRQ(ierr);
1709   }
1710   PetscFunctionReturn(0);
1711 }
1712 
1713 
1714 #undef __FUNCT__
1715 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1716 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1717 {
1718   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1719   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1720   PetscInt    *ai = a->i,*ailen = a->ilen;
1721   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1722   MatScalar   *ap,*aa = a->a;
1723 
1724   PetscFunctionBegin;
1725   for (k=0; k<m; k++) { /* loop over rows */
1726     row = im[k]; brow = row/bs;
1727     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1728     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1729     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1730     nrow = ailen[brow];
1731     for (l=0; l<n; l++) { /* loop over columns */
1732       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1733       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1734       col  = in[l];
1735       bcol = col/bs;
1736       cidx = col%bs;
1737       ridx = row%bs;
1738       high = nrow;
1739       low  = 0; /* assume unsorted */
1740       while (high-low > 5) {
1741         t = (low+high)/2;
1742         if (rp[t] > bcol) high = t;
1743         else             low  = t;
1744       }
1745       for (i=low; i<high; i++) {
1746         if (rp[i] > bcol) break;
1747         if (rp[i] == bcol) {
1748           *v++ = ap[bs2*i+bs*cidx+ridx];
1749           goto finished;
1750         }
1751       }
1752       *v++ = 0.0;
1753 finished:;
1754     }
1755   }
1756   PetscFunctionReturn(0);
1757 }
1758 
1759 #undef __FUNCT__
1760 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1761 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1762 {
1763   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1764   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1765   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1766   PetscErrorCode    ierr;
1767   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1768   PetscBool         roworiented=a->roworiented;
1769   const PetscScalar *value     = v;
1770   MatScalar         *ap,*aa = a->a,*bap;
1771 
1772   PetscFunctionBegin;
1773   if (roworiented) {
1774     stepval = (n-1)*bs;
1775   } else {
1776     stepval = (m-1)*bs;
1777   }
1778   for (k=0; k<m; k++) { /* loop over added rows */
1779     row = im[k];
1780     if (row < 0) continue;
1781 #if defined(PETSC_USE_DEBUG)
1782     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1783 #endif
1784     rp   = aj + ai[row];
1785     ap   = aa + bs2*ai[row];
1786     rmax = imax[row];
1787     nrow = ailen[row];
1788     low  = 0;
1789     high = nrow;
1790     for (l=0; l<n; l++) { /* loop over added columns */
1791       if (in[l] < 0) continue;
1792 #if defined(PETSC_USE_DEBUG)
1793       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1794 #endif
1795       col = in[l];
1796       if (roworiented) {
1797         value = v + (k*(stepval+bs) + l)*bs;
1798       } else {
1799         value = v + (l*(stepval+bs) + k)*bs;
1800       }
1801       if (col <= lastcol) low = 0;
1802       else high = nrow;
1803       lastcol = col;
1804       while (high-low > 7) {
1805         t = (low+high)/2;
1806         if (rp[t] > col) high = t;
1807         else             low  = t;
1808       }
1809       for (i=low; i<high; i++) {
1810         if (rp[i] > col) break;
1811         if (rp[i] == col) {
1812           bap = ap +  bs2*i;
1813           if (roworiented) {
1814             if (is == ADD_VALUES) {
1815               for (ii=0; ii<bs; ii++,value+=stepval) {
1816                 for (jj=ii; jj<bs2; jj+=bs) {
1817                   bap[jj] += *value++;
1818                 }
1819               }
1820             } else {
1821               for (ii=0; ii<bs; ii++,value+=stepval) {
1822                 for (jj=ii; jj<bs2; jj+=bs) {
1823                   bap[jj] = *value++;
1824                 }
1825               }
1826             }
1827           } else {
1828             if (is == ADD_VALUES) {
1829               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1830                 for (jj=0; jj<bs; jj++) {
1831                   bap[jj] += value[jj];
1832                 }
1833                 bap += bs;
1834               }
1835             } else {
1836               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1837                 for (jj=0; jj<bs; jj++) {
1838                   bap[jj]  = value[jj];
1839                 }
1840                 bap += bs;
1841               }
1842             }
1843           }
1844           goto noinsert2;
1845         }
1846       }
1847       if (nonew == 1) goto noinsert2;
1848       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1849       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1850       N = nrow++ - 1; high++;
1851       /* shift up all the later entries in this row */
1852       for (ii=N; ii>=i; ii--) {
1853         rp[ii+1] = rp[ii];
1854         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1855       }
1856       if (N >= i) {
1857         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1858       }
1859       rp[i] = col;
1860       bap   = ap +  bs2*i;
1861       if (roworiented) {
1862         for (ii=0; ii<bs; ii++,value+=stepval) {
1863           for (jj=ii; jj<bs2; jj+=bs) {
1864             bap[jj] = *value++;
1865           }
1866         }
1867       } else {
1868         for (ii=0; ii<bs; ii++,value+=stepval) {
1869           for (jj=0; jj<bs; jj++) {
1870             *bap++ = *value++;
1871           }
1872         }
1873       }
1874 noinsert2:;
1875       low = i;
1876     }
1877     ailen[row] = nrow;
1878   }
1879   PetscFunctionReturn(0);
1880 }
1881 
1882 #undef __FUNCT__
1883 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1884 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1885 {
1886   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1887   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1888   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1889   PetscErrorCode ierr;
1890   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1891   MatScalar      *aa  = a->a,*ap;
1892   PetscReal      ratio=0.6;
1893 
1894   PetscFunctionBegin;
1895   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1896 
1897   if (m) rmax = ailen[0];
1898   for (i=1; i<mbs; i++) {
1899     /* move each row back by the amount of empty slots (fshift) before it*/
1900     fshift += imax[i-1] - ailen[i-1];
1901     rmax    = PetscMax(rmax,ailen[i]);
1902     if (fshift) {
1903       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1904       N  = ailen[i];
1905       for (j=0; j<N; j++) {
1906         ip[j-fshift] = ip[j];
1907 
1908         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1909       }
1910     }
1911     ai[i] = ai[i-1] + ailen[i-1];
1912   }
1913   if (mbs) {
1914     fshift += imax[mbs-1] - ailen[mbs-1];
1915     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1916   }
1917 
1918   /* reset ilen and imax for each row */
1919   a->nonzerorowcnt = 0;
1920   for (i=0; i<mbs; i++) {
1921     ailen[i] = imax[i] = ai[i+1] - ai[i];
1922     a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1923   }
1924   a->nz = ai[mbs];
1925 
1926   /* diagonals may have moved, so kill the diagonal pointers */
1927   a->idiagvalid = PETSC_FALSE;
1928   if (fshift && a->diag) {
1929     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1930     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1931     a->diag = 0;
1932   }
1933   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1934   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1935   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1936   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1937 
1938   A->info.mallocs    += a->reallocs;
1939   a->reallocs         = 0;
1940   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1941   a->rmax             = rmax;
1942 
1943   ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1944   PetscFunctionReturn(0);
1945 }
1946 
1947 /*
1948    This function returns an array of flags which indicate the locations of contiguous
1949    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1950    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1951    Assume: sizes should be long enough to hold all the values.
1952 */
1953 #undef __FUNCT__
1954 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1955 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1956 {
1957   PetscInt  i,j,k,row;
1958   PetscBool flg;
1959 
1960   PetscFunctionBegin;
1961   for (i=0,j=0; i<n; j++) {
1962     row = idx[i];
1963     if (row%bs!=0) { /* Not the begining of a block */
1964       sizes[j] = 1;
1965       i++;
1966     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1967       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1968       i++;
1969     } else { /* Begining of the block, so check if the complete block exists */
1970       flg = PETSC_TRUE;
1971       for (k=1; k<bs; k++) {
1972         if (row+k != idx[i+k]) { /* break in the block */
1973           flg = PETSC_FALSE;
1974           break;
1975         }
1976       }
1977       if (flg) { /* No break in the bs */
1978         sizes[j] = bs;
1979         i       += bs;
1980       } else {
1981         sizes[j] = 1;
1982         i++;
1983       }
1984     }
1985   }
1986   *bs_max = j;
1987   PetscFunctionReturn(0);
1988 }
1989 
1990 #undef __FUNCT__
1991 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1992 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
1993 {
1994   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
1995   PetscErrorCode    ierr;
1996   PetscInt          i,j,k,count,*rows;
1997   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
1998   PetscScalar       zero = 0.0;
1999   MatScalar         *aa;
2000   const PetscScalar *xx;
2001   PetscScalar       *bb;
2002 
2003   PetscFunctionBegin;
2004   /* fix right hand side if needed */
2005   if (x && b) {
2006     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2007     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2008     for (i=0; i<is_n; i++) {
2009       bb[is_idx[i]] = diag*xx[is_idx[i]];
2010     }
2011     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2012     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2013   }
2014 
2015   /* Make a copy of the IS and  sort it */
2016   /* allocate memory for rows,sizes */
2017   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2018 
2019   /* copy IS values to rows, and sort them */
2020   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2021   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2022 
2023   if (baij->keepnonzeropattern) {
2024     for (i=0; i<is_n; i++) sizes[i] = 1;
2025     bs_max          = is_n;
2026   } else {
2027     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2028     A->nonzerostate++;
2029   }
2030 
2031   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2032     row = rows[j];
2033     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2034     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2035     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2036     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2037       if (diag != (PetscScalar)0.0) {
2038         if (baij->ilen[row/bs] > 0) {
2039           baij->ilen[row/bs]       = 1;
2040           baij->j[baij->i[row/bs]] = row/bs;
2041 
2042           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2043         }
2044         /* Now insert all the diagonal values for this bs */
2045         for (k=0; k<bs; k++) {
2046           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2047         }
2048       } else { /* (diag == 0.0) */
2049         baij->ilen[row/bs] = 0;
2050       } /* end (diag == 0.0) */
2051     } else { /* (sizes[i] != bs) */
2052 #if defined(PETSC_USE_DEBUG)
2053       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2054 #endif
2055       for (k=0; k<count; k++) {
2056         aa[0] =  zero;
2057         aa   += bs;
2058       }
2059       if (diag != (PetscScalar)0.0) {
2060         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2061       }
2062     }
2063   }
2064 
2065   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2066   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2067   PetscFunctionReturn(0);
2068 }
2069 
2070 #undef __FUNCT__
2071 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2072 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2073 {
2074   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2075   PetscErrorCode    ierr;
2076   PetscInt          i,j,k,count;
2077   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2078   PetscScalar       zero = 0.0;
2079   MatScalar         *aa;
2080   const PetscScalar *xx;
2081   PetscScalar       *bb;
2082   PetscBool         *zeroed,vecs = PETSC_FALSE;
2083 
2084   PetscFunctionBegin;
2085   /* fix right hand side if needed */
2086   if (x && b) {
2087     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2088     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2089     vecs = PETSC_TRUE;
2090   }
2091 
2092   /* zero the columns */
2093   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2094   for (i=0; i<is_n; i++) {
2095     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2096     zeroed[is_idx[i]] = PETSC_TRUE;
2097   }
2098   for (i=0; i<A->rmap->N; i++) {
2099     if (!zeroed[i]) {
2100       row = i/bs;
2101       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2102         for (k=0; k<bs; k++) {
2103           col = bs*baij->j[j] + k;
2104           if (zeroed[col]) {
2105             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2106             if (vecs) bb[i] -= aa[0]*xx[col];
2107             aa[0] = 0.0;
2108           }
2109         }
2110       }
2111     } else if (vecs) bb[i] = diag*xx[i];
2112   }
2113   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2114   if (vecs) {
2115     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2116     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2117   }
2118 
2119   /* zero the rows */
2120   for (i=0; i<is_n; i++) {
2121     row   = is_idx[i];
2122     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2123     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2124     for (k=0; k<count; k++) {
2125       aa[0] =  zero;
2126       aa   += bs;
2127     }
2128     if (diag != (PetscScalar)0.0) {
2129       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2130     }
2131   }
2132   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2133   PetscFunctionReturn(0);
2134 }
2135 
2136 #undef __FUNCT__
2137 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2138 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2139 {
2140   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2141   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2142   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2143   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2144   PetscErrorCode ierr;
2145   PetscInt       ridx,cidx,bs2=a->bs2;
2146   PetscBool      roworiented=a->roworiented;
2147   MatScalar      *ap,value,*aa=a->a,*bap;
2148 
2149   PetscFunctionBegin;
2150   for (k=0; k<m; k++) { /* loop over added rows */
2151     row  = im[k];
2152     brow = row/bs;
2153     if (row < 0) continue;
2154 #if defined(PETSC_USE_DEBUG)
2155     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2156 #endif
2157     rp   = aj + ai[brow];
2158     ap   = aa + bs2*ai[brow];
2159     rmax = imax[brow];
2160     nrow = ailen[brow];
2161     low  = 0;
2162     high = nrow;
2163     for (l=0; l<n; l++) { /* loop over added columns */
2164       if (in[l] < 0) continue;
2165 #if defined(PETSC_USE_DEBUG)
2166       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2167 #endif
2168       col  = in[l]; bcol = col/bs;
2169       ridx = row % bs; cidx = col % bs;
2170       if (roworiented) {
2171         value = v[l + k*n];
2172       } else {
2173         value = v[k + l*m];
2174       }
2175       if (col <= lastcol) low = 0; else high = nrow;
2176       lastcol = col;
2177       while (high-low > 7) {
2178         t = (low+high)/2;
2179         if (rp[t] > bcol) high = t;
2180         else              low  = t;
2181       }
2182       for (i=low; i<high; i++) {
2183         if (rp[i] > bcol) break;
2184         if (rp[i] == bcol) {
2185           bap = ap +  bs2*i + bs*cidx + ridx;
2186           if (is == ADD_VALUES) *bap += value;
2187           else                  *bap  = value;
2188           goto noinsert1;
2189         }
2190       }
2191       if (nonew == 1) goto noinsert1;
2192       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2193       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2194       N = nrow++ - 1; high++;
2195       /* shift up all the later entries in this row */
2196       for (ii=N; ii>=i; ii--) {
2197         rp[ii+1] = rp[ii];
2198         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2199       }
2200       if (N>=i) {
2201         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2202       }
2203       rp[i]                      = bcol;
2204       ap[bs2*i + bs*cidx + ridx] = value;
2205       a->nz++;
2206       A->nonzerostate++;
2207 noinsert1:;
2208       low = i;
2209     }
2210     ailen[brow] = nrow;
2211   }
2212   PetscFunctionReturn(0);
2213 }
2214 
2215 #undef __FUNCT__
2216 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2217 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2218 {
2219   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2220   Mat            outA;
2221   PetscErrorCode ierr;
2222   PetscBool      row_identity,col_identity;
2223 
2224   PetscFunctionBegin;
2225   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2226   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2227   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2228   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2229 
2230   outA            = inA;
2231   inA->factortype = MAT_FACTOR_LU;
2232 
2233   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2234 
2235   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2236   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2237   a->row = row;
2238   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2239   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2240   a->col = col;
2241 
2242   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2243   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2244   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2245   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2246 
2247   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2248   if (!a->solve_work) {
2249     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2250     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2251   }
2252   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2253   PetscFunctionReturn(0);
2254 }
2255 
2256 #undef __FUNCT__
2257 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2258 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2259 {
2260   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2261   PetscInt    i,nz,mbs;
2262 
2263   PetscFunctionBegin;
2264   nz  = baij->maxnz;
2265   mbs = baij->mbs;
2266   for (i=0; i<nz; i++) {
2267     baij->j[i] = indices[i];
2268   }
2269   baij->nz = nz;
2270   for (i=0; i<mbs; i++) {
2271     baij->ilen[i] = baij->imax[i];
2272   }
2273   PetscFunctionReturn(0);
2274 }
2275 
2276 #undef __FUNCT__
2277 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2278 /*@
2279     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2280        in the matrix.
2281 
2282   Input Parameters:
2283 +  mat - the SeqBAIJ matrix
2284 -  indices - the column indices
2285 
2286   Level: advanced
2287 
2288   Notes:
2289     This can be called if you have precomputed the nonzero structure of the
2290   matrix and want to provide it to the matrix object to improve the performance
2291   of the MatSetValues() operation.
2292 
2293     You MUST have set the correct numbers of nonzeros per row in the call to
2294   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2295 
2296     MUST be called before any calls to MatSetValues();
2297 
2298 @*/
2299 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2300 {
2301   PetscErrorCode ierr;
2302 
2303   PetscFunctionBegin;
2304   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2305   PetscValidPointer(indices,2);
2306   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2307   PetscFunctionReturn(0);
2308 }
2309 
2310 #undef __FUNCT__
2311 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2312 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2313 {
2314   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2315   PetscErrorCode ierr;
2316   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2317   PetscReal      atmp;
2318   PetscScalar    *x,zero = 0.0;
2319   MatScalar      *aa;
2320   PetscInt       ncols,brow,krow,kcol;
2321 
2322   PetscFunctionBegin;
2323   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2324   bs  = A->rmap->bs;
2325   aa  = a->a;
2326   ai  = a->i;
2327   aj  = a->j;
2328   mbs = a->mbs;
2329 
2330   ierr = VecSet(v,zero);CHKERRQ(ierr);
2331   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2332   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2333   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2334   for (i=0; i<mbs; i++) {
2335     ncols = ai[1] - ai[0]; ai++;
2336     brow  = bs*i;
2337     for (j=0; j<ncols; j++) {
2338       for (kcol=0; kcol<bs; kcol++) {
2339         for (krow=0; krow<bs; krow++) {
2340           atmp = PetscAbsScalar(*aa);aa++;
2341           row  = brow + krow;   /* row index */
2342           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2343         }
2344       }
2345       aj++;
2346     }
2347   }
2348   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2349   PetscFunctionReturn(0);
2350 }
2351 
2352 #undef __FUNCT__
2353 #define __FUNCT__ "MatCopy_SeqBAIJ"
2354 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2355 {
2356   PetscErrorCode ierr;
2357 
2358   PetscFunctionBegin;
2359   /* If the two matrices have the same copy implementation, use fast copy. */
2360   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2361     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2362     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2363     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2364 
2365     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2366     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2367     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2368   } else {
2369     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2370   }
2371   PetscFunctionReturn(0);
2372 }
2373 
2374 #undef __FUNCT__
2375 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2376 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2377 {
2378   PetscErrorCode ierr;
2379 
2380   PetscFunctionBegin;
2381   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2382   PetscFunctionReturn(0);
2383 }
2384 
2385 #undef __FUNCT__
2386 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2387 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2388 {
2389   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2390 
2391   PetscFunctionBegin;
2392   *array = a->a;
2393   PetscFunctionReturn(0);
2394 }
2395 
2396 #undef __FUNCT__
2397 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2398 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2399 {
2400   PetscFunctionBegin;
2401   PetscFunctionReturn(0);
2402 }
2403 
2404 #undef __FUNCT__
2405 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ"
2406 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2407 {
2408   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2409   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2410   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2411   PetscErrorCode ierr;
2412 
2413   PetscFunctionBegin;
2414   /* Set the number of nonzeros in the new matrix */
2415   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2416   PetscFunctionReturn(0);
2417 }
2418 
2419 #undef __FUNCT__
2420 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2421 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2422 {
2423   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2424   PetscErrorCode ierr;
2425   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2426   PetscBLASInt   one=1;
2427 
2428   PetscFunctionBegin;
2429   if (str == SAME_NONZERO_PATTERN) {
2430     PetscScalar  alpha = a;
2431     PetscBLASInt bnz;
2432     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2433     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2434     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2435   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2436     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2437   } else {
2438     Mat      B;
2439     PetscInt *nnz;
2440     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2441     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2442     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2443     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2444     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2445     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2446     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2447     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2448     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2449     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2450     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2451     ierr = PetscFree(nnz);CHKERRQ(ierr);
2452   }
2453   PetscFunctionReturn(0);
2454 }
2455 
2456 #undef __FUNCT__
2457 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2458 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2459 {
2460   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2461   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2462   MatScalar   *aa = a->a;
2463 
2464   PetscFunctionBegin;
2465   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2466   PetscFunctionReturn(0);
2467 }
2468 
2469 #undef __FUNCT__
2470 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2471 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2472 {
2473   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2474   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2475   MatScalar   *aa = a->a;
2476 
2477   PetscFunctionBegin;
2478   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2479   PetscFunctionReturn(0);
2480 }
2481 
2482 #undef __FUNCT__
2483 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2484 /*
2485     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2486 */
2487 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2488 {
2489   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2490   PetscErrorCode ierr;
2491   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2492   PetscInt       nz = a->i[m],row,*jj,mr,col;
2493 
2494   PetscFunctionBegin;
2495   *nn = n;
2496   if (!ia) PetscFunctionReturn(0);
2497   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2498   else {
2499     ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2500     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2501     ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2502     jj   = a->j;
2503     for (i=0; i<nz; i++) {
2504       collengths[jj[i]]++;
2505     }
2506     cia[0] = oshift;
2507     for (i=0; i<n; i++) {
2508       cia[i+1] = cia[i] + collengths[i];
2509     }
2510     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2511     jj   = a->j;
2512     for (row=0; row<m; row++) {
2513       mr = a->i[row+1] - a->i[row];
2514       for (i=0; i<mr; i++) {
2515         col = *jj++;
2516 
2517         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2518       }
2519     }
2520     ierr = PetscFree(collengths);CHKERRQ(ierr);
2521     *ia  = cia; *ja = cja;
2522   }
2523   PetscFunctionReturn(0);
2524 }
2525 
2526 #undef __FUNCT__
2527 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2528 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2529 {
2530   PetscErrorCode ierr;
2531 
2532   PetscFunctionBegin;
2533   if (!ia) PetscFunctionReturn(0);
2534   ierr = PetscFree(*ia);CHKERRQ(ierr);
2535   ierr = PetscFree(*ja);CHKERRQ(ierr);
2536   PetscFunctionReturn(0);
2537 }
2538 
2539 /*
2540  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2541  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2542  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2543  */
2544 #undef __FUNCT__
2545 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color"
2546 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2547 {
2548   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2549   PetscErrorCode ierr;
2550   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2551   PetscInt       nz = a->i[m],row,*jj,mr,col;
2552   PetscInt       *cspidx;
2553 
2554   PetscFunctionBegin;
2555   *nn = n;
2556   if (!ia) PetscFunctionReturn(0);
2557 
2558   ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2559   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2560   ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2561   ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr);
2562   jj   = a->j;
2563   for (i=0; i<nz; i++) {
2564     collengths[jj[i]]++;
2565   }
2566   cia[0] = oshift;
2567   for (i=0; i<n; i++) {
2568     cia[i+1] = cia[i] + collengths[i];
2569   }
2570   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2571   jj   = a->j;
2572   for (row=0; row<m; row++) {
2573     mr = a->i[row+1] - a->i[row];
2574     for (i=0; i<mr; i++) {
2575       col = *jj++;
2576       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2577       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2578     }
2579   }
2580   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2581   *ia    = cia; *ja = cja;
2582   *spidx = cspidx;
2583   PetscFunctionReturn(0);
2584 }
2585 
2586 #undef __FUNCT__
2587 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color"
2588 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2589 {
2590   PetscErrorCode ierr;
2591 
2592   PetscFunctionBegin;
2593   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2594   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2595   PetscFunctionReturn(0);
2596 }
2597 
2598 #undef __FUNCT__
2599 #define __FUNCT__ "MatShift_SeqBAIJ"
2600 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2601 {
2602   PetscErrorCode ierr;
2603   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2604 
2605   PetscFunctionBegin;
2606   if (!Y->preallocated || !aij->nz) {
2607     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2608   }
2609   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2610   PetscFunctionReturn(0);
2611 }
2612 
2613 /* -------------------------------------------------------------------*/
2614 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2615                                        MatGetRow_SeqBAIJ,
2616                                        MatRestoreRow_SeqBAIJ,
2617                                        MatMult_SeqBAIJ_N,
2618                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2619                                        MatMultTranspose_SeqBAIJ,
2620                                        MatMultTransposeAdd_SeqBAIJ,
2621                                        0,
2622                                        0,
2623                                        0,
2624                                /* 10*/ 0,
2625                                        MatLUFactor_SeqBAIJ,
2626                                        0,
2627                                        0,
2628                                        MatTranspose_SeqBAIJ,
2629                                /* 15*/ MatGetInfo_SeqBAIJ,
2630                                        MatEqual_SeqBAIJ,
2631                                        MatGetDiagonal_SeqBAIJ,
2632                                        MatDiagonalScale_SeqBAIJ,
2633                                        MatNorm_SeqBAIJ,
2634                                /* 20*/ 0,
2635                                        MatAssemblyEnd_SeqBAIJ,
2636                                        MatSetOption_SeqBAIJ,
2637                                        MatZeroEntries_SeqBAIJ,
2638                                /* 24*/ MatZeroRows_SeqBAIJ,
2639                                        0,
2640                                        0,
2641                                        0,
2642                                        0,
2643                                /* 29*/ MatSetUp_SeqBAIJ,
2644                                        0,
2645                                        0,
2646                                        0,
2647                                        0,
2648                                /* 34*/ MatDuplicate_SeqBAIJ,
2649                                        0,
2650                                        0,
2651                                        MatILUFactor_SeqBAIJ,
2652                                        0,
2653                                /* 39*/ MatAXPY_SeqBAIJ,
2654                                        MatGetSubMatrices_SeqBAIJ,
2655                                        MatIncreaseOverlap_SeqBAIJ,
2656                                        MatGetValues_SeqBAIJ,
2657                                        MatCopy_SeqBAIJ,
2658                                /* 44*/ 0,
2659                                        MatScale_SeqBAIJ,
2660                                        MatShift_SeqBAIJ,
2661                                        0,
2662                                        MatZeroRowsColumns_SeqBAIJ,
2663                                /* 49*/ 0,
2664                                        MatGetRowIJ_SeqBAIJ,
2665                                        MatRestoreRowIJ_SeqBAIJ,
2666                                        MatGetColumnIJ_SeqBAIJ,
2667                                        MatRestoreColumnIJ_SeqBAIJ,
2668                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2669                                        0,
2670                                        0,
2671                                        0,
2672                                        MatSetValuesBlocked_SeqBAIJ,
2673                                /* 59*/ MatGetSubMatrix_SeqBAIJ,
2674                                        MatDestroy_SeqBAIJ,
2675                                        MatView_SeqBAIJ,
2676                                        0,
2677                                        0,
2678                                /* 64*/ 0,
2679                                        0,
2680                                        0,
2681                                        0,
2682                                        0,
2683                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2684                                        0,
2685                                        MatConvert_Basic,
2686                                        0,
2687                                        0,
2688                                /* 74*/ 0,
2689                                        MatFDColoringApply_BAIJ,
2690                                        0,
2691                                        0,
2692                                        0,
2693                                /* 79*/ 0,
2694                                        0,
2695                                        0,
2696                                        0,
2697                                        MatLoad_SeqBAIJ,
2698                                /* 84*/ 0,
2699                                        0,
2700                                        0,
2701                                        0,
2702                                        0,
2703                                /* 89*/ 0,
2704                                        0,
2705                                        0,
2706                                        0,
2707                                        0,
2708                                /* 94*/ 0,
2709                                        0,
2710                                        0,
2711                                        0,
2712                                        0,
2713                                /* 99*/ 0,
2714                                        0,
2715                                        0,
2716                                        0,
2717                                        0,
2718                                /*104*/ 0,
2719                                        MatRealPart_SeqBAIJ,
2720                                        MatImaginaryPart_SeqBAIJ,
2721                                        0,
2722                                        0,
2723                                /*109*/ 0,
2724                                        0,
2725                                        0,
2726                                        0,
2727                                        MatMissingDiagonal_SeqBAIJ,
2728                                /*114*/ 0,
2729                                        0,
2730                                        0,
2731                                        0,
2732                                        0,
2733                                /*119*/ 0,
2734                                        0,
2735                                        MatMultHermitianTranspose_SeqBAIJ,
2736                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2737                                        0,
2738                                /*124*/ 0,
2739                                        0,
2740                                        MatInvertBlockDiagonal_SeqBAIJ,
2741                                        0,
2742                                        0,
2743                                /*129*/ 0,
2744                                        0,
2745                                        0,
2746                                        0,
2747                                        0,
2748                                /*134*/ 0,
2749                                        0,
2750                                        0,
2751                                        0,
2752                                        0,
2753                                /*139*/ 0,
2754                                        0,
2755                                        0,
2756                                        MatFDColoringSetUp_SeqXAIJ,
2757                                        0,
2758                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ
2759 };
2760 
2761 #undef __FUNCT__
2762 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2763 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2764 {
2765   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2766   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2767   PetscErrorCode ierr;
2768 
2769   PetscFunctionBegin;
2770   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2771 
2772   /* allocate space for values if not already there */
2773   if (!aij->saved_values) {
2774     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2775     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2776   }
2777 
2778   /* copy values over */
2779   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2780   PetscFunctionReturn(0);
2781 }
2782 
2783 #undef __FUNCT__
2784 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2785 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2786 {
2787   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2788   PetscErrorCode ierr;
2789   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2790 
2791   PetscFunctionBegin;
2792   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2793   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2794 
2795   /* copy values over */
2796   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2797   PetscFunctionReturn(0);
2798 }
2799 
2800 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2801 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2802 
2803 #undef __FUNCT__
2804 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2805 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2806 {
2807   Mat_SeqBAIJ    *b;
2808   PetscErrorCode ierr;
2809   PetscInt       i,mbs,nbs,bs2;
2810   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2811 
2812   PetscFunctionBegin;
2813   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2814   if (nz == MAT_SKIP_ALLOCATION) {
2815     skipallocation = PETSC_TRUE;
2816     nz             = 0;
2817   }
2818 
2819   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2820   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2821   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2822   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2823 
2824   B->preallocated = PETSC_TRUE;
2825 
2826   mbs = B->rmap->n/bs;
2827   nbs = B->cmap->n/bs;
2828   bs2 = bs*bs;
2829 
2830   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2831 
2832   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2833   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2834   if (nnz) {
2835     for (i=0; i<mbs; i++) {
2836       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2837       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2838     }
2839   }
2840 
2841   b    = (Mat_SeqBAIJ*)B->data;
2842   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2843   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2844   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2845 
2846   if (!flg) {
2847     switch (bs) {
2848     case 1:
2849       B->ops->mult    = MatMult_SeqBAIJ_1;
2850       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2851       break;
2852     case 2:
2853       B->ops->mult    = MatMult_SeqBAIJ_2;
2854       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2855       break;
2856     case 3:
2857       B->ops->mult    = MatMult_SeqBAIJ_3;
2858       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2859       break;
2860     case 4:
2861       B->ops->mult    = MatMult_SeqBAIJ_4;
2862       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2863       break;
2864     case 5:
2865       B->ops->mult    = MatMult_SeqBAIJ_5;
2866       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2867       break;
2868     case 6:
2869       B->ops->mult    = MatMult_SeqBAIJ_6;
2870       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2871       break;
2872     case 7:
2873       B->ops->mult    = MatMult_SeqBAIJ_7;
2874       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2875       break;
2876     case 15:
2877       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2878       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2879       break;
2880     default:
2881       B->ops->mult    = MatMult_SeqBAIJ_N;
2882       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2883       break;
2884     }
2885   }
2886   B->ops->sor = MatSOR_SeqBAIJ;
2887   b->mbs = mbs;
2888   b->nbs = nbs;
2889   if (!skipallocation) {
2890     if (!b->imax) {
2891       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2892       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2893 
2894       b->free_imax_ilen = PETSC_TRUE;
2895     }
2896     /* b->ilen will count nonzeros in each block row so far. */
2897     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2898     if (!nnz) {
2899       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2900       else if (nz < 0) nz = 1;
2901       for (i=0; i<mbs; i++) b->imax[i] = nz;
2902       nz = nz*mbs;
2903     } else {
2904       nz = 0;
2905       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2906     }
2907 
2908     /* allocate the matrix space */
2909     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2910     ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2911     ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2912     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2913     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2914 
2915     b->singlemalloc = PETSC_TRUE;
2916     b->i[0]         = 0;
2917     for (i=1; i<mbs+1; i++) {
2918       b->i[i] = b->i[i-1] + b->imax[i-1];
2919     }
2920     b->free_a  = PETSC_TRUE;
2921     b->free_ij = PETSC_TRUE;
2922   } else {
2923     b->free_a  = PETSC_FALSE;
2924     b->free_ij = PETSC_FALSE;
2925   }
2926 
2927   b->bs2              = bs2;
2928   b->mbs              = mbs;
2929   b->nz               = 0;
2930   b->maxnz            = nz;
2931   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2932   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2933   PetscFunctionReturn(0);
2934 }
2935 
2936 #undef __FUNCT__
2937 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2938 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2939 {
2940   PetscInt       i,m,nz,nz_max=0,*nnz;
2941   PetscScalar    *values=0;
2942   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2943   PetscErrorCode ierr;
2944 
2945   PetscFunctionBegin;
2946   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2947   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2948   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2949   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2950   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2951   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2952   m    = B->rmap->n/bs;
2953 
2954   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2955   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2956   for (i=0; i<m; i++) {
2957     nz = ii[i+1]- ii[i];
2958     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2959     nz_max = PetscMax(nz_max, nz);
2960     nnz[i] = nz;
2961   }
2962   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2963   ierr = PetscFree(nnz);CHKERRQ(ierr);
2964 
2965   values = (PetscScalar*)V;
2966   if (!values) {
2967     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2968   }
2969   for (i=0; i<m; i++) {
2970     PetscInt          ncols  = ii[i+1] - ii[i];
2971     const PetscInt    *icols = jj + ii[i];
2972     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2973     if (!roworiented) {
2974       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2975     } else {
2976       PetscInt j;
2977       for (j=0; j<ncols; j++) {
2978         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2979         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2980       }
2981     }
2982   }
2983   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2984   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2985   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2986   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2987   PetscFunctionReturn(0);
2988 }
2989 
2990 /*MC
2991    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2992    block sparse compressed row format.
2993 
2994    Options Database Keys:
2995 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2996 
2997   Level: beginner
2998 
2999 .seealso: MatCreateSeqBAIJ()
3000 M*/
3001 
3002 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3003 
3004 #undef __FUNCT__
3005 #define __FUNCT__ "MatCreate_SeqBAIJ"
3006 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3007 {
3008   PetscErrorCode ierr;
3009   PetscMPIInt    size;
3010   Mat_SeqBAIJ    *b;
3011 
3012   PetscFunctionBegin;
3013   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3014   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3015 
3016   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3017   B->data = (void*)b;
3018   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3019 
3020   b->row          = 0;
3021   b->col          = 0;
3022   b->icol         = 0;
3023   b->reallocs     = 0;
3024   b->saved_values = 0;
3025 
3026   b->roworiented        = PETSC_TRUE;
3027   b->nonew              = 0;
3028   b->diag               = 0;
3029   B->spptr              = 0;
3030   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3031   b->keepnonzeropattern = PETSC_FALSE;
3032 
3033   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3034   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3035   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3036   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3037   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3038   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3039   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3040   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3041   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3042   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3043   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3044   PetscFunctionReturn(0);
3045 }
3046 
3047 #undef __FUNCT__
3048 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3049 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3050 {
3051   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3052   PetscErrorCode ierr;
3053   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3054 
3055   PetscFunctionBegin;
3056   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3057 
3058   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3059     c->imax           = a->imax;
3060     c->ilen           = a->ilen;
3061     c->free_imax_ilen = PETSC_FALSE;
3062   } else {
3063     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3064     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3065     for (i=0; i<mbs; i++) {
3066       c->imax[i] = a->imax[i];
3067       c->ilen[i] = a->ilen[i];
3068     }
3069     c->free_imax_ilen = PETSC_TRUE;
3070   }
3071 
3072   /* allocate the matrix space */
3073   if (mallocmatspace) {
3074     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3075       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3076       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3077 
3078       c->i            = a->i;
3079       c->j            = a->j;
3080       c->singlemalloc = PETSC_FALSE;
3081       c->free_a       = PETSC_TRUE;
3082       c->free_ij      = PETSC_FALSE;
3083       c->parent       = A;
3084       C->preallocated = PETSC_TRUE;
3085       C->assembled    = PETSC_TRUE;
3086 
3087       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3088       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3089       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3090     } else {
3091       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3092       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3093 
3094       c->singlemalloc = PETSC_TRUE;
3095       c->free_a       = PETSC_TRUE;
3096       c->free_ij      = PETSC_TRUE;
3097 
3098       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3099       if (mbs > 0) {
3100         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3101         if (cpvalues == MAT_COPY_VALUES) {
3102           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3103         } else {
3104           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3105         }
3106       }
3107       C->preallocated = PETSC_TRUE;
3108       C->assembled    = PETSC_TRUE;
3109     }
3110   }
3111 
3112   c->roworiented = a->roworiented;
3113   c->nonew       = a->nonew;
3114 
3115   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3116   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3117 
3118   c->bs2         = a->bs2;
3119   c->mbs         = a->mbs;
3120   c->nbs         = a->nbs;
3121 
3122   if (a->diag) {
3123     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3124       c->diag      = a->diag;
3125       c->free_diag = PETSC_FALSE;
3126     } else {
3127       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3128       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3129       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3130       c->free_diag = PETSC_TRUE;
3131     }
3132   } else c->diag = 0;
3133 
3134   c->nz         = a->nz;
3135   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3136   c->solve_work = NULL;
3137   c->mult_work  = NULL;
3138   c->sor_workt  = NULL;
3139   c->sor_work   = NULL;
3140 
3141   c->compressedrow.use   = a->compressedrow.use;
3142   c->compressedrow.nrows = a->compressedrow.nrows;
3143   if (a->compressedrow.use) {
3144     i    = a->compressedrow.nrows;
3145     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3146     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3147     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3148     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3149   } else {
3150     c->compressedrow.use    = PETSC_FALSE;
3151     c->compressedrow.i      = NULL;
3152     c->compressedrow.rindex = NULL;
3153   }
3154   C->nonzerostate = A->nonzerostate;
3155 
3156   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3157   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3158   PetscFunctionReturn(0);
3159 }
3160 
3161 #undef __FUNCT__
3162 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3163 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3164 {
3165   PetscErrorCode ierr;
3166 
3167   PetscFunctionBegin;
3168   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3169   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3170   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3171   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3172   PetscFunctionReturn(0);
3173 }
3174 
3175 #undef __FUNCT__
3176 #define __FUNCT__ "MatLoad_SeqBAIJ"
3177 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3178 {
3179   Mat_SeqBAIJ    *a;
3180   PetscErrorCode ierr;
3181   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3182   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3183   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3184   PetscInt       *masked,nmask,tmp,bs2,ishift;
3185   PetscMPIInt    size;
3186   int            fd;
3187   PetscScalar    *aa;
3188   MPI_Comm       comm;
3189 
3190   PetscFunctionBegin;
3191   /* force binary viewer to load .info file if it has not yet done so */
3192   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3193   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3194   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3195   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3196   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3197   if (bs < 0) bs = 1;
3198   bs2  = bs*bs;
3199 
3200   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3201   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3202   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3203   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3204   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3205   M = header[1]; N = header[2]; nz = header[3];
3206 
3207   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3208   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3209 
3210   /*
3211      This code adds extra rows to make sure the number of rows is
3212     divisible by the blocksize
3213   */
3214   mbs        = M/bs;
3215   extra_rows = bs - M + bs*(mbs);
3216   if (extra_rows == bs) extra_rows = 0;
3217   else mbs++;
3218   if (extra_rows) {
3219     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3220   }
3221 
3222   /* Set global sizes if not already set */
3223   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3224     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3225   } else { /* Check if the matrix global sizes are correct */
3226     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3227     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3228       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3229     }
3230     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3231   }
3232 
3233   /* read in row lengths */
3234   ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
3235   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3236   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3237 
3238   /* read in column indices */
3239   ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr);
3240   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3241   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3242 
3243   /* loop over row lengths determining block row lengths */
3244   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3245   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3246   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3247   rowcount = 0;
3248   nzcount  = 0;
3249   for (i=0; i<mbs; i++) {
3250     nmask = 0;
3251     for (j=0; j<bs; j++) {
3252       kmax = rowlengths[rowcount];
3253       for (k=0; k<kmax; k++) {
3254         tmp = jj[nzcount++]/bs;
3255         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3256       }
3257       rowcount++;
3258     }
3259     browlengths[i] += nmask;
3260     /* zero out the mask elements we set */
3261     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3262   }
3263 
3264   /* Do preallocation  */
3265   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3266   a    = (Mat_SeqBAIJ*)newmat->data;
3267 
3268   /* set matrix "i" values */
3269   a->i[0] = 0;
3270   for (i=1; i<= mbs; i++) {
3271     a->i[i]      = a->i[i-1] + browlengths[i-1];
3272     a->ilen[i-1] = browlengths[i-1];
3273   }
3274   a->nz = 0;
3275   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3276 
3277   /* read in nonzero values */
3278   ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr);
3279   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3280   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3281 
3282   /* set "a" and "j" values into matrix */
3283   nzcount = 0; jcount = 0;
3284   for (i=0; i<mbs; i++) {
3285     nzcountb = nzcount;
3286     nmask    = 0;
3287     for (j=0; j<bs; j++) {
3288       kmax = rowlengths[i*bs+j];
3289       for (k=0; k<kmax; k++) {
3290         tmp = jj[nzcount++]/bs;
3291         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3292       }
3293     }
3294     /* sort the masked values */
3295     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3296 
3297     /* set "j" values into matrix */
3298     maskcount = 1;
3299     for (j=0; j<nmask; j++) {
3300       a->j[jcount++]  = masked[j];
3301       mask[masked[j]] = maskcount++;
3302     }
3303     /* set "a" values into matrix */
3304     ishift = bs2*a->i[i];
3305     for (j=0; j<bs; j++) {
3306       kmax = rowlengths[i*bs+j];
3307       for (k=0; k<kmax; k++) {
3308         tmp       = jj[nzcountb]/bs;
3309         block     = mask[tmp] - 1;
3310         point     = jj[nzcountb] - bs*tmp;
3311         idx       = ishift + bs2*block + j + bs*point;
3312         a->a[idx] = (MatScalar)aa[nzcountb++];
3313       }
3314     }
3315     /* zero out the mask elements we set */
3316     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3317   }
3318   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3319 
3320   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3321   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3322   ierr = PetscFree(aa);CHKERRQ(ierr);
3323   ierr = PetscFree(jj);CHKERRQ(ierr);
3324   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3325 
3326   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3327   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3328   PetscFunctionReturn(0);
3329 }
3330 
3331 #undef __FUNCT__
3332 #define __FUNCT__ "MatCreateSeqBAIJ"
3333 /*@C
3334    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3335    compressed row) format.  For good matrix assembly performance the
3336    user should preallocate the matrix storage by setting the parameter nz
3337    (or the array nnz).  By setting these parameters accurately, performance
3338    during matrix assembly can be increased by more than a factor of 50.
3339 
3340    Collective on MPI_Comm
3341 
3342    Input Parameters:
3343 +  comm - MPI communicator, set to PETSC_COMM_SELF
3344 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3345           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3346 .  m - number of rows
3347 .  n - number of columns
3348 .  nz - number of nonzero blocks  per block row (same for all rows)
3349 -  nnz - array containing the number of nonzero blocks in the various block rows
3350          (possibly different for each block row) or NULL
3351 
3352    Output Parameter:
3353 .  A - the matrix
3354 
3355    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3356    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3357    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3358 
3359    Options Database Keys:
3360 .   -mat_no_unroll - uses code that does not unroll the loops in the
3361                      block calculations (much slower)
3362 .    -mat_block_size - size of the blocks to use
3363 
3364    Level: intermediate
3365 
3366    Notes:
3367    The number of rows and columns must be divisible by blocksize.
3368 
3369    If the nnz parameter is given then the nz parameter is ignored
3370 
3371    A nonzero block is any block that as 1 or more nonzeros in it
3372 
3373    The block AIJ format is fully compatible with standard Fortran 77
3374    storage.  That is, the stored row and column indices can begin at
3375    either one (as in Fortran) or zero.  See the users' manual for details.
3376 
3377    Specify the preallocated storage with either nz or nnz (not both).
3378    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3379    allocation.  See Users-Manual: ch_mat for details.
3380    matrices.
3381 
3382 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3383 @*/
3384 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3385 {
3386   PetscErrorCode ierr;
3387 
3388   PetscFunctionBegin;
3389   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3390   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3391   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3392   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3393   PetscFunctionReturn(0);
3394 }
3395 
3396 #undef __FUNCT__
3397 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3398 /*@C
3399    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3400    per row in the matrix. For good matrix assembly performance the
3401    user should preallocate the matrix storage by setting the parameter nz
3402    (or the array nnz).  By setting these parameters accurately, performance
3403    during matrix assembly can be increased by more than a factor of 50.
3404 
3405    Collective on MPI_Comm
3406 
3407    Input Parameters:
3408 +  B - the matrix
3409 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3410           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3411 .  nz - number of block nonzeros per block row (same for all rows)
3412 -  nnz - array containing the number of block nonzeros in the various block rows
3413          (possibly different for each block row) or NULL
3414 
3415    Options Database Keys:
3416 .   -mat_no_unroll - uses code that does not unroll the loops in the
3417                      block calculations (much slower)
3418 .   -mat_block_size - size of the blocks to use
3419 
3420    Level: intermediate
3421 
3422    Notes:
3423    If the nnz parameter is given then the nz parameter is ignored
3424 
3425    You can call MatGetInfo() to get information on how effective the preallocation was;
3426    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3427    You can also run with the option -info and look for messages with the string
3428    malloc in them to see if additional memory allocation was needed.
3429 
3430    The block AIJ format is fully compatible with standard Fortran 77
3431    storage.  That is, the stored row and column indices can begin at
3432    either one (as in Fortran) or zero.  See the users' manual for details.
3433 
3434    Specify the preallocated storage with either nz or nnz (not both).
3435    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3436    allocation.  See Users-Manual: ch_mat for details.
3437 
3438 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3439 @*/
3440 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3441 {
3442   PetscErrorCode ierr;
3443 
3444   PetscFunctionBegin;
3445   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3446   PetscValidType(B,1);
3447   PetscValidLogicalCollectiveInt(B,bs,2);
3448   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3449   PetscFunctionReturn(0);
3450 }
3451 
3452 #undef __FUNCT__
3453 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3454 /*@C
3455    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3456    (the default sequential PETSc format).
3457 
3458    Collective on MPI_Comm
3459 
3460    Input Parameters:
3461 +  B - the matrix
3462 .  i - the indices into j for the start of each local row (starts with zero)
3463 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3464 -  v - optional values in the matrix
3465 
3466    Level: developer
3467 
3468    Notes:
3469    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3470    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3471    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3472    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3473    block column and the second index is over columns within a block.
3474 
3475 .keywords: matrix, aij, compressed row, sparse
3476 
3477 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3478 @*/
3479 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3480 {
3481   PetscErrorCode ierr;
3482 
3483   PetscFunctionBegin;
3484   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3485   PetscValidType(B,1);
3486   PetscValidLogicalCollectiveInt(B,bs,2);
3487   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3488   PetscFunctionReturn(0);
3489 }
3490 
3491 
3492 #undef __FUNCT__
3493 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3494 /*@
3495      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3496 
3497      Collective on MPI_Comm
3498 
3499    Input Parameters:
3500 +  comm - must be an MPI communicator of size 1
3501 .  bs - size of block
3502 .  m - number of rows
3503 .  n - number of columns
3504 .  i - row indices
3505 .  j - column indices
3506 -  a - matrix values
3507 
3508    Output Parameter:
3509 .  mat - the matrix
3510 
3511    Level: advanced
3512 
3513    Notes:
3514        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3515     once the matrix is destroyed
3516 
3517        You cannot set new nonzero locations into this matrix, that will generate an error.
3518 
3519        The i and j indices are 0 based
3520 
3521        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3522 
3523       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3524       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3525       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3526       with column-major ordering within blocks.
3527 
3528 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3529 
3530 @*/
3531 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3532 {
3533   PetscErrorCode ierr;
3534   PetscInt       ii;
3535   Mat_SeqBAIJ    *baij;
3536 
3537   PetscFunctionBegin;
3538   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3539   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3540 
3541   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3542   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3543   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3544   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3545   baij = (Mat_SeqBAIJ*)(*mat)->data;
3546   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3547   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3548 
3549   baij->i = i;
3550   baij->j = j;
3551   baij->a = a;
3552 
3553   baij->singlemalloc = PETSC_FALSE;
3554   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3555   baij->free_a       = PETSC_FALSE;
3556   baij->free_ij      = PETSC_FALSE;
3557 
3558   for (ii=0; ii<m; ii++) {
3559     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3560 #if defined(PETSC_USE_DEBUG)
3561     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3562 #endif
3563   }
3564 #if defined(PETSC_USE_DEBUG)
3565   for (ii=0; ii<baij->i[m]; ii++) {
3566     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3567     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3568   }
3569 #endif
3570 
3571   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3572   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3573   PetscFunctionReturn(0);
3574 }
3575 
3576 #undef __FUNCT__
3577 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_SeqBAIJ"
3578 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3579 {
3580   PetscErrorCode ierr;
3581 
3582   PetscFunctionBegin;
3583   ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3584   PetscFunctionReturn(0);
3585 }
3586