xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 76be6f4ff3bd4e251c19fc00ebbebfd58b6e7589)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13 #endif
14 
15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
17 #endif
18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
19 
20 PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A,PetscInt type,PetscReal *reductions)
21 {
22   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) A->data;
23   PetscInt       m,n,i;
24   PetscInt       ib,jb,bs = A->rmap->bs;
25   MatScalar      *a_val = a_aij->a;
26 
27   PetscFunctionBegin;
28   PetscCall(MatGetSize(A,&m,&n));
29   for (i=0; i<n; i++) reductions[i] = 0.0;
30   if (type == NORM_2) {
31     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
32       for (jb=0; jb<bs; jb++) {
33         for (ib=0; ib<bs; ib++) {
34           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
35           a_val++;
36         }
37       }
38     }
39   } else if (type == NORM_1) {
40     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
41       for (jb=0; jb<bs; jb++) {
42         for (ib=0; ib<bs; ib++) {
43           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
44           a_val++;
45         }
46       }
47     }
48   } else if (type == NORM_INFINITY) {
49     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
50       for (jb=0; jb<bs; jb++) {
51         for (ib=0; ib<bs; ib++) {
52           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
53           reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
54           a_val++;
55         }
56       }
57     }
58   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
59     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
60       for (jb=0; jb<bs; jb++) {
61         for (ib=0; ib<bs; ib++) {
62           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
63           a_val++;
64         }
65       }
66     }
67   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
68     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
69       for (jb=0; jb<bs; jb++) {
70         for (ib=0; ib<bs; ib++) {
71           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
72           a_val++;
73         }
74       }
75     }
76   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
77   if (type == NORM_2) {
78     for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
79   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
80     for (i=0; i<n; i++) reductions[i] /= m;
81   }
82   PetscFunctionReturn(0);
83 }
84 
85 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
86 {
87   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
88   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
89   MatScalar      *v    = a->a,*odiag,*diag,work[25],*v_work;
90   PetscReal      shift = 0.0;
91   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
92 
93   PetscFunctionBegin;
94   allowzeropivot = PetscNot(A->erroriffailure);
95 
96   if (a->idiagvalid) {
97     if (values) *values = a->idiag;
98     PetscFunctionReturn(0);
99   }
100   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
101   diag_offset = a->diag;
102   if (!a->idiag) {
103     PetscCall(PetscMalloc1(bs2*mbs,&a->idiag));
104     PetscCall(PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar)));
105   }
106   diag  = a->idiag;
107   if (values) *values = a->idiag;
108   /* factor and invert each block */
109   switch (bs) {
110   case 1:
111     for (i=0; i<mbs; i++) {
112       odiag    = v + 1*diag_offset[i];
113       diag[0]  = odiag[0];
114 
115       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
116         if (allowzeropivot) {
117           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
118           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
119           A->factorerror_zeropivot_row   = i;
120           PetscCall(PetscInfo(A,"Zero pivot, row %" PetscInt_FMT "\n",i));
121         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
122       }
123 
124       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
125       diag    += 1;
126     }
127     break;
128   case 2:
129     for (i=0; i<mbs; i++) {
130       odiag    = v + 4*diag_offset[i];
131       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
132       PetscCall(PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected));
133       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
134       diag    += 4;
135     }
136     break;
137   case 3:
138     for (i=0; i<mbs; i++) {
139       odiag    = v + 9*diag_offset[i];
140       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
141       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
142       diag[8]  = odiag[8];
143       PetscCall(PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected));
144       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
145       diag    += 9;
146     }
147     break;
148   case 4:
149     for (i=0; i<mbs; i++) {
150       odiag  = v + 16*diag_offset[i];
151       PetscCall(PetscArraycpy(diag,odiag,16));
152       PetscCall(PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected));
153       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
154       diag  += 16;
155     }
156     break;
157   case 5:
158     for (i=0; i<mbs; i++) {
159       odiag  = v + 25*diag_offset[i];
160       PetscCall(PetscArraycpy(diag,odiag,25));
161       PetscCall(PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected));
162       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
163       diag  += 25;
164     }
165     break;
166   case 6:
167     for (i=0; i<mbs; i++) {
168       odiag  = v + 36*diag_offset[i];
169       PetscCall(PetscArraycpy(diag,odiag,36));
170       PetscCall(PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected));
171       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
172       diag  += 36;
173     }
174     break;
175   case 7:
176     for (i=0; i<mbs; i++) {
177       odiag  = v + 49*diag_offset[i];
178       PetscCall(PetscArraycpy(diag,odiag,49));
179       PetscCall(PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected));
180       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
181       diag  += 49;
182     }
183     break;
184   default:
185     PetscCall(PetscMalloc2(bs,&v_work,bs,&v_pivots));
186     for (i=0; i<mbs; i++) {
187       odiag  = v + bs2*diag_offset[i];
188       PetscCall(PetscArraycpy(diag,odiag,bs2));
189       PetscCall(PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected));
190       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
191       diag  += bs2;
192     }
193     PetscCall(PetscFree2(v_work,v_pivots));
194   }
195   a->idiagvalid = PETSC_TRUE;
196   PetscFunctionReturn(0);
197 }
198 
199 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
200 {
201   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
202   PetscScalar       *x,*work,*w,*workt,*t;
203   const MatScalar   *v,*aa = a->a, *idiag;
204   const PetscScalar *b,*xb;
205   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
206   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
207   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
208 
209   PetscFunctionBegin;
210   its = its*lits;
211   PetscCheck(!(flag & SOR_EISENSTAT),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
212   PetscCheck(its > 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits);
213   PetscCheck(!fshift,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
214   PetscCheck(omega == 1.0,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-trivial relaxation factor");
215   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
216 
217   if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A,NULL));
218 
219   if (!m) PetscFunctionReturn(0);
220   diag  = a->diag;
221   idiag = a->idiag;
222   k    = PetscMax(A->rmap->n,A->cmap->n);
223   if (!a->mult_work) {
224     PetscCall(PetscMalloc1(k+1,&a->mult_work));
225   }
226   if (!a->sor_workt) {
227     PetscCall(PetscMalloc1(k,&a->sor_workt));
228   }
229   if (!a->sor_work) {
230     PetscCall(PetscMalloc1(bs,&a->sor_work));
231   }
232   work = a->mult_work;
233   t    = a->sor_workt;
234   w    = a->sor_work;
235 
236   PetscCall(VecGetArray(xx,&x));
237   PetscCall(VecGetArrayRead(bb,&b));
238 
239   if (flag & SOR_ZERO_INITIAL_GUESS) {
240     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
241       switch (bs) {
242       case 1:
243         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
244         t[0] = b[0];
245         i2     = 1;
246         idiag += 1;
247         for (i=1; i<m; i++) {
248           v  = aa + ai[i];
249           vi = aj + ai[i];
250           nz = diag[i] - ai[i];
251           s[0] = b[i2];
252           for (j=0; j<nz; j++) {
253             xw[0] = x[vi[j]];
254             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
255           }
256           t[i2] = s[0];
257           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
258           x[i2]  = xw[0];
259           idiag += 1;
260           i2    += 1;
261         }
262         break;
263       case 2:
264         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
265         t[0] = b[0]; t[1] = b[1];
266         i2     = 2;
267         idiag += 4;
268         for (i=1; i<m; i++) {
269           v  = aa + 4*ai[i];
270           vi = aj + ai[i];
271           nz = diag[i] - ai[i];
272           s[0] = b[i2]; s[1] = b[i2+1];
273           for (j=0; j<nz; j++) {
274             idx = 2*vi[j];
275             it  = 4*j;
276             xw[0] = x[idx]; xw[1] = x[1+idx];
277             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
278           }
279           t[i2] = s[0]; t[i2+1] = s[1];
280           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
281           x[i2]   = xw[0]; x[i2+1] = xw[1];
282           idiag  += 4;
283           i2     += 2;
284         }
285         break;
286       case 3:
287         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
288         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
289         i2     = 3;
290         idiag += 9;
291         for (i=1; i<m; i++) {
292           v  = aa + 9*ai[i];
293           vi = aj + ai[i];
294           nz = diag[i] - ai[i];
295           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
296           while (nz--) {
297             idx = 3*(*vi++);
298             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
299             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
300             v  += 9;
301           }
302           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
303           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
304           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
305           idiag  += 9;
306           i2     += 3;
307         }
308         break;
309       case 4:
310         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
311         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
312         i2     = 4;
313         idiag += 16;
314         for (i=1; i<m; i++) {
315           v  = aa + 16*ai[i];
316           vi = aj + ai[i];
317           nz = diag[i] - ai[i];
318           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
319           while (nz--) {
320             idx = 4*(*vi++);
321             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
322             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
323             v  += 16;
324           }
325           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
326           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
327           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
328           idiag  += 16;
329           i2     += 4;
330         }
331         break;
332       case 5:
333         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
334         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
335         i2     = 5;
336         idiag += 25;
337         for (i=1; i<m; i++) {
338           v  = aa + 25*ai[i];
339           vi = aj + ai[i];
340           nz = diag[i] - ai[i];
341           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
342           while (nz--) {
343             idx = 5*(*vi++);
344             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
345             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
346             v  += 25;
347           }
348           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
349           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
350           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
351           idiag  += 25;
352           i2     += 5;
353         }
354         break;
355       case 6:
356         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
357         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
358         i2     = 6;
359         idiag += 36;
360         for (i=1; i<m; i++) {
361           v  = aa + 36*ai[i];
362           vi = aj + ai[i];
363           nz = diag[i] - ai[i];
364           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
365           while (nz--) {
366             idx = 6*(*vi++);
367             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
368             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
369             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
370             v  += 36;
371           }
372           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
373           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
374           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
375           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
376           idiag  += 36;
377           i2     += 6;
378         }
379         break;
380       case 7:
381         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
382         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
383         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
384         i2     = 7;
385         idiag += 49;
386         for (i=1; i<m; i++) {
387           v  = aa + 49*ai[i];
388           vi = aj + ai[i];
389           nz = diag[i] - ai[i];
390           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
391           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
392           while (nz--) {
393             idx = 7*(*vi++);
394             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
395             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
396             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
397             v  += 49;
398           }
399           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
400           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
401           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
402           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
403           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
404           idiag  += 49;
405           i2     += 7;
406         }
407         break;
408       default:
409         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
410         PetscCall(PetscArraycpy(t,b,bs));
411         i2     = bs;
412         idiag += bs2;
413         for (i=1; i<m; i++) {
414           v  = aa + bs2*ai[i];
415           vi = aj + ai[i];
416           nz = diag[i] - ai[i];
417 
418           PetscCall(PetscArraycpy(w,b+i2,bs));
419           /* copy all rows of x that are needed into contiguous space */
420           workt = work;
421           for (j=0; j<nz; j++) {
422             PetscCall(PetscArraycpy(workt,x + bs*(*vi++),bs));
423             workt += bs;
424           }
425           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
426           PetscCall(PetscArraycpy(t+i2,w,bs));
427           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
428 
429           idiag += bs2;
430           i2    += bs;
431         }
432         break;
433       }
434       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
435       PetscCall(PetscLogFlops(1.0*bs2*a->nz));
436       xb = t;
437     }
438     else xb = b;
439     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
440       idiag = a->idiag+bs2*(a->mbs-1);
441       i2 = bs * (m-1);
442       switch (bs) {
443       case 1:
444         s[0]  = xb[i2];
445         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
446         x[i2] = xw[0];
447         i2   -= 1;
448         for (i=m-2; i>=0; i--) {
449           v  = aa + (diag[i]+1);
450           vi = aj + diag[i] + 1;
451           nz = ai[i+1] - diag[i] - 1;
452           s[0] = xb[i2];
453           for (j=0; j<nz; j++) {
454             xw[0] = x[vi[j]];
455             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
456           }
457           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
458           x[i2]  = xw[0];
459           idiag -= 1;
460           i2    -= 1;
461         }
462         break;
463       case 2:
464         s[0]  = xb[i2]; s[1] = xb[i2+1];
465         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
466         x[i2] = xw[0]; x[i2+1] = xw[1];
467         i2    -= 2;
468         idiag -= 4;
469         for (i=m-2; i>=0; i--) {
470           v  = aa + 4*(diag[i] + 1);
471           vi = aj + diag[i] + 1;
472           nz = ai[i+1] - diag[i] - 1;
473           s[0] = xb[i2]; s[1] = xb[i2+1];
474           for (j=0; j<nz; j++) {
475             idx = 2*vi[j];
476             it  = 4*j;
477             xw[0] = x[idx]; xw[1] = x[1+idx];
478             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
479           }
480           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
481           x[i2]   = xw[0]; x[i2+1] = xw[1];
482           idiag  -= 4;
483           i2     -= 2;
484         }
485         break;
486       case 3:
487         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
488         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
489         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
490         i2    -= 3;
491         idiag -= 9;
492         for (i=m-2; i>=0; i--) {
493           v  = aa + 9*(diag[i]+1);
494           vi = aj + diag[i] + 1;
495           nz = ai[i+1] - diag[i] - 1;
496           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
497           while (nz--) {
498             idx = 3*(*vi++);
499             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
500             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
501             v  += 9;
502           }
503           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
504           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
505           idiag  -= 9;
506           i2     -= 3;
507         }
508         break;
509       case 4:
510         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
511         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
512         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
513         i2    -= 4;
514         idiag -= 16;
515         for (i=m-2; i>=0; i--) {
516           v  = aa + 16*(diag[i]+1);
517           vi = aj + diag[i] + 1;
518           nz = ai[i+1] - diag[i] - 1;
519           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
520           while (nz--) {
521             idx = 4*(*vi++);
522             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
523             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
524             v  += 16;
525           }
526           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
527           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
528           idiag  -= 16;
529           i2     -= 4;
530         }
531         break;
532       case 5:
533         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
534         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
535         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
536         i2    -= 5;
537         idiag -= 25;
538         for (i=m-2; i>=0; i--) {
539           v  = aa + 25*(diag[i]+1);
540           vi = aj + diag[i] + 1;
541           nz = ai[i+1] - diag[i] - 1;
542           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
543           while (nz--) {
544             idx = 5*(*vi++);
545             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
546             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
547             v  += 25;
548           }
549           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
550           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
551           idiag  -= 25;
552           i2     -= 5;
553         }
554         break;
555       case 6:
556         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
557         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
558         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
559         i2    -= 6;
560         idiag -= 36;
561         for (i=m-2; i>=0; i--) {
562           v  = aa + 36*(diag[i]+1);
563           vi = aj + diag[i] + 1;
564           nz = ai[i+1] - diag[i] - 1;
565           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
566           while (nz--) {
567             idx = 6*(*vi++);
568             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
569             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
570             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
571             v  += 36;
572           }
573           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
574           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
575           idiag  -= 36;
576           i2     -= 6;
577         }
578         break;
579       case 7:
580         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
581         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
582         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
583         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
584         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
585         i2    -= 7;
586         idiag -= 49;
587         for (i=m-2; i>=0; i--) {
588           v  = aa + 49*(diag[i]+1);
589           vi = aj + diag[i] + 1;
590           nz = ai[i+1] - diag[i] - 1;
591           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
592           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
593           while (nz--) {
594             idx = 7*(*vi++);
595             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
596             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
597             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
598             v  += 49;
599           }
600           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
601           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
602           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
603           idiag  -= 49;
604           i2     -= 7;
605         }
606         break;
607       default:
608         PetscCall(PetscArraycpy(w,xb+i2,bs));
609         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
610         i2    -= bs;
611         idiag -= bs2;
612         for (i=m-2; i>=0; i--) {
613           v  = aa + bs2*(diag[i]+1);
614           vi = aj + diag[i] + 1;
615           nz = ai[i+1] - diag[i] - 1;
616 
617           PetscCall(PetscArraycpy(w,xb+i2,bs));
618           /* copy all rows of x that are needed into contiguous space */
619           workt = work;
620           for (j=0; j<nz; j++) {
621             PetscCall(PetscArraycpy(workt,x + bs*(*vi++),bs));
622             workt += bs;
623           }
624           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
625           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
626 
627           idiag -= bs2;
628           i2    -= bs;
629         }
630         break;
631       }
632       PetscCall(PetscLogFlops(1.0*bs2*(a->nz)));
633     }
634     its--;
635   }
636   while (its--) {
637     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
638       idiag = a->idiag;
639       i2 = 0;
640       switch (bs) {
641       case 1:
642         for (i=0; i<m; i++) {
643           v  = aa + ai[i];
644           vi = aj + ai[i];
645           nz = ai[i+1] - ai[i];
646           s[0] = b[i2];
647           for (j=0; j<nz; j++) {
648             xw[0] = x[vi[j]];
649             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
650           }
651           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
652           x[i2] += xw[0];
653           idiag += 1;
654           i2    += 1;
655         }
656         break;
657       case 2:
658         for (i=0; i<m; i++) {
659           v  = aa + 4*ai[i];
660           vi = aj + ai[i];
661           nz = ai[i+1] - ai[i];
662           s[0] = b[i2]; s[1] = b[i2+1];
663           for (j=0; j<nz; j++) {
664             idx = 2*vi[j];
665             it  = 4*j;
666             xw[0] = x[idx]; xw[1] = x[1+idx];
667             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
668           }
669           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
670           x[i2]  += xw[0]; x[i2+1] += xw[1];
671           idiag  += 4;
672           i2     += 2;
673         }
674         break;
675       case 3:
676         for (i=0; i<m; i++) {
677           v  = aa + 9*ai[i];
678           vi = aj + ai[i];
679           nz = ai[i+1] - ai[i];
680           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
681           while (nz--) {
682             idx = 3*(*vi++);
683             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
684             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
685             v  += 9;
686           }
687           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
688           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
689           idiag  += 9;
690           i2     += 3;
691         }
692         break;
693       case 4:
694         for (i=0; i<m; i++) {
695           v  = aa + 16*ai[i];
696           vi = aj + ai[i];
697           nz = ai[i+1] - ai[i];
698           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
699           while (nz--) {
700             idx = 4*(*vi++);
701             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
702             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
703             v  += 16;
704           }
705           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
706           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
707           idiag  += 16;
708           i2     += 4;
709         }
710         break;
711       case 5:
712         for (i=0; i<m; i++) {
713           v  = aa + 25*ai[i];
714           vi = aj + ai[i];
715           nz = ai[i+1] - ai[i];
716           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
717           while (nz--) {
718             idx = 5*(*vi++);
719             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
720             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
721             v  += 25;
722           }
723           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
724           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
725           idiag  += 25;
726           i2     += 5;
727         }
728         break;
729       case 6:
730         for (i=0; i<m; i++) {
731           v  = aa + 36*ai[i];
732           vi = aj + ai[i];
733           nz = ai[i+1] - ai[i];
734           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
735           while (nz--) {
736             idx = 6*(*vi++);
737             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
738             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
739             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
740             v  += 36;
741           }
742           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
743           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
744           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
745           idiag  += 36;
746           i2     += 6;
747         }
748         break;
749       case 7:
750         for (i=0; i<m; i++) {
751           v  = aa + 49*ai[i];
752           vi = aj + ai[i];
753           nz = ai[i+1] - ai[i];
754           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
755           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
756           while (nz--) {
757             idx = 7*(*vi++);
758             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
759             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
760             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
761             v  += 49;
762           }
763           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
764           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
765           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
766           idiag  += 49;
767           i2     += 7;
768         }
769         break;
770       default:
771         for (i=0; i<m; i++) {
772           v  = aa + bs2*ai[i];
773           vi = aj + ai[i];
774           nz = ai[i+1] - ai[i];
775 
776           PetscCall(PetscArraycpy(w,b+i2,bs));
777           /* copy all rows of x that are needed into contiguous space */
778           workt = work;
779           for (j=0; j<nz; j++) {
780             PetscCall(PetscArraycpy(workt,x + bs*(*vi++),bs));
781             workt += bs;
782           }
783           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
784           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
785 
786           idiag += bs2;
787           i2    += bs;
788         }
789         break;
790       }
791       PetscCall(PetscLogFlops(2.0*bs2*a->nz));
792     }
793     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
794       idiag = a->idiag+bs2*(a->mbs-1);
795       i2 = bs * (m-1);
796       switch (bs) {
797       case 1:
798         for (i=m-1; i>=0; i--) {
799           v  = aa + ai[i];
800           vi = aj + ai[i];
801           nz = ai[i+1] - ai[i];
802           s[0] = b[i2];
803           for (j=0; j<nz; j++) {
804             xw[0] = x[vi[j]];
805             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
806           }
807           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
808           x[i2] += xw[0];
809           idiag -= 1;
810           i2    -= 1;
811         }
812         break;
813       case 2:
814         for (i=m-1; i>=0; i--) {
815           v  = aa + 4*ai[i];
816           vi = aj + ai[i];
817           nz = ai[i+1] - ai[i];
818           s[0] = b[i2]; s[1] = b[i2+1];
819           for (j=0; j<nz; j++) {
820             idx = 2*vi[j];
821             it  = 4*j;
822             xw[0] = x[idx]; xw[1] = x[1+idx];
823             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
824           }
825           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
826           x[i2]  += xw[0]; x[i2+1] += xw[1];
827           idiag  -= 4;
828           i2     -= 2;
829         }
830         break;
831       case 3:
832         for (i=m-1; i>=0; i--) {
833           v  = aa + 9*ai[i];
834           vi = aj + ai[i];
835           nz = ai[i+1] - ai[i];
836           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
837           while (nz--) {
838             idx = 3*(*vi++);
839             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
840             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
841             v  += 9;
842           }
843           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
844           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
845           idiag  -= 9;
846           i2     -= 3;
847         }
848         break;
849       case 4:
850         for (i=m-1; i>=0; i--) {
851           v  = aa + 16*ai[i];
852           vi = aj + ai[i];
853           nz = ai[i+1] - ai[i];
854           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
855           while (nz--) {
856             idx = 4*(*vi++);
857             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
858             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
859             v  += 16;
860           }
861           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
862           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
863           idiag  -= 16;
864           i2     -= 4;
865         }
866         break;
867       case 5:
868         for (i=m-1; i>=0; i--) {
869           v  = aa + 25*ai[i];
870           vi = aj + ai[i];
871           nz = ai[i+1] - ai[i];
872           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
873           while (nz--) {
874             idx = 5*(*vi++);
875             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
876             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
877             v  += 25;
878           }
879           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
880           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
881           idiag  -= 25;
882           i2     -= 5;
883         }
884         break;
885       case 6:
886         for (i=m-1; i>=0; i--) {
887           v  = aa + 36*ai[i];
888           vi = aj + ai[i];
889           nz = ai[i+1] - ai[i];
890           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
891           while (nz--) {
892             idx = 6*(*vi++);
893             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
894             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
895             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
896             v  += 36;
897           }
898           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
899           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
900           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
901           idiag  -= 36;
902           i2     -= 6;
903         }
904         break;
905       case 7:
906         for (i=m-1; i>=0; i--) {
907           v  = aa + 49*ai[i];
908           vi = aj + ai[i];
909           nz = ai[i+1] - ai[i];
910           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
911           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
912           while (nz--) {
913             idx = 7*(*vi++);
914             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
915             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
916             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
917             v  += 49;
918           }
919           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
920           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
921           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
922           idiag  -= 49;
923           i2     -= 7;
924         }
925         break;
926       default:
927         for (i=m-1; i>=0; i--) {
928           v  = aa + bs2*ai[i];
929           vi = aj + ai[i];
930           nz = ai[i+1] - ai[i];
931 
932           PetscCall(PetscArraycpy(w,b+i2,bs));
933           /* copy all rows of x that are needed into contiguous space */
934           workt = work;
935           for (j=0; j<nz; j++) {
936             PetscCall(PetscArraycpy(workt,x + bs*(*vi++),bs));
937             workt += bs;
938           }
939           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
940           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
941 
942           idiag -= bs2;
943           i2    -= bs;
944         }
945         break;
946       }
947       PetscCall(PetscLogFlops(2.0*bs2*(a->nz)));
948     }
949   }
950   PetscCall(VecRestoreArray(xx,&x));
951   PetscCall(VecRestoreArrayRead(bb,&b));
952   PetscFunctionReturn(0);
953 }
954 
955 /*
956     Special version for direct calls from Fortran (Used in PETSc-fun3d)
957 */
958 #if defined(PETSC_HAVE_FORTRAN_CAPS)
959 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
960 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
961 #define matsetvaluesblocked4_ matsetvaluesblocked4
962 #endif
963 
964 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
965 {
966   Mat               A  = *AA;
967   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
968   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
969   PetscInt          *ai    =a->i,*ailen=a->ilen;
970   PetscInt          *aj    =a->j,stepval,lastcol = -1;
971   const PetscScalar *value = v;
972   MatScalar         *ap,*aa = a->a,*bap;
973 
974   PetscFunctionBegin;
975   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
976   stepval = (n-1)*4;
977   for (k=0; k<m; k++) { /* loop over added rows */
978     row  = im[k];
979     rp   = aj + ai[row];
980     ap   = aa + 16*ai[row];
981     nrow = ailen[row];
982     low  = 0;
983     high = nrow;
984     for (l=0; l<n; l++) { /* loop over added columns */
985       col = in[l];
986       if (col <= lastcol)  low = 0;
987       else                high = nrow;
988       lastcol = col;
989       value   = v + k*(stepval+4 + l)*4;
990       while (high-low > 7) {
991         t = (low+high)/2;
992         if (rp[t] > col) high = t;
993         else             low  = t;
994       }
995       for (i=low; i<high; i++) {
996         if (rp[i] > col) break;
997         if (rp[i] == col) {
998           bap = ap +  16*i;
999           for (ii=0; ii<4; ii++,value+=stepval) {
1000             for (jj=ii; jj<16; jj+=4) {
1001               bap[jj] += *value++;
1002             }
1003           }
1004           goto noinsert2;
1005         }
1006       }
1007       N = nrow++ - 1;
1008       high++; /* added new column index thus must search to one higher than before */
1009       /* shift up all the later entries in this row */
1010       for (ii=N; ii>=i; ii--) {
1011         rp[ii+1] = rp[ii];
1012         PetscCallVoid(PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16));
1013       }
1014       if (N >= i) {
1015         PetscCallVoid(PetscArrayzero(ap+16*i,16));
1016       }
1017       rp[i] = col;
1018       bap   = ap +  16*i;
1019       for (ii=0; ii<4; ii++,value+=stepval) {
1020         for (jj=ii; jj<16; jj+=4) {
1021           bap[jj] = *value++;
1022         }
1023       }
1024       noinsert2:;
1025       low = i;
1026     }
1027     ailen[row] = nrow;
1028   }
1029   PetscFunctionReturnVoid();
1030 }
1031 
1032 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1033 #define matsetvalues4_ MATSETVALUES4
1034 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1035 #define matsetvalues4_ matsetvalues4
1036 #endif
1037 
1038 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1039 {
1040   Mat         A  = *AA;
1041   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1042   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm;
1043   PetscInt    *ai=a->i,*ailen=a->ilen;
1044   PetscInt    *aj=a->j,brow,bcol;
1045   PetscInt    ridx,cidx,lastcol = -1;
1046   MatScalar   *ap,value,*aa=a->a,*bap;
1047 
1048   PetscFunctionBegin;
1049   for (k=0; k<m; k++) { /* loop over added rows */
1050     row  = im[k]; brow = row/4;
1051     rp   = aj + ai[brow];
1052     ap   = aa + 16*ai[brow];
1053     nrow = ailen[brow];
1054     low  = 0;
1055     high = nrow;
1056     for (l=0; l<n; l++) { /* loop over added columns */
1057       col   = in[l]; bcol = col/4;
1058       ridx  = row % 4; cidx = col % 4;
1059       value = v[l + k*n];
1060       if (col <= lastcol)  low = 0;
1061       else                high = nrow;
1062       lastcol = col;
1063       while (high-low > 7) {
1064         t = (low+high)/2;
1065         if (rp[t] > bcol) high = t;
1066         else              low  = t;
1067       }
1068       for (i=low; i<high; i++) {
1069         if (rp[i] > bcol) break;
1070         if (rp[i] == bcol) {
1071           bap   = ap +  16*i + 4*cidx + ridx;
1072           *bap += value;
1073           goto noinsert1;
1074         }
1075       }
1076       N = nrow++ - 1;
1077       high++; /* added new column thus must search to one higher than before */
1078       /* shift up all the later entries in this row */
1079       PetscCallVoid(PetscArraymove(rp+i+1,rp+i,N-i+1));
1080       PetscCallVoid(PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1)));
1081       PetscCallVoid(PetscArrayzero(ap+16*i,16));
1082       rp[i]                    = bcol;
1083       ap[16*i + 4*cidx + ridx] = value;
1084 noinsert1:;
1085       low = i;
1086     }
1087     ailen[brow] = nrow;
1088   }
1089   PetscFunctionReturnVoid();
1090 }
1091 
1092 /*
1093      Checks for missing diagonals
1094 */
1095 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1096 {
1097   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1098   PetscInt       *diag,*ii = a->i,i;
1099 
1100   PetscFunctionBegin;
1101   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
1102   *missing = PETSC_FALSE;
1103   if (A->rmap->n > 0 && !ii) {
1104     *missing = PETSC_TRUE;
1105     if (d) *d = 0;
1106     PetscCall(PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"));
1107   } else {
1108     PetscInt n;
1109     n = PetscMin(a->mbs, a->nbs);
1110     diag = a->diag;
1111     for (i=0; i<n; i++) {
1112       if (diag[i] >= ii[i+1]) {
1113         *missing = PETSC_TRUE;
1114         if (d) *d = i;
1115         PetscCall(PetscInfo(A,"Matrix is missing block diagonal number %" PetscInt_FMT "\n",i));
1116         break;
1117       }
1118     }
1119   }
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1124 {
1125   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1126   PetscInt       i,j,m = a->mbs;
1127 
1128   PetscFunctionBegin;
1129   if (!a->diag) {
1130     PetscCall(PetscMalloc1(m,&a->diag));
1131     PetscCall(PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt)));
1132     a->free_diag = PETSC_TRUE;
1133   }
1134   for (i=0; i<m; i++) {
1135     a->diag[i] = a->i[i+1];
1136     for (j=a->i[i]; j<a->i[i+1]; j++) {
1137       if (a->j[j] == i) {
1138         a->diag[i] = j;
1139         break;
1140       }
1141     }
1142   }
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1147 {
1148   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1149   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1150   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1151 
1152   PetscFunctionBegin;
1153   *nn = n;
1154   if (!ia) PetscFunctionReturn(0);
1155   if (symmetric) {
1156     PetscCall(MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja));
1157     nz   = tia[n];
1158   } else {
1159     tia = a->i; tja = a->j;
1160   }
1161 
1162   if (!blockcompressed && bs > 1) {
1163     (*nn) *= bs;
1164     /* malloc & create the natural set of indices */
1165     PetscCall(PetscMalloc1((n+1)*bs,ia));
1166     if (n) {
1167       (*ia)[0] = oshift;
1168       for (j=1; j<bs; j++) {
1169         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1170       }
1171     }
1172 
1173     for (i=1; i<n; i++) {
1174       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1175       for (j=1; j<bs; j++) {
1176         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1177       }
1178     }
1179     if (n) {
1180       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1181     }
1182 
1183     if (inja) {
1184       PetscCall(PetscMalloc1(nz*bs*bs,ja));
1185       cnt = 0;
1186       for (i=0; i<n; i++) {
1187         for (j=0; j<bs; j++) {
1188           for (k=tia[i]; k<tia[i+1]; k++) {
1189             for (l=0; l<bs; l++) {
1190               (*ja)[cnt++] = bs*tja[k] + l;
1191             }
1192           }
1193         }
1194       }
1195     }
1196 
1197     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1198       PetscCall(PetscFree(tia));
1199       PetscCall(PetscFree(tja));
1200     }
1201   } else if (oshift == 1) {
1202     if (symmetric) {
1203       nz = tia[A->rmap->n/bs];
1204       /*  add 1 to i and j indices */
1205       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1206       *ia = tia;
1207       if (ja) {
1208         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1209         *ja = tja;
1210       }
1211     } else {
1212       nz = a->i[A->rmap->n/bs];
1213       /* malloc space and  add 1 to i and j indices */
1214       PetscCall(PetscMalloc1(A->rmap->n/bs+1,ia));
1215       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1216       if (ja) {
1217         PetscCall(PetscMalloc1(nz,ja));
1218         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1219       }
1220     }
1221   } else {
1222     *ia = tia;
1223     if (ja) *ja = tja;
1224   }
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1229 {
1230   PetscFunctionBegin;
1231   if (!ia) PetscFunctionReturn(0);
1232   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1233     PetscCall(PetscFree(*ia));
1234     if (ja) PetscCall(PetscFree(*ja));
1235   }
1236   PetscFunctionReturn(0);
1237 }
1238 
1239 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1240 {
1241   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1242 
1243   PetscFunctionBegin;
1244 #if defined(PETSC_USE_LOG)
1245   PetscLogObjectState((PetscObject)A,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT,A->rmap->N,A->cmap->n,a->nz);
1246 #endif
1247   PetscCall(MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i));
1248   PetscCall(ISDestroy(&a->row));
1249   PetscCall(ISDestroy(&a->col));
1250   if (a->free_diag) PetscCall(PetscFree(a->diag));
1251   PetscCall(PetscFree(a->idiag));
1252   if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax,a->ilen));
1253   PetscCall(PetscFree(a->solve_work));
1254   PetscCall(PetscFree(a->mult_work));
1255   PetscCall(PetscFree(a->sor_workt));
1256   PetscCall(PetscFree(a->sor_work));
1257   PetscCall(ISDestroy(&a->icol));
1258   PetscCall(PetscFree(a->saved_values));
1259   PetscCall(PetscFree2(a->compressedrow.i,a->compressedrow.rindex));
1260 
1261   PetscCall(MatDestroy(&a->sbaijMat));
1262   PetscCall(MatDestroy(&a->parent));
1263   PetscCall(PetscFree(A->data));
1264 
1265   PetscCall(PetscObjectChangeTypeName((PetscObject)A,NULL));
1266   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL));
1267   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL));
1268   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL));
1269   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL));
1270   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL));
1271   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL));
1272   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL));
1273   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL));
1274   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL));
1275   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL));
1276   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL));
1277 #if defined(PETSC_HAVE_HYPRE)
1278   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL));
1279 #endif
1280   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL));
1281   PetscFunctionReturn(0);
1282 }
1283 
1284 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1285 {
1286   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1287 
1288   PetscFunctionBegin;
1289   switch (op) {
1290   case MAT_ROW_ORIENTED:
1291     a->roworiented = flg;
1292     break;
1293   case MAT_KEEP_NONZERO_PATTERN:
1294     a->keepnonzeropattern = flg;
1295     break;
1296   case MAT_NEW_NONZERO_LOCATIONS:
1297     a->nonew = (flg ? 0 : 1);
1298     break;
1299   case MAT_NEW_NONZERO_LOCATION_ERR:
1300     a->nonew = (flg ? -1 : 0);
1301     break;
1302   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1303     a->nonew = (flg ? -2 : 0);
1304     break;
1305   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1306     a->nounused = (flg ? -1 : 0);
1307     break;
1308   case MAT_FORCE_DIAGONAL_ENTRIES:
1309   case MAT_IGNORE_OFF_PROC_ENTRIES:
1310   case MAT_USE_HASH_TABLE:
1311   case MAT_SORTED_FULL:
1312     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
1313     break;
1314   case MAT_SPD:
1315   case MAT_SYMMETRIC:
1316   case MAT_STRUCTURALLY_SYMMETRIC:
1317   case MAT_HERMITIAN:
1318   case MAT_SYMMETRY_ETERNAL:
1319   case MAT_SUBMAT_SINGLEIS:
1320   case MAT_STRUCTURE_ONLY:
1321     /* These options are handled directly by MatSetOption() */
1322     break;
1323   default:
1324     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1325   }
1326   PetscFunctionReturn(0);
1327 }
1328 
1329 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1330 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1331 {
1332   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1333   MatScalar      *aa_i;
1334   PetscScalar    *v_i;
1335 
1336   PetscFunctionBegin;
1337   bs  = A->rmap->bs;
1338   bs2 = bs*bs;
1339   PetscCheck(row >= 0 && row < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range", row);
1340 
1341   bn  = row/bs;   /* Block number */
1342   bp  = row % bs; /* Block Position */
1343   M   = ai[bn+1] - ai[bn];
1344   *nz = bs*M;
1345 
1346   if (v) {
1347     *v = NULL;
1348     if (*nz) {
1349       PetscCall(PetscMalloc1(*nz,v));
1350       for (i=0; i<M; i++) { /* for each block in the block row */
1351         v_i  = *v + i*bs;
1352         aa_i = aa + bs2*(ai[bn] + i);
1353         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1354       }
1355     }
1356   }
1357 
1358   if (idx) {
1359     *idx = NULL;
1360     if (*nz) {
1361       PetscCall(PetscMalloc1(*nz,idx));
1362       for (i=0; i<M; i++) { /* for each block in the block row */
1363         idx_i = *idx + i*bs;
1364         itmp  = bs*aj[ai[bn] + i];
1365         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1366       }
1367     }
1368   }
1369   PetscFunctionReturn(0);
1370 }
1371 
1372 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1373 {
1374   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1375 
1376   PetscFunctionBegin;
1377   PetscCall(MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a));
1378   PetscFunctionReturn(0);
1379 }
1380 
1381 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1382 {
1383   PetscFunctionBegin;
1384   if (nz)  *nz = 0;
1385   if (idx) PetscCall(PetscFree(*idx));
1386   if (v)   PetscCall(PetscFree(*v));
1387   PetscFunctionReturn(0);
1388 }
1389 
1390 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1391 {
1392   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data,*at;
1393   Mat            C;
1394   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill;
1395   PetscInt       bs2=a->bs2,*ati,*atj,anzj,kr;
1396   MatScalar      *ata,*aa=a->a;
1397 
1398   PetscFunctionBegin;
1399   PetscCall(PetscCalloc1(1+nbs,&atfill));
1400   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1401     for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1402 
1403     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&C));
1404     PetscCall(MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N));
1405     PetscCall(MatSetType(C,((PetscObject)A)->type_name));
1406     PetscCall(MatSeqBAIJSetPreallocation(C,bs,0,atfill));
1407 
1408     at  = (Mat_SeqBAIJ*)C->data;
1409     ati = at->i;
1410     for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i];
1411   } else {
1412     C = *B;
1413     at = (Mat_SeqBAIJ*)C->data;
1414     ati = at->i;
1415   }
1416 
1417   atj = at->j;
1418   ata = at->a;
1419 
1420   /* Copy ati into atfill so we have locations of the next free space in atj */
1421   PetscCall(PetscArraycpy(atfill,ati,nbs));
1422 
1423   /* Walk through A row-wise and mark nonzero entries of A^T. */
1424   for (i=0; i<mbs; i++) {
1425     anzj = ai[i+1] - ai[i];
1426     for (j=0; j<anzj; j++) {
1427       atj[atfill[*aj]] = i;
1428       for (kr=0; kr<bs; kr++) {
1429         for (k=0; k<bs; k++) {
1430           ata[bs2*atfill[*aj]+k*bs+kr] = *aa++;
1431         }
1432       }
1433       atfill[*aj++] += 1;
1434     }
1435   }
1436   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
1437   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
1438 
1439   /* Clean up temporary space and complete requests. */
1440   PetscCall(PetscFree(atfill));
1441 
1442   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1443     PetscCall(MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs)));
1444     *B = C;
1445   } else {
1446     PetscCall(MatHeaderMerge(A,&C));
1447   }
1448   PetscFunctionReturn(0);
1449 }
1450 
1451 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1452 {
1453   Mat            Btrans;
1454 
1455   PetscFunctionBegin;
1456   *f   = PETSC_FALSE;
1457   PetscCall(MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans));
1458   PetscCall(MatEqual_SeqBAIJ(B,Btrans,f));
1459   PetscCall(MatDestroy(&Btrans));
1460   PetscFunctionReturn(0);
1461 }
1462 
1463 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1464 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
1465 {
1466   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)mat->data;
1467   PetscInt       header[4],M,N,m,bs,nz,cnt,i,j,k,l;
1468   PetscInt       *rowlens,*colidxs;
1469   PetscScalar    *matvals;
1470 
1471   PetscFunctionBegin;
1472   PetscCall(PetscViewerSetUp(viewer));
1473 
1474   M  = mat->rmap->N;
1475   N  = mat->cmap->N;
1476   m  = mat->rmap->n;
1477   bs = mat->rmap->bs;
1478   nz = bs*bs*A->nz;
1479 
1480   /* write matrix header */
1481   header[0] = MAT_FILE_CLASSID;
1482   header[1] = M; header[2] = N; header[3] = nz;
1483   PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT));
1484 
1485   /* store row lengths */
1486   PetscCall(PetscMalloc1(m,&rowlens));
1487   for (cnt=0, i=0; i<A->mbs; i++)
1488     for (j=0; j<bs; j++)
1489       rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]);
1490   PetscCall(PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT));
1491   PetscCall(PetscFree(rowlens));
1492 
1493   /* store column indices  */
1494   PetscCall(PetscMalloc1(nz,&colidxs));
1495   for (cnt=0, i=0; i<A->mbs; i++)
1496     for (k=0; k<bs; k++)
1497       for (j=A->i[i]; j<A->i[i+1]; j++)
1498         for (l=0; l<bs; l++)
1499           colidxs[cnt++] = bs*A->j[j] + l;
1500   PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1501   PetscCall(PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT));
1502   PetscCall(PetscFree(colidxs));
1503 
1504   /* store nonzero values */
1505   PetscCall(PetscMalloc1(nz,&matvals));
1506   for (cnt=0, i=0; i<A->mbs; i++)
1507     for (k=0; k<bs; k++)
1508       for (j=A->i[i]; j<A->i[i+1]; j++)
1509         for (l=0; l<bs; l++)
1510           matvals[cnt++] = A->a[bs*(bs*j + l) + k];
1511   PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1512   PetscCall(PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR));
1513   PetscCall(PetscFree(matvals));
1514 
1515   /* write block size option to the viewer's .info file */
1516   PetscCall(MatView_Binary_BlockSizes(mat,viewer));
1517   PetscFunctionReturn(0);
1518 }
1519 
1520 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1521 {
1522   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1523   PetscInt       i,bs = A->rmap->bs,k;
1524 
1525   PetscFunctionBegin;
1526   PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE));
1527   for (i=0; i<a->mbs; i++) {
1528     PetscCall(PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT "-%" PetscInt_FMT ":",i*bs,i*bs+bs-1));
1529     for (k=a->i[i]; k<a->i[i+1]; k++) {
1530       PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT "-%" PetscInt_FMT ") ",bs*a->j[k],bs*a->j[k]+bs-1));
1531     }
1532     PetscCall(PetscViewerASCIIPrintf(viewer,"\n"));
1533   }
1534   PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE));
1535   PetscFunctionReturn(0);
1536 }
1537 
1538 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1539 {
1540   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1541   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1542   PetscViewerFormat format;
1543 
1544   PetscFunctionBegin;
1545   if (A->structure_only) {
1546     PetscCall(MatView_SeqBAIJ_ASCII_structonly(A,viewer));
1547     PetscFunctionReturn(0);
1548   }
1549 
1550   PetscCall(PetscViewerGetFormat(viewer,&format));
1551   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1552     PetscCall(PetscViewerASCIIPrintf(viewer,"  block size is %" PetscInt_FMT "\n",bs));
1553   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1554     const char *matname;
1555     Mat        aij;
1556     PetscCall(MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij));
1557     PetscCall(PetscObjectGetName((PetscObject)A,&matname));
1558     PetscCall(PetscObjectSetName((PetscObject)aij,matname));
1559     PetscCall(MatView(aij,viewer));
1560     PetscCall(MatDestroy(&aij));
1561   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1562       PetscFunctionReturn(0);
1563   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1564     PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE));
1565     for (i=0; i<a->mbs; i++) {
1566       for (j=0; j<bs; j++) {
1567         PetscCall(PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ":",i*bs+j));
1568         for (k=a->i[i]; k<a->i[i+1]; k++) {
1569           for (l=0; l<bs; l++) {
1570 #if defined(PETSC_USE_COMPLEX)
1571             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1572               PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %gi) ",bs*a->j[k]+l,
1573                                              (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j])));
1574             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1575               PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %gi) ",bs*a->j[k]+l,
1576                                              (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j])));
1577             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1578               PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j])));
1579             }
1580 #else
1581             if (a->a[bs2*k + l*bs + j] != 0.0) {
1582               PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]));
1583             }
1584 #endif
1585           }
1586         }
1587         PetscCall(PetscViewerASCIIPrintf(viewer,"\n"));
1588       }
1589     }
1590     PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE));
1591   } else {
1592     PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE));
1593     for (i=0; i<a->mbs; i++) {
1594       for (j=0; j<bs; j++) {
1595         PetscCall(PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ":",i*bs+j));
1596         for (k=a->i[i]; k<a->i[i+1]; k++) {
1597           for (l=0; l<bs; l++) {
1598 #if defined(PETSC_USE_COMPLEX)
1599             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1600               PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %g i) ",bs*a->j[k]+l,
1601                                              (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j])));
1602             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1603               PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %g i) ",bs*a->j[k]+l,
1604                                              (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j])));
1605             } else {
1606               PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j])));
1607             }
1608 #else
1609             PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]));
1610 #endif
1611           }
1612         }
1613         PetscCall(PetscViewerASCIIPrintf(viewer,"\n"));
1614       }
1615     }
1616     PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE));
1617   }
1618   PetscCall(PetscViewerFlush(viewer));
1619   PetscFunctionReturn(0);
1620 }
1621 
1622 #include <petscdraw.h>
1623 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1624 {
1625   Mat               A = (Mat) Aa;
1626   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1627   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1628   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1629   MatScalar         *aa;
1630   PetscViewer       viewer;
1631   PetscViewerFormat format;
1632 
1633   PetscFunctionBegin;
1634   PetscCall(PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer));
1635   PetscCall(PetscViewerGetFormat(viewer,&format));
1636   PetscCall(PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr));
1637 
1638   /* loop over matrix elements drawing boxes */
1639 
1640   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1641     PetscDrawCollectiveBegin(draw);
1642     /* Blue for negative, Cyan for zero and  Red for positive */
1643     color = PETSC_DRAW_BLUE;
1644     for (i=0,row=0; i<mbs; i++,row+=bs) {
1645       for (j=a->i[i]; j<a->i[i+1]; j++) {
1646         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1647         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1648         aa  = a->a + j*bs2;
1649         for (k=0; k<bs; k++) {
1650           for (l=0; l<bs; l++) {
1651             if (PetscRealPart(*aa++) >=  0.) continue;
1652             PetscCall(PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color));
1653           }
1654         }
1655       }
1656     }
1657     color = PETSC_DRAW_CYAN;
1658     for (i=0,row=0; i<mbs; i++,row+=bs) {
1659       for (j=a->i[i]; j<a->i[i+1]; j++) {
1660         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1661         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1662         aa  = a->a + j*bs2;
1663         for (k=0; k<bs; k++) {
1664           for (l=0; l<bs; l++) {
1665             if (PetscRealPart(*aa++) != 0.) continue;
1666             PetscCall(PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color));
1667           }
1668         }
1669       }
1670     }
1671     color = PETSC_DRAW_RED;
1672     for (i=0,row=0; i<mbs; i++,row+=bs) {
1673       for (j=a->i[i]; j<a->i[i+1]; j++) {
1674         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1675         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1676         aa  = a->a + j*bs2;
1677         for (k=0; k<bs; k++) {
1678           for (l=0; l<bs; l++) {
1679             if (PetscRealPart(*aa++) <= 0.) continue;
1680             PetscCall(PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color));
1681           }
1682         }
1683       }
1684     }
1685     PetscDrawCollectiveEnd(draw);
1686   } else {
1687     /* use contour shading to indicate magnitude of values */
1688     /* first determine max of all nonzero values */
1689     PetscReal      minv = 0.0, maxv = 0.0;
1690     PetscDraw      popup;
1691 
1692     for (i=0; i<a->nz*a->bs2; i++) {
1693       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1694     }
1695     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1696     PetscCall(PetscDrawGetPopup(draw,&popup));
1697     PetscCall(PetscDrawScalePopup(popup,0.0,maxv));
1698 
1699     PetscDrawCollectiveBegin(draw);
1700     for (i=0,row=0; i<mbs; i++,row+=bs) {
1701       for (j=a->i[i]; j<a->i[i+1]; j++) {
1702         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1703         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1704         aa  = a->a + j*bs2;
1705         for (k=0; k<bs; k++) {
1706           for (l=0; l<bs; l++) {
1707             MatScalar v = *aa++;
1708             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1709             PetscCall(PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color));
1710           }
1711         }
1712       }
1713     }
1714     PetscDrawCollectiveEnd(draw);
1715   }
1716   PetscFunctionReturn(0);
1717 }
1718 
1719 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1720 {
1721   PetscReal      xl,yl,xr,yr,w,h;
1722   PetscDraw      draw;
1723   PetscBool      isnull;
1724 
1725   PetscFunctionBegin;
1726   PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
1727   PetscCall(PetscDrawIsNull(draw,&isnull));
1728   if (isnull) PetscFunctionReturn(0);
1729 
1730   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1731   xr  += w;          yr += h;        xl = -w;     yl = -h;
1732   PetscCall(PetscDrawSetCoordinates(draw,xl,yl,xr,yr));
1733   PetscCall(PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer));
1734   PetscCall(PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A));
1735   PetscCall(PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL));
1736   PetscCall(PetscDrawSave(draw));
1737   PetscFunctionReturn(0);
1738 }
1739 
1740 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1741 {
1742   PetscBool      iascii,isbinary,isdraw;
1743 
1744   PetscFunctionBegin;
1745   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
1746   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
1747   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
1748   if (iascii) {
1749     PetscCall(MatView_SeqBAIJ_ASCII(A,viewer));
1750   } else if (isbinary) {
1751     PetscCall(MatView_SeqBAIJ_Binary(A,viewer));
1752   } else if (isdraw) {
1753     PetscCall(MatView_SeqBAIJ_Draw(A,viewer));
1754   } else {
1755     Mat B;
1756     PetscCall(MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B));
1757     PetscCall(MatView(B,viewer));
1758     PetscCall(MatDestroy(&B));
1759   }
1760   PetscFunctionReturn(0);
1761 }
1762 
1763 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1764 {
1765   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1766   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1767   PetscInt    *ai = a->i,*ailen = a->ilen;
1768   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1769   MatScalar   *ap,*aa = a->a;
1770 
1771   PetscFunctionBegin;
1772   for (k=0; k<m; k++) { /* loop over rows */
1773     row = im[k]; brow = row/bs;
1774     if (row < 0) {v += n; continue;} /* negative row */
1775     PetscCheck(row < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " too large", row);
1776     rp   = aj ? aj + ai[brow] : NULL; /* mustn't add to NULL, that is UB */
1777     ap   = aa ? aa + bs2*ai[brow] : NULL; /* mustn't add to NULL, that is UB */
1778     nrow = ailen[brow];
1779     for (l=0; l<n; l++) { /* loop over columns */
1780       if (in[l] < 0) {v++; continue;} /* negative column */
1781       PetscCheck(in[l] < A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %" PetscInt_FMT " too large", in[l]);
1782       col  = in[l];
1783       bcol = col/bs;
1784       cidx = col%bs;
1785       ridx = row%bs;
1786       high = nrow;
1787       low  = 0; /* assume unsorted */
1788       while (high-low > 5) {
1789         t = (low+high)/2;
1790         if (rp[t] > bcol) high = t;
1791         else             low  = t;
1792       }
1793       for (i=low; i<high; i++) {
1794         if (rp[i] > bcol) break;
1795         if (rp[i] == bcol) {
1796           *v++ = ap[bs2*i+bs*cidx+ridx];
1797           goto finished;
1798         }
1799       }
1800       *v++ = 0.0;
1801 finished:;
1802     }
1803   }
1804   PetscFunctionReturn(0);
1805 }
1806 
1807 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1808 {
1809   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1810   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1811   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1812   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1813   PetscBool         roworiented=a->roworiented;
1814   const PetscScalar *value     = v;
1815   MatScalar         *ap=NULL,*aa = a->a,*bap;
1816 
1817   PetscFunctionBegin;
1818   if (roworiented) {
1819     stepval = (n-1)*bs;
1820   } else {
1821     stepval = (m-1)*bs;
1822   }
1823   for (k=0; k<m; k++) { /* loop over added rows */
1824     row = im[k];
1825     if (row < 0) continue;
1826     PetscCheck(row < a->mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT,row,a->mbs-1);
1827     rp   = aj + ai[row];
1828     if (!A->structure_only) ap = aa + bs2*ai[row];
1829     rmax = imax[row];
1830     nrow = ailen[row];
1831     low  = 0;
1832     high = nrow;
1833     for (l=0; l<n; l++) { /* loop over added columns */
1834       if (in[l] < 0) continue;
1835       PetscCheck(in[l] < a->nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT,in[l],a->nbs-1);
1836       col = in[l];
1837       if (!A->structure_only) {
1838         if (roworiented) {
1839           value = v + (k*(stepval+bs) + l)*bs;
1840         } else {
1841           value = v + (l*(stepval+bs) + k)*bs;
1842         }
1843       }
1844       if (col <= lastcol) low = 0;
1845       else high = nrow;
1846       lastcol = col;
1847       while (high-low > 7) {
1848         t = (low+high)/2;
1849         if (rp[t] > col) high = t;
1850         else             low  = t;
1851       }
1852       for (i=low; i<high; i++) {
1853         if (rp[i] > col) break;
1854         if (rp[i] == col) {
1855           if (A->structure_only) goto noinsert2;
1856           bap = ap +  bs2*i;
1857           if (roworiented) {
1858             if (is == ADD_VALUES) {
1859               for (ii=0; ii<bs; ii++,value+=stepval) {
1860                 for (jj=ii; jj<bs2; jj+=bs) {
1861                   bap[jj] += *value++;
1862                 }
1863               }
1864             } else {
1865               for (ii=0; ii<bs; ii++,value+=stepval) {
1866                 for (jj=ii; jj<bs2; jj+=bs) {
1867                   bap[jj] = *value++;
1868                 }
1869               }
1870             }
1871           } else {
1872             if (is == ADD_VALUES) {
1873               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1874                 for (jj=0; jj<bs; jj++) {
1875                   bap[jj] += value[jj];
1876                 }
1877                 bap += bs;
1878               }
1879             } else {
1880               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1881                 for (jj=0; jj<bs; jj++) {
1882                   bap[jj]  = value[jj];
1883                 }
1884                 bap += bs;
1885               }
1886             }
1887           }
1888           goto noinsert2;
1889         }
1890       }
1891       if (nonew == 1) goto noinsert2;
1892       PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
1893       if (A->structure_only) {
1894         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1895       } else {
1896         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1897       }
1898       N = nrow++ - 1; high++;
1899       /* shift up all the later entries in this row */
1900       PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1));
1901       rp[i] = col;
1902       if (!A->structure_only) {
1903         PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1)));
1904         bap   = ap +  bs2*i;
1905         if (roworiented) {
1906           for (ii=0; ii<bs; ii++,value+=stepval) {
1907             for (jj=ii; jj<bs2; jj+=bs) {
1908               bap[jj] = *value++;
1909             }
1910           }
1911         } else {
1912           for (ii=0; ii<bs; ii++,value+=stepval) {
1913             for (jj=0; jj<bs; jj++) {
1914               *bap++ = *value++;
1915             }
1916           }
1917         }
1918       }
1919 noinsert2:;
1920       low = i;
1921     }
1922     ailen[row] = nrow;
1923   }
1924   PetscFunctionReturn(0);
1925 }
1926 
1927 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1928 {
1929   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1930   PetscInt       fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax;
1931   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1932   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1933   MatScalar      *aa  = a->a,*ap;
1934   PetscReal      ratio=0.6;
1935 
1936   PetscFunctionBegin;
1937   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1938 
1939   if (m) rmax = ailen[0];
1940   for (i=1; i<mbs; i++) {
1941     /* move each row back by the amount of empty slots (fshift) before it*/
1942     fshift += imax[i-1] - ailen[i-1];
1943     rmax    = PetscMax(rmax,ailen[i]);
1944     if (fshift) {
1945       ip = aj + ai[i];
1946       ap = aa + bs2*ai[i];
1947       N  = ailen[i];
1948       PetscCall(PetscArraymove(ip-fshift,ip,N));
1949       if (!A->structure_only) {
1950         PetscCall(PetscArraymove(ap-bs2*fshift,ap,bs2*N));
1951       }
1952     }
1953     ai[i] = ai[i-1] + ailen[i-1];
1954   }
1955   if (mbs) {
1956     fshift += imax[mbs-1] - ailen[mbs-1];
1957     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1958   }
1959 
1960   /* reset ilen and imax for each row */
1961   a->nonzerorowcnt = 0;
1962   if (A->structure_only) {
1963     PetscCall(PetscFree2(a->imax,a->ilen));
1964   } else { /* !A->structure_only */
1965     for (i=0; i<mbs; i++) {
1966       ailen[i] = imax[i] = ai[i+1] - ai[i];
1967       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1968     }
1969   }
1970   a->nz = ai[mbs];
1971 
1972   /* diagonals may have moved, so kill the diagonal pointers */
1973   a->idiagvalid = PETSC_FALSE;
1974   if (fshift && a->diag) {
1975     PetscCall(PetscFree(a->diag));
1976     PetscCall(PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt)));
1977     a->diag = NULL;
1978   }
1979   if (fshift) PetscCheck(a->nounused != -1,PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1980   PetscCall(PetscInfo(A,"Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2));
1981   PetscCall(PetscInfo(A,"Number of mallocs during MatSetValues is %" PetscInt_FMT "\n",a->reallocs));
1982   PetscCall(PetscInfo(A,"Most nonzeros blocks in any row is %" PetscInt_FMT "\n",rmax));
1983 
1984   A->info.mallocs    += a->reallocs;
1985   a->reallocs         = 0;
1986   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1987   a->rmax             = rmax;
1988 
1989   if (!A->structure_only) {
1990     PetscCall(MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio));
1991   }
1992   PetscFunctionReturn(0);
1993 }
1994 
1995 /*
1996    This function returns an array of flags which indicate the locations of contiguous
1997    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1998    then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1999    Assume: sizes should be long enough to hold all the values.
2000 */
2001 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2002 {
2003   PetscInt  i,j,k,row;
2004   PetscBool flg;
2005 
2006   PetscFunctionBegin;
2007   for (i=0,j=0; i<n; j++) {
2008     row = idx[i];
2009     if (row%bs!=0) { /* Not the beginning of a block */
2010       sizes[j] = 1;
2011       i++;
2012     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2013       sizes[j] = 1;         /* Also makes sure at least 'bs' values exist for next else */
2014       i++;
2015     } else { /* Beginning of the block, so check if the complete block exists */
2016       flg = PETSC_TRUE;
2017       for (k=1; k<bs; k++) {
2018         if (row+k != idx[i+k]) { /* break in the block */
2019           flg = PETSC_FALSE;
2020           break;
2021         }
2022       }
2023       if (flg) { /* No break in the bs */
2024         sizes[j] = bs;
2025         i       += bs;
2026       } else {
2027         sizes[j] = 1;
2028         i++;
2029       }
2030     }
2031   }
2032   *bs_max = j;
2033   PetscFunctionReturn(0);
2034 }
2035 
2036 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2037 {
2038   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2039   PetscInt          i,j,k,count,*rows;
2040   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2041   PetscScalar       zero = 0.0;
2042   MatScalar         *aa;
2043   const PetscScalar *xx;
2044   PetscScalar       *bb;
2045 
2046   PetscFunctionBegin;
2047   /* fix right hand side if needed */
2048   if (x && b) {
2049     PetscCall(VecGetArrayRead(x,&xx));
2050     PetscCall(VecGetArray(b,&bb));
2051     for (i=0; i<is_n; i++) {
2052       bb[is_idx[i]] = diag*xx[is_idx[i]];
2053     }
2054     PetscCall(VecRestoreArrayRead(x,&xx));
2055     PetscCall(VecRestoreArray(b,&bb));
2056   }
2057 
2058   /* Make a copy of the IS and  sort it */
2059   /* allocate memory for rows,sizes */
2060   PetscCall(PetscMalloc2(is_n,&rows,2*is_n,&sizes));
2061 
2062   /* copy IS values to rows, and sort them */
2063   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2064   PetscCall(PetscSortInt(is_n,rows));
2065 
2066   if (baij->keepnonzeropattern) {
2067     for (i=0; i<is_n; i++) sizes[i] = 1;
2068     bs_max          = is_n;
2069   } else {
2070     PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max));
2071     A->nonzerostate++;
2072   }
2073 
2074   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2075     row = rows[j];
2076     PetscCheck(row >= 0 && row <= A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %" PetscInt_FMT " out of range",row);
2077     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2078     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2079     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2080       if (diag != (PetscScalar)0.0) {
2081         if (baij->ilen[row/bs] > 0) {
2082           baij->ilen[row/bs]       = 1;
2083           baij->j[baij->i[row/bs]] = row/bs;
2084 
2085           PetscCall(PetscArrayzero(aa,count*bs));
2086         }
2087         /* Now insert all the diagonal values for this bs */
2088         for (k=0; k<bs; k++) {
2089           PetscCall((*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES));
2090         }
2091       } else { /* (diag == 0.0) */
2092         baij->ilen[row/bs] = 0;
2093       } /* end (diag == 0.0) */
2094     } else { /* (sizes[i] != bs) */
2095       PetscAssert(sizes[i] == 1,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2096       for (k=0; k<count; k++) {
2097         aa[0] =  zero;
2098         aa   += bs;
2099       }
2100       if (diag != (PetscScalar)0.0) {
2101         PetscCall((*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES));
2102       }
2103     }
2104   }
2105 
2106   PetscCall(PetscFree2(rows,sizes));
2107   PetscCall(MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY));
2108   PetscFunctionReturn(0);
2109 }
2110 
2111 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2112 {
2113   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2114   PetscInt          i,j,k,count;
2115   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2116   PetscScalar       zero = 0.0;
2117   MatScalar         *aa;
2118   const PetscScalar *xx;
2119   PetscScalar       *bb;
2120   PetscBool         *zeroed,vecs = PETSC_FALSE;
2121 
2122   PetscFunctionBegin;
2123   /* fix right hand side if needed */
2124   if (x && b) {
2125     PetscCall(VecGetArrayRead(x,&xx));
2126     PetscCall(VecGetArray(b,&bb));
2127     vecs = PETSC_TRUE;
2128   }
2129 
2130   /* zero the columns */
2131   PetscCall(PetscCalloc1(A->rmap->n,&zeroed));
2132   for (i=0; i<is_n; i++) {
2133     PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %" PetscInt_FMT " out of range",is_idx[i]);
2134     zeroed[is_idx[i]] = PETSC_TRUE;
2135   }
2136   for (i=0; i<A->rmap->N; i++) {
2137     if (!zeroed[i]) {
2138       row = i/bs;
2139       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2140         for (k=0; k<bs; k++) {
2141           col = bs*baij->j[j] + k;
2142           if (zeroed[col]) {
2143             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2144             if (vecs) bb[i] -= aa[0]*xx[col];
2145             aa[0] = 0.0;
2146           }
2147         }
2148       }
2149     } else if (vecs) bb[i] = diag*xx[i];
2150   }
2151   PetscCall(PetscFree(zeroed));
2152   if (vecs) {
2153     PetscCall(VecRestoreArrayRead(x,&xx));
2154     PetscCall(VecRestoreArray(b,&bb));
2155   }
2156 
2157   /* zero the rows */
2158   for (i=0; i<is_n; i++) {
2159     row   = is_idx[i];
2160     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2161     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2162     for (k=0; k<count; k++) {
2163       aa[0] =  zero;
2164       aa   += bs;
2165     }
2166     if (diag != (PetscScalar)0.0) {
2167       PetscCall((*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES));
2168     }
2169   }
2170   PetscCall(MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY));
2171   PetscFunctionReturn(0);
2172 }
2173 
2174 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2175 {
2176   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2177   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2178   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2179   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2180   PetscInt       ridx,cidx,bs2=a->bs2;
2181   PetscBool      roworiented=a->roworiented;
2182   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;
2183 
2184   PetscFunctionBegin;
2185   for (k=0; k<m; k++) { /* loop over added rows */
2186     row  = im[k];
2187     brow = row/bs;
2188     if (row < 0) continue;
2189     PetscCheck(row < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,row,A->rmap->N-1);
2190     rp   = aj + ai[brow];
2191     if (!A->structure_only) ap = aa + bs2*ai[brow];
2192     rmax = imax[brow];
2193     nrow = ailen[brow];
2194     low  = 0;
2195     high = nrow;
2196     for (l=0; l<n; l++) { /* loop over added columns */
2197       if (in[l] < 0) continue;
2198       PetscCheck(in[l] < A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[l],A->cmap->n-1);
2199       col  = in[l]; bcol = col/bs;
2200       ridx = row % bs; cidx = col % bs;
2201       if (!A->structure_only) {
2202         if (roworiented) {
2203           value = v[l + k*n];
2204         } else {
2205           value = v[k + l*m];
2206         }
2207       }
2208       if (col <= lastcol) low = 0; else high = nrow;
2209       lastcol = col;
2210       while (high-low > 7) {
2211         t = (low+high)/2;
2212         if (rp[t] > bcol) high = t;
2213         else              low  = t;
2214       }
2215       for (i=low; i<high; i++) {
2216         if (rp[i] > bcol) break;
2217         if (rp[i] == bcol) {
2218           bap = ap +  bs2*i + bs*cidx + ridx;
2219           if (!A->structure_only) {
2220             if (is == ADD_VALUES) *bap += value;
2221             else                  *bap  = value;
2222           }
2223           goto noinsert1;
2224         }
2225       }
2226       if (nonew == 1) goto noinsert1;
2227       PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2228       if (A->structure_only) {
2229         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2230       } else {
2231         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2232       }
2233       N = nrow++ - 1; high++;
2234       /* shift up all the later entries in this row */
2235       PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1));
2236       rp[i] = bcol;
2237       if (!A->structure_only) {
2238         PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1)));
2239         PetscCall(PetscArrayzero(ap+bs2*i,bs2));
2240         ap[bs2*i + bs*cidx + ridx] = value;
2241       }
2242       a->nz++;
2243       A->nonzerostate++;
2244 noinsert1:;
2245       low = i;
2246     }
2247     ailen[brow] = nrow;
2248   }
2249   PetscFunctionReturn(0);
2250 }
2251 
2252 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2253 {
2254   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2255   Mat            outA;
2256   PetscBool      row_identity,col_identity;
2257 
2258   PetscFunctionBegin;
2259   PetscCheck(info->levels == 0,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2260   PetscCall(ISIdentity(row,&row_identity));
2261   PetscCall(ISIdentity(col,&col_identity));
2262   PetscCheck(row_identity && col_identity,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2263 
2264   outA            = inA;
2265   inA->factortype = MAT_FACTOR_LU;
2266   PetscCall(PetscFree(inA->solvertype));
2267   PetscCall(PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype));
2268 
2269   PetscCall(MatMarkDiagonal_SeqBAIJ(inA));
2270 
2271   PetscCall(PetscObjectReference((PetscObject)row));
2272   PetscCall(ISDestroy(&a->row));
2273   a->row = row;
2274   PetscCall(PetscObjectReference((PetscObject)col));
2275   PetscCall(ISDestroy(&a->col));
2276   a->col = col;
2277 
2278   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2279   PetscCall(ISDestroy(&a->icol));
2280   PetscCall(ISInvertPermutation(col,PETSC_DECIDE,&a->icol));
2281   PetscCall(PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol));
2282 
2283   PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity)));
2284   if (!a->solve_work) {
2285     PetscCall(PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work));
2286     PetscCall(PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar)));
2287   }
2288   PetscCall(MatLUFactorNumeric(outA,inA,info));
2289   PetscFunctionReturn(0);
2290 }
2291 
2292 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2293 {
2294   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2295   PetscInt    i,nz,mbs;
2296 
2297   PetscFunctionBegin;
2298   nz  = baij->maxnz;
2299   mbs = baij->mbs;
2300   for (i=0; i<nz; i++) {
2301     baij->j[i] = indices[i];
2302   }
2303   baij->nz = nz;
2304   for (i=0; i<mbs; i++) {
2305     baij->ilen[i] = baij->imax[i];
2306   }
2307   PetscFunctionReturn(0);
2308 }
2309 
2310 /*@
2311     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2312        in the matrix.
2313 
2314   Input Parameters:
2315 +  mat - the SeqBAIJ matrix
2316 -  indices - the column indices
2317 
2318   Level: advanced
2319 
2320   Notes:
2321     This can be called if you have precomputed the nonzero structure of the
2322   matrix and want to provide it to the matrix object to improve the performance
2323   of the MatSetValues() operation.
2324 
2325     You MUST have set the correct numbers of nonzeros per row in the call to
2326   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2327 
2328     MUST be called before any calls to MatSetValues();
2329 
2330 @*/
2331 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2332 {
2333   PetscFunctionBegin;
2334   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2335   PetscValidIntPointer(indices,2);
2336   PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));
2337   PetscFunctionReturn(0);
2338 }
2339 
2340 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2341 {
2342   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2343   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2344   PetscReal      atmp;
2345   PetscScalar    *x,zero = 0.0;
2346   MatScalar      *aa;
2347   PetscInt       ncols,brow,krow,kcol;
2348 
2349   PetscFunctionBegin;
2350   /* why is this not a macro???????????????????????????????????????????????????????????????? */
2351   PetscCheck(!A->factortype,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2352   bs  = A->rmap->bs;
2353   aa  = a->a;
2354   ai  = a->i;
2355   aj  = a->j;
2356   mbs = a->mbs;
2357 
2358   PetscCall(VecSet(v,zero));
2359   PetscCall(VecGetArray(v,&x));
2360   PetscCall(VecGetLocalSize(v,&n));
2361   PetscCheck(n == A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2362   for (i=0; i<mbs; i++) {
2363     ncols = ai[1] - ai[0]; ai++;
2364     brow  = bs*i;
2365     for (j=0; j<ncols; j++) {
2366       for (kcol=0; kcol<bs; kcol++) {
2367         for (krow=0; krow<bs; krow++) {
2368           atmp = PetscAbsScalar(*aa);aa++;
2369           row  = brow + krow;   /* row index */
2370           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2371         }
2372       }
2373       aj++;
2374     }
2375   }
2376   PetscCall(VecRestoreArray(v,&x));
2377   PetscFunctionReturn(0);
2378 }
2379 
2380 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2381 {
2382   PetscFunctionBegin;
2383   /* If the two matrices have the same copy implementation, use fast copy. */
2384   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2385     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2386     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2387     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2388 
2389     PetscCheck(a->i[ambs] == b->i[bmbs],PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different",a->i[ambs],b->i[bmbs]);
2390     PetscCheck(abs == bbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different",abs,bbs);
2391     PetscCall(PetscArraycpy(b->a,a->a,bs2*a->i[ambs]));
2392     PetscCall(PetscObjectStateIncrease((PetscObject)B));
2393   } else {
2394     PetscCall(MatCopy_Basic(A,B,str));
2395   }
2396   PetscFunctionReturn(0);
2397 }
2398 
2399 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2400 {
2401   PetscFunctionBegin;
2402   PetscCall(MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL));
2403   PetscFunctionReturn(0);
2404 }
2405 
2406 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2407 {
2408   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2409 
2410   PetscFunctionBegin;
2411   *array = a->a;
2412   PetscFunctionReturn(0);
2413 }
2414 
2415 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2416 {
2417   PetscFunctionBegin;
2418   *array = NULL;
2419   PetscFunctionReturn(0);
2420 }
2421 
2422 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2423 {
2424   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2425   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2426   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2427 
2428   PetscFunctionBegin;
2429   /* Set the number of nonzeros in the new matrix */
2430   PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz));
2431   PetscFunctionReturn(0);
2432 }
2433 
2434 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2435 {
2436   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2437   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2438   PetscBLASInt   one=1;
2439 
2440   PetscFunctionBegin;
2441   if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2442     PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2443     if (e) {
2444       PetscCall(PetscArraycmp(x->i,y->i,x->mbs+1,&e));
2445       if (e) {
2446         PetscCall(PetscArraycmp(x->j,y->j,x->i[x->mbs],&e));
2447         if (e) str = SAME_NONZERO_PATTERN;
2448       }
2449     }
2450     if (!e) PetscCheck(str != SAME_NONZERO_PATTERN,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"MatStructure is not SAME_NONZERO_PATTERN");
2451   }
2452   if (str == SAME_NONZERO_PATTERN) {
2453     PetscScalar  alpha = a;
2454     PetscBLASInt bnz;
2455     PetscCall(PetscBLASIntCast(x->nz*bs2,&bnz));
2456     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2457     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
2458   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2459     PetscCall(MatAXPY_Basic(Y,a,X,str));
2460   } else {
2461     Mat      B;
2462     PetscInt *nnz;
2463     PetscCheck(bs == X->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2464     PetscCall(PetscMalloc1(Y->rmap->N,&nnz));
2465     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B));
2466     PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name));
2467     PetscCall(MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N));
2468     PetscCall(MatSetBlockSizesFromMats(B,Y,Y));
2469     PetscCall(MatSetType(B,(MatType) ((PetscObject)Y)->type_name));
2470     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz));
2471     PetscCall(MatSeqBAIJSetPreallocation(B,bs,0,nnz));
2472     PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
2473     PetscCall(MatHeaderMerge(Y,&B));
2474     PetscCall(PetscFree(nnz));
2475   }
2476   PetscFunctionReturn(0);
2477 }
2478 
2479 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2480 {
2481 #if defined(PETSC_USE_COMPLEX)
2482   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2483   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2484   MatScalar   *aa = a->a;
2485 
2486   PetscFunctionBegin;
2487   for (i=0; i<nz; i++) aa[i] = PetscConj(aa[i]);
2488 #else
2489   PetscFunctionBegin;
2490 #endif
2491   PetscFunctionReturn(0);
2492 }
2493 
2494 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2495 {
2496   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2497   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2498   MatScalar   *aa = a->a;
2499 
2500   PetscFunctionBegin;
2501   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2502   PetscFunctionReturn(0);
2503 }
2504 
2505 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2506 {
2507   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2508   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2509   MatScalar   *aa = a->a;
2510 
2511   PetscFunctionBegin;
2512   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2513   PetscFunctionReturn(0);
2514 }
2515 
2516 /*
2517     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
2518 */
2519 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2520 {
2521   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2522   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2523   PetscInt       nz = a->i[m],row,*jj,mr,col;
2524 
2525   PetscFunctionBegin;
2526   *nn = n;
2527   if (!ia) PetscFunctionReturn(0);
2528   PetscCheck(!symmetric,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2529   else {
2530     PetscCall(PetscCalloc1(n,&collengths));
2531     PetscCall(PetscMalloc1(n+1,&cia));
2532     PetscCall(PetscMalloc1(nz,&cja));
2533     jj   = a->j;
2534     for (i=0; i<nz; i++) {
2535       collengths[jj[i]]++;
2536     }
2537     cia[0] = oshift;
2538     for (i=0; i<n; i++) {
2539       cia[i+1] = cia[i] + collengths[i];
2540     }
2541     PetscCall(PetscArrayzero(collengths,n));
2542     jj   = a->j;
2543     for (row=0; row<m; row++) {
2544       mr = a->i[row+1] - a->i[row];
2545       for (i=0; i<mr; i++) {
2546         col = *jj++;
2547 
2548         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2549       }
2550     }
2551     PetscCall(PetscFree(collengths));
2552     *ia  = cia; *ja = cja;
2553   }
2554   PetscFunctionReturn(0);
2555 }
2556 
2557 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2558 {
2559   PetscFunctionBegin;
2560   if (!ia) PetscFunctionReturn(0);
2561   PetscCall(PetscFree(*ia));
2562   PetscCall(PetscFree(*ja));
2563   PetscFunctionReturn(0);
2564 }
2565 
2566 /*
2567  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2568  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2569  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2570  */
2571 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2572 {
2573   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2574   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2575   PetscInt       nz = a->i[m],row,*jj,mr,col;
2576   PetscInt       *cspidx;
2577 
2578   PetscFunctionBegin;
2579   *nn = n;
2580   if (!ia) PetscFunctionReturn(0);
2581 
2582   PetscCall(PetscCalloc1(n,&collengths));
2583   PetscCall(PetscMalloc1(n+1,&cia));
2584   PetscCall(PetscMalloc1(nz,&cja));
2585   PetscCall(PetscMalloc1(nz,&cspidx));
2586   jj   = a->j;
2587   for (i=0; i<nz; i++) {
2588     collengths[jj[i]]++;
2589   }
2590   cia[0] = oshift;
2591   for (i=0; i<n; i++) {
2592     cia[i+1] = cia[i] + collengths[i];
2593   }
2594   PetscCall(PetscArrayzero(collengths,n));
2595   jj   = a->j;
2596   for (row=0; row<m; row++) {
2597     mr = a->i[row+1] - a->i[row];
2598     for (i=0; i<mr; i++) {
2599       col = *jj++;
2600       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2601       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2602     }
2603   }
2604   PetscCall(PetscFree(collengths));
2605   *ia    = cia;
2606   *ja    = cja;
2607   *spidx = cspidx;
2608   PetscFunctionReturn(0);
2609 }
2610 
2611 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2612 {
2613   PetscFunctionBegin;
2614   PetscCall(MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done));
2615   PetscCall(PetscFree(*spidx));
2616   PetscFunctionReturn(0);
2617 }
2618 
2619 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2620 {
2621   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2622 
2623   PetscFunctionBegin;
2624   if (!Y->preallocated || !aij->nz) {
2625     PetscCall(MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL));
2626   }
2627   PetscCall(MatShift_Basic(Y,a));
2628   PetscFunctionReturn(0);
2629 }
2630 
2631 /* -------------------------------------------------------------------*/
2632 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2633                                        MatGetRow_SeqBAIJ,
2634                                        MatRestoreRow_SeqBAIJ,
2635                                        MatMult_SeqBAIJ_N,
2636                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2637                                        MatMultTranspose_SeqBAIJ,
2638                                        MatMultTransposeAdd_SeqBAIJ,
2639                                        NULL,
2640                                        NULL,
2641                                        NULL,
2642                                /* 10*/ NULL,
2643                                        MatLUFactor_SeqBAIJ,
2644                                        NULL,
2645                                        NULL,
2646                                        MatTranspose_SeqBAIJ,
2647                                /* 15*/ MatGetInfo_SeqBAIJ,
2648                                        MatEqual_SeqBAIJ,
2649                                        MatGetDiagonal_SeqBAIJ,
2650                                        MatDiagonalScale_SeqBAIJ,
2651                                        MatNorm_SeqBAIJ,
2652                                /* 20*/ NULL,
2653                                        MatAssemblyEnd_SeqBAIJ,
2654                                        MatSetOption_SeqBAIJ,
2655                                        MatZeroEntries_SeqBAIJ,
2656                                /* 24*/ MatZeroRows_SeqBAIJ,
2657                                        NULL,
2658                                        NULL,
2659                                        NULL,
2660                                        NULL,
2661                                /* 29*/ MatSetUp_SeqBAIJ,
2662                                        NULL,
2663                                        NULL,
2664                                        NULL,
2665                                        NULL,
2666                                /* 34*/ MatDuplicate_SeqBAIJ,
2667                                        NULL,
2668                                        NULL,
2669                                        MatILUFactor_SeqBAIJ,
2670                                        NULL,
2671                                /* 39*/ MatAXPY_SeqBAIJ,
2672                                        MatCreateSubMatrices_SeqBAIJ,
2673                                        MatIncreaseOverlap_SeqBAIJ,
2674                                        MatGetValues_SeqBAIJ,
2675                                        MatCopy_SeqBAIJ,
2676                                /* 44*/ NULL,
2677                                        MatScale_SeqBAIJ,
2678                                        MatShift_SeqBAIJ,
2679                                        NULL,
2680                                        MatZeroRowsColumns_SeqBAIJ,
2681                                /* 49*/ NULL,
2682                                        MatGetRowIJ_SeqBAIJ,
2683                                        MatRestoreRowIJ_SeqBAIJ,
2684                                        MatGetColumnIJ_SeqBAIJ,
2685                                        MatRestoreColumnIJ_SeqBAIJ,
2686                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2687                                        NULL,
2688                                        NULL,
2689                                        NULL,
2690                                        MatSetValuesBlocked_SeqBAIJ,
2691                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2692                                        MatDestroy_SeqBAIJ,
2693                                        MatView_SeqBAIJ,
2694                                        NULL,
2695                                        NULL,
2696                                /* 64*/ NULL,
2697                                        NULL,
2698                                        NULL,
2699                                        NULL,
2700                                        NULL,
2701                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2702                                        NULL,
2703                                        MatConvert_Basic,
2704                                        NULL,
2705                                        NULL,
2706                                /* 74*/ NULL,
2707                                        MatFDColoringApply_BAIJ,
2708                                        NULL,
2709                                        NULL,
2710                                        NULL,
2711                                /* 79*/ NULL,
2712                                        NULL,
2713                                        NULL,
2714                                        NULL,
2715                                        MatLoad_SeqBAIJ,
2716                                /* 84*/ NULL,
2717                                        NULL,
2718                                        NULL,
2719                                        NULL,
2720                                        NULL,
2721                                /* 89*/ NULL,
2722                                        NULL,
2723                                        NULL,
2724                                        NULL,
2725                                        NULL,
2726                                /* 94*/ NULL,
2727                                        NULL,
2728                                        NULL,
2729                                        NULL,
2730                                        NULL,
2731                                /* 99*/ NULL,
2732                                        NULL,
2733                                        NULL,
2734                                        MatConjugate_SeqBAIJ,
2735                                        NULL,
2736                                /*104*/ NULL,
2737                                        MatRealPart_SeqBAIJ,
2738                                        MatImaginaryPart_SeqBAIJ,
2739                                        NULL,
2740                                        NULL,
2741                                /*109*/ NULL,
2742                                        NULL,
2743                                        NULL,
2744                                        NULL,
2745                                        MatMissingDiagonal_SeqBAIJ,
2746                                /*114*/ NULL,
2747                                        NULL,
2748                                        NULL,
2749                                        NULL,
2750                                        NULL,
2751                                /*119*/ NULL,
2752                                        NULL,
2753                                        MatMultHermitianTranspose_SeqBAIJ,
2754                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2755                                        NULL,
2756                                /*124*/ NULL,
2757                                        MatGetColumnReductions_SeqBAIJ,
2758                                        MatInvertBlockDiagonal_SeqBAIJ,
2759                                        NULL,
2760                                        NULL,
2761                                /*129*/ NULL,
2762                                        NULL,
2763                                        NULL,
2764                                        NULL,
2765                                        NULL,
2766                                /*134*/ NULL,
2767                                        NULL,
2768                                        NULL,
2769                                        NULL,
2770                                        NULL,
2771                                /*139*/ MatSetBlockSizes_Default,
2772                                        NULL,
2773                                        NULL,
2774                                        MatFDColoringSetUp_SeqXAIJ,
2775                                        NULL,
2776                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2777                                        MatDestroySubMatrices_SeqBAIJ,
2778                                        NULL,
2779                                        NULL
2780 };
2781 
2782 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2783 {
2784   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2785   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2786 
2787   PetscFunctionBegin;
2788   PetscCheck(aij->nonew == 1,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2789 
2790   /* allocate space for values if not already there */
2791   if (!aij->saved_values) {
2792     PetscCall(PetscMalloc1(nz+1,&aij->saved_values));
2793     PetscCall(PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar)));
2794   }
2795 
2796   /* copy values over */
2797   PetscCall(PetscArraycpy(aij->saved_values,aij->a,nz));
2798   PetscFunctionReturn(0);
2799 }
2800 
2801 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2802 {
2803   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2804   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2805 
2806   PetscFunctionBegin;
2807   PetscCheck(aij->nonew == 1,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2808   PetscCheck(aij->saved_values,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2809 
2810   /* copy values over */
2811   PetscCall(PetscArraycpy(aij->a,aij->saved_values,nz));
2812   PetscFunctionReturn(0);
2813 }
2814 
2815 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2816 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2817 
2818 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2819 {
2820   Mat_SeqBAIJ    *b;
2821   PetscInt       i,mbs,nbs,bs2;
2822   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2823 
2824   PetscFunctionBegin;
2825   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2826   if (nz == MAT_SKIP_ALLOCATION) {
2827     skipallocation = PETSC_TRUE;
2828     nz             = 0;
2829   }
2830 
2831   PetscCall(MatSetBlockSize(B,PetscAbs(bs)));
2832   PetscCall(PetscLayoutSetUp(B->rmap));
2833   PetscCall(PetscLayoutSetUp(B->cmap));
2834   PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs));
2835 
2836   B->preallocated = PETSC_TRUE;
2837 
2838   mbs = B->rmap->n/bs;
2839   nbs = B->cmap->n/bs;
2840   bs2 = bs*bs;
2841 
2842   PetscCheck(mbs*bs==B->rmap->n && nbs*bs==B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT,B->rmap->N,B->cmap->n,bs);
2843 
2844   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2845   PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %" PetscInt_FMT,nz);
2846   if (nnz) {
2847     for (i=0; i<mbs; i++) {
2848       PetscCheck(nnz[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,nnz[i]);
2849       PetscCheck(nnz[i] <= nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT,i,nnz[i],nbs);
2850     }
2851   }
2852 
2853   b    = (Mat_SeqBAIJ*)B->data;
2854   PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");
2855   PetscCall(PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL));
2856   PetscOptionsEnd();
2857 
2858   if (!flg) {
2859     switch (bs) {
2860     case 1:
2861       B->ops->mult    = MatMult_SeqBAIJ_1;
2862       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2863       break;
2864     case 2:
2865       B->ops->mult    = MatMult_SeqBAIJ_2;
2866       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2867       break;
2868     case 3:
2869       B->ops->mult    = MatMult_SeqBAIJ_3;
2870       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2871       break;
2872     case 4:
2873       B->ops->mult    = MatMult_SeqBAIJ_4;
2874       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2875       break;
2876     case 5:
2877       B->ops->mult    = MatMult_SeqBAIJ_5;
2878       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2879       break;
2880     case 6:
2881       B->ops->mult    = MatMult_SeqBAIJ_6;
2882       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2883       break;
2884     case 7:
2885       B->ops->mult    = MatMult_SeqBAIJ_7;
2886       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2887       break;
2888     case 9:
2889     {
2890       PetscInt version = 1;
2891       PetscCall(PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL));
2892       switch (version) {
2893 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2894       case 1:
2895         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
2896         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
2897         PetscCall(PetscInfo((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs));
2898         break;
2899 #endif
2900       default:
2901         B->ops->mult    = MatMult_SeqBAIJ_N;
2902         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2903         PetscCall(PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs));
2904         break;
2905       }
2906       break;
2907     }
2908     case 11:
2909       B->ops->mult    = MatMult_SeqBAIJ_11;
2910       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2911       break;
2912     case 12:
2913     {
2914       PetscInt version = 1;
2915       PetscCall(PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL));
2916       switch (version) {
2917       case 1:
2918         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
2919         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
2920         PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs));
2921         break;
2922       case 2:
2923         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
2924         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
2925         PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs));
2926         break;
2927 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2928       case 3:
2929         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
2930         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
2931         PetscCall(PetscInfo((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs));
2932         break;
2933 #endif
2934       default:
2935         B->ops->mult    = MatMult_SeqBAIJ_N;
2936         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2937         PetscCall(PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs));
2938         break;
2939       }
2940       break;
2941     }
2942     case 15:
2943     {
2944       PetscInt version = 1;
2945       PetscCall(PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL));
2946       switch (version) {
2947       case 1:
2948         B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2949         PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs));
2950         break;
2951       case 2:
2952         B->ops->mult    = MatMult_SeqBAIJ_15_ver2;
2953         PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs));
2954         break;
2955       case 3:
2956         B->ops->mult    = MatMult_SeqBAIJ_15_ver3;
2957         PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs));
2958         break;
2959       case 4:
2960         B->ops->mult    = MatMult_SeqBAIJ_15_ver4;
2961         PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs));
2962         break;
2963       default:
2964         B->ops->mult    = MatMult_SeqBAIJ_N;
2965         PetscCall(PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs));
2966         break;
2967       }
2968       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2969       break;
2970     }
2971     default:
2972       B->ops->mult    = MatMult_SeqBAIJ_N;
2973       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2974       PetscCall(PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs));
2975       break;
2976     }
2977   }
2978   B->ops->sor = MatSOR_SeqBAIJ;
2979   b->mbs = mbs;
2980   b->nbs = nbs;
2981   if (!skipallocation) {
2982     if (!b->imax) {
2983       PetscCall(PetscMalloc2(mbs,&b->imax,mbs,&b->ilen));
2984       PetscCall(PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt)));
2985 
2986       b->free_imax_ilen = PETSC_TRUE;
2987     }
2988     /* b->ilen will count nonzeros in each block row so far. */
2989     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2990     if (!nnz) {
2991       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2992       else if (nz < 0) nz = 1;
2993       nz = PetscMin(nz,nbs);
2994       for (i=0; i<mbs; i++) b->imax[i] = nz;
2995       PetscCall(PetscIntMultError(nz,mbs,&nz));
2996     } else {
2997       PetscInt64 nz64 = 0;
2998       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];}
2999       PetscCall(PetscIntCast(nz64,&nz));
3000     }
3001 
3002     /* allocate the matrix space */
3003     PetscCall(MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i));
3004     if (B->structure_only) {
3005       PetscCall(PetscMalloc1(nz,&b->j));
3006       PetscCall(PetscMalloc1(B->rmap->N+1,&b->i));
3007       PetscCall(PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt)));
3008     } else {
3009       PetscInt nzbs2 = 0;
3010       PetscCall(PetscIntMultError(nz,bs2,&nzbs2));
3011       PetscCall(PetscMalloc3(nzbs2,&b->a,nz,&b->j,B->rmap->N+1,&b->i));
3012       PetscCall(PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))));
3013       PetscCall(PetscArrayzero(b->a,nz*bs2));
3014     }
3015     PetscCall(PetscArrayzero(b->j,nz));
3016 
3017     if (B->structure_only) {
3018       b->singlemalloc = PETSC_FALSE;
3019       b->free_a       = PETSC_FALSE;
3020     } else {
3021       b->singlemalloc = PETSC_TRUE;
3022       b->free_a       = PETSC_TRUE;
3023     }
3024     b->free_ij = PETSC_TRUE;
3025 
3026     b->i[0] = 0;
3027     for (i=1; i<mbs+1; i++) {
3028       b->i[i] = b->i[i-1] + b->imax[i-1];
3029     }
3030 
3031   } else {
3032     b->free_a  = PETSC_FALSE;
3033     b->free_ij = PETSC_FALSE;
3034   }
3035 
3036   b->bs2              = bs2;
3037   b->mbs              = mbs;
3038   b->nz               = 0;
3039   b->maxnz            = nz;
3040   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3041   B->was_assembled    = PETSC_FALSE;
3042   B->assembled        = PETSC_FALSE;
3043   if (realalloc) PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE));
3044   PetscFunctionReturn(0);
3045 }
3046 
3047 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3048 {
3049   PetscInt       i,m,nz,nz_max=0,*nnz;
3050   PetscScalar    *values=NULL;
3051   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
3052 
3053   PetscFunctionBegin;
3054   PetscCheck(bs >= 1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %" PetscInt_FMT,bs);
3055   PetscCall(PetscLayoutSetBlockSize(B->rmap,bs));
3056   PetscCall(PetscLayoutSetBlockSize(B->cmap,bs));
3057   PetscCall(PetscLayoutSetUp(B->rmap));
3058   PetscCall(PetscLayoutSetUp(B->cmap));
3059   PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs));
3060   m    = B->rmap->n/bs;
3061 
3062   PetscCheck(ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]);
3063   PetscCall(PetscMalloc1(m+1, &nnz));
3064   for (i=0; i<m; i++) {
3065     nz = ii[i+1]- ii[i];
3066     PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nz);
3067     nz_max = PetscMax(nz_max, nz);
3068     nnz[i] = nz;
3069   }
3070   PetscCall(MatSeqBAIJSetPreallocation(B,bs,0,nnz));
3071   PetscCall(PetscFree(nnz));
3072 
3073   values = (PetscScalar*)V;
3074   if (!values) {
3075     PetscCall(PetscCalloc1(bs*bs*(nz_max+1),&values));
3076   }
3077   for (i=0; i<m; i++) {
3078     PetscInt          ncols  = ii[i+1] - ii[i];
3079     const PetscInt    *icols = jj + ii[i];
3080     if (bs == 1 || !roworiented) {
3081       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3082       PetscCall(MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES));
3083     } else {
3084       PetscInt j;
3085       for (j=0; j<ncols; j++) {
3086         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
3087         PetscCall(MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES));
3088       }
3089     }
3090   }
3091   if (!V) PetscCall(PetscFree(values));
3092   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
3093   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
3094   PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
3095   PetscFunctionReturn(0);
3096 }
3097 
3098 /*@C
3099    MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored
3100 
3101    Not Collective
3102 
3103    Input Parameter:
3104 .  mat - a MATSEQBAIJ matrix
3105 
3106    Output Parameter:
3107 .   array - pointer to the data
3108 
3109    Level: intermediate
3110 
3111 .seealso: `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3112 @*/
3113 PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array)
3114 {
3115   PetscFunctionBegin;
3116   PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));
3117   PetscFunctionReturn(0);
3118 }
3119 
3120 /*@C
3121    MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray()
3122 
3123    Not Collective
3124 
3125    Input Parameters:
3126 +  mat - a MATSEQBAIJ matrix
3127 -  array - pointer to the data
3128 
3129    Level: intermediate
3130 
3131 .seealso: `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3132 @*/
3133 PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array)
3134 {
3135   PetscFunctionBegin;
3136   PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));
3137   PetscFunctionReturn(0);
3138 }
3139 
3140 /*MC
3141    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3142    block sparse compressed row format.
3143 
3144    Options Database Keys:
3145 + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3146 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
3147 
3148    Level: beginner
3149 
3150    Notes:
3151     MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
3152     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
3153 
3154    Run with -info to see what version of the matrix-vector product is being used
3155 
3156 .seealso: `MatCreateSeqBAIJ()`
3157 M*/
3158 
3159 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3160 
3161 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3162 {
3163   PetscMPIInt    size;
3164   Mat_SeqBAIJ    *b;
3165 
3166   PetscFunctionBegin;
3167   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
3168   PetscCheck(size == 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3169 
3170   PetscCall(PetscNewLog(B,&b));
3171   B->data = (void*)b;
3172   PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));
3173 
3174   b->row          = NULL;
3175   b->col          = NULL;
3176   b->icol         = NULL;
3177   b->reallocs     = 0;
3178   b->saved_values = NULL;
3179 
3180   b->roworiented        = PETSC_TRUE;
3181   b->nonew              = 0;
3182   b->diag               = NULL;
3183   B->spptr              = NULL;
3184   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3185   b->keepnonzeropattern = PETSC_FALSE;
3186 
3187   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ));
3188   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ));
3189   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ));
3190   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ));
3191   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ));
3192   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ));
3193   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ));
3194   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ));
3195   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ));
3196   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ));
3197 #if defined(PETSC_HAVE_HYPRE)
3198   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE));
3199 #endif
3200   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS));
3201   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ));
3202   PetscFunctionReturn(0);
3203 }
3204 
3205 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3206 {
3207   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3208   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3209 
3210   PetscFunctionBegin;
3211   PetscCheck(a->i[mbs] == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3212 
3213   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3214     c->imax           = a->imax;
3215     c->ilen           = a->ilen;
3216     c->free_imax_ilen = PETSC_FALSE;
3217   } else {
3218     PetscCall(PetscMalloc2(mbs,&c->imax,mbs,&c->ilen));
3219     PetscCall(PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt)));
3220     for (i=0; i<mbs; i++) {
3221       c->imax[i] = a->imax[i];
3222       c->ilen[i] = a->ilen[i];
3223     }
3224     c->free_imax_ilen = PETSC_TRUE;
3225   }
3226 
3227   /* allocate the matrix space */
3228   if (mallocmatspace) {
3229     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3230       PetscCall(PetscCalloc1(bs2*nz,&c->a));
3231       PetscCall(PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar)));
3232 
3233       c->i            = a->i;
3234       c->j            = a->j;
3235       c->singlemalloc = PETSC_FALSE;
3236       c->free_a       = PETSC_TRUE;
3237       c->free_ij      = PETSC_FALSE;
3238       c->parent       = A;
3239       C->preallocated = PETSC_TRUE;
3240       C->assembled    = PETSC_TRUE;
3241 
3242       PetscCall(PetscObjectReference((PetscObject)A));
3243       PetscCall(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
3244       PetscCall(MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
3245     } else {
3246       PetscCall(PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i));
3247       PetscCall(PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt)));
3248 
3249       c->singlemalloc = PETSC_TRUE;
3250       c->free_a       = PETSC_TRUE;
3251       c->free_ij      = PETSC_TRUE;
3252 
3253       PetscCall(PetscArraycpy(c->i,a->i,mbs+1));
3254       if (mbs > 0) {
3255         PetscCall(PetscArraycpy(c->j,a->j,nz));
3256         if (cpvalues == MAT_COPY_VALUES) {
3257           PetscCall(PetscArraycpy(c->a,a->a,bs2*nz));
3258         } else {
3259           PetscCall(PetscArrayzero(c->a,bs2*nz));
3260         }
3261       }
3262       C->preallocated = PETSC_TRUE;
3263       C->assembled    = PETSC_TRUE;
3264     }
3265   }
3266 
3267   c->roworiented = a->roworiented;
3268   c->nonew       = a->nonew;
3269 
3270   PetscCall(PetscLayoutReference(A->rmap,&C->rmap));
3271   PetscCall(PetscLayoutReference(A->cmap,&C->cmap));
3272 
3273   c->bs2         = a->bs2;
3274   c->mbs         = a->mbs;
3275   c->nbs         = a->nbs;
3276 
3277   if (a->diag) {
3278     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3279       c->diag      = a->diag;
3280       c->free_diag = PETSC_FALSE;
3281     } else {
3282       PetscCall(PetscMalloc1(mbs+1,&c->diag));
3283       PetscCall(PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt)));
3284       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3285       c->free_diag = PETSC_TRUE;
3286     }
3287   } else c->diag = NULL;
3288 
3289   c->nz         = a->nz;
3290   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3291   c->solve_work = NULL;
3292   c->mult_work  = NULL;
3293   c->sor_workt  = NULL;
3294   c->sor_work   = NULL;
3295 
3296   c->compressedrow.use   = a->compressedrow.use;
3297   c->compressedrow.nrows = a->compressedrow.nrows;
3298   if (a->compressedrow.use) {
3299     i    = a->compressedrow.nrows;
3300     PetscCall(PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex));
3301     PetscCall(PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt)));
3302     PetscCall(PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1));
3303     PetscCall(PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i));
3304   } else {
3305     c->compressedrow.use    = PETSC_FALSE;
3306     c->compressedrow.i      = NULL;
3307     c->compressedrow.rindex = NULL;
3308   }
3309   C->nonzerostate = A->nonzerostate;
3310 
3311   PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist));
3312   PetscFunctionReturn(0);
3313 }
3314 
3315 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3316 {
3317   PetscFunctionBegin;
3318   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),B));
3319   PetscCall(MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n));
3320   PetscCall(MatSetType(*B,MATSEQBAIJ));
3321   PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE));
3322   PetscFunctionReturn(0);
3323 }
3324 
3325 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3326 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
3327 {
3328   PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
3329   PetscInt       *rowidxs,*colidxs;
3330   PetscScalar    *matvals;
3331 
3332   PetscFunctionBegin;
3333   PetscCall(PetscViewerSetUp(viewer));
3334 
3335   /* read matrix header */
3336   PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT));
3337   PetscCheck(header[0] == MAT_FILE_CLASSID,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3338   M = header[1]; N = header[2]; nz = header[3];
3339   PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
3340   PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
3341   PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as SeqBAIJ");
3342 
3343   /* set block sizes from the viewer's .info file */
3344   PetscCall(MatLoad_Binary_BlockSizes(mat,viewer));
3345   /* set local and global sizes if not set already */
3346   if (mat->rmap->n < 0) mat->rmap->n = M;
3347   if (mat->cmap->n < 0) mat->cmap->n = N;
3348   if (mat->rmap->N < 0) mat->rmap->N = M;
3349   if (mat->cmap->N < 0) mat->cmap->N = N;
3350   PetscCall(PetscLayoutSetUp(mat->rmap));
3351   PetscCall(PetscLayoutSetUp(mat->cmap));
3352 
3353   /* check if the matrix sizes are correct */
3354   PetscCall(MatGetSize(mat,&rows,&cols));
3355   PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);
3356   PetscCall(MatGetBlockSize(mat,&bs));
3357   PetscCall(MatGetLocalSize(mat,&m,&n));
3358   mbs = m/bs; nbs = n/bs;
3359 
3360   /* read in row lengths, column indices and nonzero values */
3361   PetscCall(PetscMalloc1(m+1,&rowidxs));
3362   PetscCall(PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT));
3363   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3364   sum = rowidxs[m];
3365   PetscCheck(sum == nz,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);
3366 
3367   /* read in column indices and nonzero values */
3368   PetscCall(PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals));
3369   PetscCall(PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT));
3370   PetscCall(PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR));
3371 
3372   { /* preallocate matrix storage */
3373     PetscBT   bt; /* helper bit set to count nonzeros */
3374     PetscInt  *nnz;
3375     PetscBool sbaij;
3376 
3377     PetscCall(PetscBTCreate(nbs,&bt));
3378     PetscCall(PetscCalloc1(mbs,&nnz));
3379     PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATSEQSBAIJ,&sbaij));
3380     for (i=0; i<mbs; i++) {
3381       PetscCall(PetscBTMemzero(nbs,bt));
3382       for (k=0; k<bs; k++) {
3383         PetscInt row = bs*i + k;
3384         for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
3385           PetscInt col = colidxs[j];
3386           if (!sbaij || col >= row)
3387             if (!PetscBTLookupSet(bt,col/bs)) nnz[i]++;
3388         }
3389       }
3390     }
3391     PetscCall(PetscBTDestroy(&bt));
3392     PetscCall(MatSeqBAIJSetPreallocation(mat,bs,0,nnz));
3393     PetscCall(MatSeqSBAIJSetPreallocation(mat,bs,0,nnz));
3394     PetscCall(PetscFree(nnz));
3395   }
3396 
3397   /* store matrix values */
3398   for (i=0; i<m; i++) {
3399     PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1];
3400     PetscCall((*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES));
3401   }
3402 
3403   PetscCall(PetscFree(rowidxs));
3404   PetscCall(PetscFree2(colidxs,matvals));
3405   PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
3406   PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
3407   PetscFunctionReturn(0);
3408 }
3409 
3410 PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer)
3411 {
3412   PetscBool isbinary;
3413 
3414   PetscFunctionBegin;
3415   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
3416   PetscCheck(isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3417   PetscCall(MatLoad_SeqBAIJ_Binary(mat,viewer));
3418   PetscFunctionReturn(0);
3419 }
3420 
3421 /*@C
3422    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3423    compressed row) format.  For good matrix assembly performance the
3424    user should preallocate the matrix storage by setting the parameter nz
3425    (or the array nnz).  By setting these parameters accurately, performance
3426    during matrix assembly can be increased by more than a factor of 50.
3427 
3428    Collective
3429 
3430    Input Parameters:
3431 +  comm - MPI communicator, set to PETSC_COMM_SELF
3432 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3433           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3434 .  m - number of rows
3435 .  n - number of columns
3436 .  nz - number of nonzero blocks  per block row (same for all rows)
3437 -  nnz - array containing the number of nonzero blocks in the various block rows
3438          (possibly different for each block row) or NULL
3439 
3440    Output Parameter:
3441 .  A - the matrix
3442 
3443    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3444    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3445    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3446 
3447    Options Database Keys:
3448 +   -mat_no_unroll - uses code that does not unroll the loops in the
3449                      block calculations (much slower)
3450 -    -mat_block_size - size of the blocks to use
3451 
3452    Level: intermediate
3453 
3454    Notes:
3455    The number of rows and columns must be divisible by blocksize.
3456 
3457    If the nnz parameter is given then the nz parameter is ignored
3458 
3459    A nonzero block is any block that as 1 or more nonzeros in it
3460 
3461    The block AIJ format is fully compatible with standard Fortran 77
3462    storage.  That is, the stored row and column indices can begin at
3463    either one (as in Fortran) or zero.  See the users' manual for details.
3464 
3465    Specify the preallocated storage with either nz or nnz (not both).
3466    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3467    allocation.  See Users-Manual: ch_mat for details.
3468    matrices.
3469 
3470 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
3471 @*/
3472 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3473 {
3474   PetscFunctionBegin;
3475   PetscCall(MatCreate(comm,A));
3476   PetscCall(MatSetSizes(*A,m,n,m,n));
3477   PetscCall(MatSetType(*A,MATSEQBAIJ));
3478   PetscCall(MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz));
3479   PetscFunctionReturn(0);
3480 }
3481 
3482 /*@C
3483    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3484    per row in the matrix. For good matrix assembly performance the
3485    user should preallocate the matrix storage by setting the parameter nz
3486    (or the array nnz).  By setting these parameters accurately, performance
3487    during matrix assembly can be increased by more than a factor of 50.
3488 
3489    Collective
3490 
3491    Input Parameters:
3492 +  B - the matrix
3493 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3494           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3495 .  nz - number of block nonzeros per block row (same for all rows)
3496 -  nnz - array containing the number of block nonzeros in the various block rows
3497          (possibly different for each block row) or NULL
3498 
3499    Options Database Keys:
3500 +   -mat_no_unroll - uses code that does not unroll the loops in the
3501                      block calculations (much slower)
3502 -   -mat_block_size - size of the blocks to use
3503 
3504    Level: intermediate
3505 
3506    Notes:
3507    If the nnz parameter is given then the nz parameter is ignored
3508 
3509    You can call MatGetInfo() to get information on how effective the preallocation was;
3510    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3511    You can also run with the option -info and look for messages with the string
3512    malloc in them to see if additional memory allocation was needed.
3513 
3514    The block AIJ format is fully compatible with standard Fortran 77
3515    storage.  That is, the stored row and column indices can begin at
3516    either one (as in Fortran) or zero.  See the users' manual for details.
3517 
3518    Specify the preallocated storage with either nz or nnz (not both).
3519    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3520    allocation.  See Users-Manual: ch_mat for details.
3521 
3522 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()`
3523 @*/
3524 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3525 {
3526   PetscFunctionBegin;
3527   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3528   PetscValidType(B,1);
3529   PetscValidLogicalCollectiveInt(B,bs,2);
3530   PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));
3531   PetscFunctionReturn(0);
3532 }
3533 
3534 /*@C
3535    MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
3536 
3537    Collective
3538 
3539    Input Parameters:
3540 +  B - the matrix
3541 .  i - the indices into j for the start of each local row (starts with zero)
3542 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3543 -  v - optional values in the matrix
3544 
3545    Level: advanced
3546 
3547    Notes:
3548    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3549    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3550    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3551    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3552    block column and the second index is over columns within a block.
3553 
3554    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3555 
3556 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ`
3557 @*/
3558 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3559 {
3560   PetscFunctionBegin;
3561   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3562   PetscValidType(B,1);
3563   PetscValidLogicalCollectiveInt(B,bs,2);
3564   PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
3565   PetscFunctionReturn(0);
3566 }
3567 
3568 /*@
3569      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3570 
3571      Collective
3572 
3573    Input Parameters:
3574 +  comm - must be an MPI communicator of size 1
3575 .  bs - size of block
3576 .  m - number of rows
3577 .  n - number of columns
3578 .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3579 .  j - column indices
3580 -  a - matrix values
3581 
3582    Output Parameter:
3583 .  mat - the matrix
3584 
3585    Level: advanced
3586 
3587    Notes:
3588        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3589     once the matrix is destroyed
3590 
3591        You cannot set new nonzero locations into this matrix, that will generate an error.
3592 
3593        The i and j indices are 0 based
3594 
3595        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3596 
3597       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3598       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3599       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3600       with column-major ordering within blocks.
3601 
3602 .seealso: `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()`
3603 
3604 @*/
3605 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3606 {
3607   PetscInt       ii;
3608   Mat_SeqBAIJ    *baij;
3609 
3610   PetscFunctionBegin;
3611   PetscCheck(bs == 1,PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %" PetscInt_FMT " > 1 is not supported yet",bs);
3612   if (m > 0) PetscCheck(i[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3613 
3614   PetscCall(MatCreate(comm,mat));
3615   PetscCall(MatSetSizes(*mat,m,n,m,n));
3616   PetscCall(MatSetType(*mat,MATSEQBAIJ));
3617   PetscCall(MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,NULL));
3618   baij = (Mat_SeqBAIJ*)(*mat)->data;
3619   PetscCall(PetscMalloc2(m,&baij->imax,m,&baij->ilen));
3620   PetscCall(PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt)));
3621 
3622   baij->i = i;
3623   baij->j = j;
3624   baij->a = a;
3625 
3626   baij->singlemalloc = PETSC_FALSE;
3627   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3628   baij->free_a       = PETSC_FALSE;
3629   baij->free_ij      = PETSC_FALSE;
3630 
3631   for (ii=0; ii<m; ii++) {
3632     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3633     PetscCheck(i[ii+1] - i[ii] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT,ii,i[ii+1] - i[ii]);
3634   }
3635   if (PetscDefined(USE_DEBUG)) {
3636     for (ii=0; ii<baij->i[m]; ii++) {
3637       PetscCheck(j[ii] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT,ii,j[ii]);
3638       PetscCheck(j[ii] <= n - 1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT,ii,j[ii]);
3639     }
3640   }
3641 
3642   PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
3643   PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
3644   PetscFunctionReturn(0);
3645 }
3646 
3647 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3648 {
3649   PetscMPIInt    size;
3650 
3651   PetscFunctionBegin;
3652   PetscCallMPI(MPI_Comm_size(comm,&size));
3653   if (size == 1 && scall == MAT_REUSE_MATRIX) {
3654     PetscCall(MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN));
3655   } else {
3656     PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat));
3657   }
3658   PetscFunctionReturn(0);
3659 }
3660