xref: /petsc/src/mat/impls/sbaij/seq/sbaijfact2.c (revision 1bb69edbc2f6efc8bcb8b8b24da8e7bcfef7c08b) !
1 #define PETSCMAT_DLL
2 
3 /*
4     Factorization code for SBAIJ format.
5 */
6 
7 #include "src/mat/impls/sbaij/seq/sbaij.h"
8 #include "src/mat/impls/baij/seq/baij.h"
9 #include "src/inline/ilu.h"
10 #include "src/inline/dot.h"
11 
12 #undef __FUNCT__
13 #define __FUNCT__ "MatSolve_SeqSBAIJ_N"
14 PetscErrorCode MatSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx)
15 {
16   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
17   IS              isrow=a->row;
18   PetscInt        mbs=a->mbs,*ai=a->i,*aj=a->j;
19   PetscErrorCode  ierr;
20   PetscInt        nz,*vj,k,*r,idx,k1;
21   PetscInt        bs=A->rmap.bs,bs2 = a->bs2;
22   MatScalar       *aa=a->a,*v,*diag;
23   PetscScalar     *x,*xk,*xj,*b,*xk_tmp,*t;
24 
25   PetscFunctionBegin;
26   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
27   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
28   t  = a->solve_work;
29   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
30   ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr);
31 
32   /* solve U^T * D * y = b by forward substitution */
33   xk = t;
34   for (k=0; k<mbs; k++) { /* t <- perm(b) */
35     idx   = bs*r[k];
36     for (k1=0; k1<bs; k1++) *xk++ = b[idx+k1];
37   }
38   for (k=0; k<mbs; k++){
39     v  = aa + bs2*ai[k];
40     xk = t + k*bs;      /* Dk*xk = k-th block of x */
41     ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */
42     nz = ai[k+1] - ai[k];
43     vj = aj + ai[k];
44     xj = t + (*vj)*bs;  /* *vj-th block of x, *vj>k */
45     while (nz--) {
46       /* x(:) += U(k,:)^T*(Dk*xk) */
47       Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */
48       vj++; xj = t + (*vj)*bs;
49       v += bs2;
50     }
51     /* xk = inv(Dk)*(Dk*xk) */
52     diag = aa+k*bs2;                            /* ptr to inv(Dk) */
53     Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */
54   }
55 
56   /* solve U*x = y by back substitution */
57   for (k=mbs-1; k>=0; k--){
58     v  = aa + bs2*ai[k];
59     xk = t + k*bs;        /* xk */
60     nz = ai[k+1] - ai[k];
61     vj = aj + ai[k];
62     xj = t + (*vj)*bs;
63     while (nz--) {
64       /* xk += U(k,:)*x(:) */
65       Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */
66       vj++;
67       v += bs2; xj = t + (*vj)*bs;
68     }
69     idx   = bs*r[k];
70     for (k1=0; k1<bs; k1++) x[idx+k1] = *xk++;
71   }
72 
73   ierr = PetscFree(xk_tmp);CHKERRQ(ierr);
74   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
75   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
76   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
77   ierr = PetscLogFlops(bs2*(2*a->nz + mbs));CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 
81 #undef __FUNCT__
82 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_N"
83 PetscErrorCode MatForwardSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx)
84 {
85   PetscFunctionBegin;
86   SETERRQ(1,"not implemented yet");
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_N"
92 PetscErrorCode MatBackwardSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx)
93 {
94   PetscFunctionBegin;
95   SETERRQ(1,"not implemented yet");
96   PetscFunctionReturn(0);
97 }
98 
99 #undef __FUNCT__
100 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private"
101 PetscErrorCode ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x)
102 {
103   PetscErrorCode ierr;
104   PetscInt       nz,*vj,k;
105   PetscInt       bs2 = bs*bs;
106   MatScalar      *v,*diag;
107   PetscScalar    *xk,*xj,*xk_tmp;
108 
109   PetscFunctionBegin;
110   ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr);
111   for (k=0; k<mbs; k++){
112     v  = aa + bs2*ai[k];
113     xk = x + k*bs;      /* Dk*xk = k-th block of x */
114     ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */
115     nz = ai[k+1] - ai[k];
116     vj = aj + ai[k];
117     xj = x + (*vj)*bs;  /* *vj-th block of x, *vj>k */
118     while (nz--) {
119       /* x(:) += U(k,:)^T*(Dk*xk) */
120       Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */
121       vj++; xj = x + (*vj)*bs;
122       v += bs2;
123     }
124     /* xk = inv(Dk)*(Dk*xk) */
125     diag = aa+k*bs2;                            /* ptr to inv(Dk) */
126     Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */
127   }
128   ierr = PetscFree(xk_tmp);CHKERRQ(ierr);
129   PetscFunctionReturn(0);
130 }
131 
132 #undef __FUNCT__
133 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private"
134 PetscErrorCode BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x)
135 {
136   PetscInt       nz,*vj,k;
137   PetscInt       bs2 = bs*bs;
138   MatScalar      *v;
139   PetscScalar    *xk,*xj;
140 
141   PetscFunctionBegin;
142   for (k=mbs-1; k>=0; k--){
143     v  = aa + bs2*ai[k];
144     xk = x + k*bs;        /* xk */
145     nz = ai[k+1] - ai[k];
146     vj = aj + ai[k];
147     xj = x + (*vj)*bs;
148     while (nz--) {
149       /* xk += U(k,:)*x(:) */
150       Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */
151       vj++;
152       v += bs2; xj = x + (*vj)*bs;
153     }
154   }
155   PetscFunctionReturn(0);
156 }
157 
158 #undef __FUNCT__
159 #define __FUNCT__ "MatSolve_SeqSBAIJ_N_NaturalOrdering"
160 PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx)
161 {
162   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
163   PetscErrorCode ierr;
164   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
165   PetscInt       bs=A->rmap.bs,bs2 = a->bs2;
166   MatScalar      *aa=a->a;
167   PetscScalar    *x,*b;
168 
169   PetscFunctionBegin;
170   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
171   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
172 
173   /* solve U^T * D * y = b by forward substitution */
174   ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
175   ierr = ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr);
176 
177   /* solve U*x = y by back substitution */
178   ierr = BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr);
179 
180   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
181   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
182   ierr = PetscLogFlops(bs2*(2*a->nz + mbs));CHKERRQ(ierr);
183   PetscFunctionReturn(0);
184 }
185 
186 #undef __FUNCT__
187 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_N_NaturalOrdering"
188 PetscErrorCode MatForwardSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx)
189 {
190   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
191   PetscErrorCode ierr;
192   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
193   PetscInt       bs=A->rmap.bs,bs2 = a->bs2;
194   MatScalar      *aa=a->a;
195   PetscScalar    *x,*b;
196 
197   PetscFunctionBegin;
198   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
199   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
200   ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
201   ierr = ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr);
202   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
203   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
204   ierr = PetscLogFlops(bs2*a->nz + A->rmap.N);CHKERRQ(ierr);
205   PetscFunctionReturn(0);
206 }
207 
208 #undef __FUNCT__
209 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering"
210 PetscErrorCode MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx)
211 {
212   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
213   PetscErrorCode ierr;
214   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
215   PetscInt       bs=A->rmap.bs,bs2 = a->bs2;
216   MatScalar      *aa=a->a;
217   PetscScalar    *x,*b;
218 
219   PetscFunctionBegin;
220   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
221   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
222   ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
223   ierr = BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr);
224   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
225   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
226   ierr = PetscLogFlops(bs2*a->nz);CHKERRQ(ierr);
227   PetscFunctionReturn(0);
228 }
229 
230 #undef __FUNCT__
231 #define __FUNCT__ "MatSolve_SeqSBAIJ_7"
232 PetscErrorCode MatSolve_SeqSBAIJ_7(Mat A,Vec bb,Vec xx)
233 {
234   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
235   IS             isrow=a->row;
236   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
237   PetscErrorCode ierr;
238   PetscInt       nz,*vj,k,*r,idx;
239   MatScalar      *aa=a->a,*v,*d;
240   PetscScalar    *x,*b,x0,x1,x2,x3,x4,x5,x6,*t,*tp;
241 
242   PetscFunctionBegin;
243   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
244   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
245   t  = a->solve_work;
246   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
247 
248   /* solve U^T * D * y = b by forward substitution */
249   tp = t;
250   for (k=0; k<mbs; k++) { /* t <- perm(b) */
251     idx   = 7*r[k];
252     tp[0] = b[idx];
253     tp[1] = b[idx+1];
254     tp[2] = b[idx+2];
255     tp[3] = b[idx+3];
256     tp[4] = b[idx+4];
257     tp[5] = b[idx+5];
258     tp[6] = b[idx+6];
259     tp += 7;
260   }
261 
262   for (k=0; k<mbs; k++){
263     v  = aa + 49*ai[k];
264     vj = aj + ai[k];
265     tp = t + k*7;
266     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6];
267     nz = ai[k+1] - ai[k];
268     tp = t + (*vj)*7;
269     while (nz--) {
270       tp[0]+=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6;
271       tp[1]+=  v[7]*x0 +  v[8]*x1 +  v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6;
272       tp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6;
273       tp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6;
274       tp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6;
275       tp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6;
276       tp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6;
277       vj++; tp = t + (*vj)*7;
278       v += 49;
279     }
280 
281     /* xk = inv(Dk)*(Dk*xk) */
282     d  = aa+k*49;          /* ptr to inv(Dk) */
283     tp    = t + k*7;
284     tp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6;
285     tp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6;
286     tp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6;
287     tp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6;
288     tp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6;
289     tp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6;
290     tp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6;
291   }
292 
293   /* solve U*x = y by back substitution */
294   for (k=mbs-1; k>=0; k--){
295     v  = aa + 49*ai[k];
296     vj = aj + ai[k];
297     tp    = t + k*7;
298     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];  x6=tp[6]; /* xk */
299     nz = ai[k+1] - ai[k];
300 
301     tp = t + (*vj)*7;
302     while (nz--) {
303       /* xk += U(k,:)*x(:) */
304       x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6];
305       x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6];
306       x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6];
307       x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6];
308       x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6];
309       x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6];
310       x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6];
311       vj++; tp = t + (*vj)*7;
312       v += 49;
313     }
314     tp    = t + k*7;
315     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6;
316     idx   = 7*r[k];
317     x[idx]     = x0;
318     x[idx+1]   = x1;
319     x[idx+2]   = x2;
320     x[idx+3]   = x3;
321     x[idx+4]   = x4;
322     x[idx+5]   = x5;
323     x[idx+6]   = x6;
324   }
325 
326   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
327   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
328   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
329   ierr = PetscLogFlops(49*(2*a->nz + mbs));CHKERRQ(ierr);
330   PetscFunctionReturn(0);
331 }
332 
333 #undef __FUNCT__
334 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private"
335 PetscErrorCode ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
336 {
337   MatScalar      *v,*d;
338   PetscScalar    *xp,x0,x1,x2,x3,x4,x5,x6;
339   PetscInt       nz,*vj,k;
340 
341   PetscFunctionBegin;
342   for (k=0; k<mbs; k++){
343     v  = aa + 49*ai[k];
344     xp = x + k*7;
345     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* Dk*xk = k-th block of x */
346     nz = ai[k+1] - ai[k];
347     vj = aj + ai[k];
348     xp = x + (*vj)*7;
349     while (nz--) {
350       /* x(:) += U(k,:)^T*(Dk*xk) */
351       xp[0]+=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6;
352       xp[1]+=  v[7]*x0 +  v[8]*x1 +  v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6;
353       xp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6;
354       xp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6;
355       xp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6;
356       xp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6;
357       xp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6;
358       vj++; xp = x + (*vj)*7;
359       v += 49;
360     }
361     /* xk = inv(Dk)*(Dk*xk) */
362     d  = aa+k*49;          /* ptr to inv(Dk) */
363     xp = x + k*7;
364     xp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6;
365     xp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6;
366     xp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6;
367     xp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6;
368     xp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6;
369     xp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6;
370     xp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6;
371   }
372   PetscFunctionReturn(0);
373 }
374 
375 #undef __FUNCT__
376 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private"
377 PetscErrorCode BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
378 {
379   MatScalar      *v;
380   PetscScalar    *xp,x0,x1,x2,x3,x4,x5,x6;
381   PetscInt       nz,*vj,k;
382 
383   PetscFunctionBegin;
384   for (k=mbs-1; k>=0; k--){
385     v  = aa + 49*ai[k];
386     xp = x + k*7;
387     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */
388     nz = ai[k+1] - ai[k];
389     vj = aj + ai[k];
390     xp = x + (*vj)*7;
391     while (nz--) {
392       /* xk += U(k,:)*x(:) */
393       x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6];
394       x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6];
395       x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6];
396       x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6];
397       x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6];
398       x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6];
399       x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6];
400       vj++;
401       v += 49; xp = x + (*vj)*7;
402     }
403     xp = x + k*7;
404     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6;
405   }
406   PetscFunctionReturn(0);
407 }
408 
409 #undef __FUNCT__
410 #define __FUNCT__ "MatSolve_SeqSBAIJ_7_NaturalOrdering"
411 PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
412 {
413   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
414   PetscErrorCode ierr;
415   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
416   MatScalar      *aa=a->a;
417   PetscScalar    *x,*b;
418 
419   PetscFunctionBegin;
420   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
421   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
422 
423   /* solve U^T * D * y = b by forward substitution */
424   ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
425   ierr = ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
426 
427   /* solve U*x = y by back substitution */
428   ierr = BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
429 
430   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
431   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
432   ierr = PetscLogFlops(49*(2*a->nz + mbs));CHKERRQ(ierr);
433   PetscFunctionReturn(0);
434 }
435 
436 #undef __FUNCT__
437 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_7_NaturalOrdering"
438 PetscErrorCode MatForwardSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
439 {
440   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
441   PetscErrorCode ierr;
442   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
443   MatScalar      *aa=a->a;
444   PetscScalar    *x,*b;
445 
446   PetscFunctionBegin;
447   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
448   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
449   ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
450   ierr = ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
451   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
452   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
453   ierr = PetscLogFlops(49*a->nz + mbs);CHKERRQ(ierr);
454   PetscFunctionReturn(0);
455 }
456 
457 #undef __FUNCT__
458 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering"
459 PetscErrorCode MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
460 {
461   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
462   PetscErrorCode ierr;
463   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
464   MatScalar      *aa=a->a;
465   PetscScalar    *x,*b;
466 
467   PetscFunctionBegin;
468   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
469   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
470   ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
471   ierr = BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
472   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
473   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
474   ierr = PetscLogFlops(49*a->nz);CHKERRQ(ierr);
475   PetscFunctionReturn(0);
476 }
477 
478 #undef __FUNCT__
479 #define __FUNCT__ "MatSolve_SeqSBAIJ_6"
480 PetscErrorCode MatSolve_SeqSBAIJ_6(Mat A,Vec bb,Vec xx)
481 {
482   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
483   IS             isrow=a->row;
484   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
485   PetscErrorCode ierr;
486   PetscInt       nz,*vj,k,*r,idx;
487   MatScalar      *aa=a->a,*v,*d;
488   PetscScalar    *x,*b,x0,x1,x2,x3,x4,x5,*t,*tp;
489 
490   PetscFunctionBegin;
491   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
492   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
493   t  = a->solve_work;
494   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
495 
496   /* solve U^T * D * y = b by forward substitution */
497   tp = t;
498   for (k=0; k<mbs; k++) { /* t <- perm(b) */
499     idx   = 6*r[k];
500     tp[0] = b[idx];
501     tp[1] = b[idx+1];
502     tp[2] = b[idx+2];
503     tp[3] = b[idx+3];
504     tp[4] = b[idx+4];
505     tp[5] = b[idx+5];
506     tp += 6;
507   }
508 
509   for (k=0; k<mbs; k++){
510     v  = aa + 36*ai[k];
511     vj = aj + ai[k];
512     tp = t + k*6;
513     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];
514     nz = ai[k+1] - ai[k];
515     tp = t + (*vj)*6;
516     while (nz--) {
517       tp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5;
518       tp[1] +=  v[6]*x0 +  v[7]*x1 +  v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5;
519       tp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5;
520       tp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5;
521       tp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5;
522       tp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5;
523       vj++; tp = t + (*vj)*6;
524       v += 36;
525     }
526 
527     /* xk = inv(Dk)*(Dk*xk) */
528     d  = aa+k*36;          /* ptr to inv(Dk) */
529     tp    = t + k*6;
530     tp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5;
531     tp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5;
532     tp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5;
533     tp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5;
534     tp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5;
535     tp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5;
536   }
537 
538   /* solve U*x = y by back substitution */
539   for (k=mbs-1; k>=0; k--){
540     v  = aa + 36*ai[k];
541     vj = aj + ai[k];
542     tp    = t + k*6;
543     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];  /* xk */
544     nz = ai[k+1] - ai[k];
545 
546     tp = t + (*vj)*6;
547     while (nz--) {
548       /* xk += U(k,:)*x(:) */
549       x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5];
550       x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5];
551       x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5];
552       x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5];
553       x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5];
554       x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5];
555       vj++; tp = t + (*vj)*6;
556       v += 36;
557     }
558     tp    = t + k*6;
559     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5;
560     idx   = 6*r[k];
561     x[idx]     = x0;
562     x[idx+1]   = x1;
563     x[idx+2]   = x2;
564     x[idx+3]   = x3;
565     x[idx+4]   = x4;
566     x[idx+5]   = x5;
567   }
568 
569   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
570   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
571   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
572   ierr = PetscLogFlops(36*(2*a->nz + mbs));CHKERRQ(ierr);
573   PetscFunctionReturn(0);
574 }
575 
576 #undef __FUNCT__
577 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private"
578 PetscErrorCode ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
579 {
580   MatScalar      *v,*d;
581   PetscScalar    *xp,x0,x1,x2,x3,x4,x5;
582   PetscInt       nz,*vj,k;
583 
584   PetscFunctionBegin;
585   for (k=0; k<mbs; k++){
586     v  = aa + 36*ai[k];
587     xp = x + k*6;
588     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* Dk*xk = k-th block of x */
589     nz = ai[k+1] - ai[k];
590     vj = aj + ai[k];
591     xp = x + (*vj)*6;
592     while (nz--) {
593       /* x(:) += U(k,:)^T*(Dk*xk) */
594       xp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5;
595       xp[1] +=  v[6]*x0 +  v[7]*x1 +  v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5;
596       xp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5;
597       xp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5;
598       xp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5;
599       xp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5;
600       vj++; xp = x + (*vj)*6;
601       v += 36;
602     }
603     /* xk = inv(Dk)*(Dk*xk) */
604     d  = aa+k*36;          /* ptr to inv(Dk) */
605     xp = x + k*6;
606     xp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5;
607     xp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5;
608     xp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5;
609     xp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5;
610     xp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5;
611     xp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5;
612   }
613   PetscFunctionReturn(0);
614 }
615 #undef __FUNCT__
616 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private"
617 PetscErrorCode BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
618 {
619   MatScalar      *v;
620   PetscScalar    *xp,x0,x1,x2,x3,x4,x5;
621   PetscInt       nz,*vj,k;
622 
623   PetscFunctionBegin;
624   for (k=mbs-1; k>=0; k--){
625     v  = aa + 36*ai[k];
626     xp = x + k*6;
627     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */
628     nz = ai[k+1] - ai[k];
629     vj = aj + ai[k];
630     xp = x + (*vj)*6;
631     while (nz--) {
632       /* xk += U(k,:)*x(:) */
633       x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5];
634       x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5];
635       x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5];
636       x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5];
637       x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5];
638       x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5];
639       vj++;
640       v += 36; xp = x + (*vj)*6;
641     }
642     xp = x + k*6;
643     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5;
644   }
645   PetscFunctionReturn(0);
646 }
647 
648 
649 #undef __FUNCT__
650 #define __FUNCT__ "MatSolve_SeqSBAIJ_6_NaturalOrdering"
651 PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
652 {
653   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
654   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
655   MatScalar      *aa=a->a;
656   PetscScalar    *x,*b;
657   PetscErrorCode ierr;
658 
659   PetscFunctionBegin;
660   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
661   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
662 
663   /* solve U^T * D * y = b by forward substitution */
664   ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
665   ierr = ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
666 
667   /* solve U*x = y by back substitution */
668   ierr = BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
669 
670   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
671   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
672   ierr = PetscLogFlops(36*(2*a->nz + mbs));CHKERRQ(ierr);
673   PetscFunctionReturn(0);
674 }
675 
676 #undef __FUNCT__
677 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_6_NaturalOrdering"
678 PetscErrorCode MatForwardSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
679 {
680   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
681   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
682   MatScalar      *aa=a->a;
683   PetscScalar    *x,*b;
684   PetscErrorCode ierr;
685 
686   PetscFunctionBegin;
687   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
688   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
689   ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
690   ierr = ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
691   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
692   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
693   ierr = PetscLogFlops(36*a->nz + mbs);CHKERRQ(ierr);
694   PetscFunctionReturn(0);
695 }
696 
697 #undef __FUNCT__
698 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering"
699 PetscErrorCode MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
700 {
701   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
702   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
703   MatScalar      *aa=a->a;
704   PetscScalar    *x,*b;
705   PetscErrorCode ierr;
706 
707   PetscFunctionBegin;
708   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
709   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
710   ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
711   ierr = BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
712   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
713   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
714   ierr = PetscLogFlops(36*a->nz);CHKERRQ(ierr);
715   PetscFunctionReturn(0);
716 }
717 
718 #undef __FUNCT__
719 #define __FUNCT__ "MatSolve_SeqSBAIJ_5"
720 PetscErrorCode MatSolve_SeqSBAIJ_5(Mat A,Vec bb,Vec xx)
721 {
722   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
723   IS             isrow=a->row;
724   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
725   PetscErrorCode ierr;
726   PetscInt       nz,*vj,k,*r,idx;
727   MatScalar      *aa=a->a,*v,*diag;
728   PetscScalar    *x,*b,x0,x1,x2,x3,x4,*t,*tp;
729 
730   PetscFunctionBegin;
731   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
732   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
733   t  = a->solve_work;
734   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
735 
736   /* solve U^T * D * y = b by forward substitution */
737   tp = t;
738   for (k=0; k<mbs; k++) { /* t <- perm(b) */
739     idx   = 5*r[k];
740     tp[0] = b[idx];
741     tp[1] = b[idx+1];
742     tp[2] = b[idx+2];
743     tp[3] = b[idx+3];
744     tp[4] = b[idx+4];
745     tp += 5;
746   }
747 
748   for (k=0; k<mbs; k++){
749     v  = aa + 25*ai[k];
750     vj = aj + ai[k];
751     tp = t + k*5;
752     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];
753     nz = ai[k+1] - ai[k];
754 
755     tp = t + (*vj)*5;
756     while (nz--) {
757       tp[0] +=  v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4;
758       tp[1] +=  v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4;
759       tp[2] += v[10]*x0+ v[11]*x1+ v[12]*x2+ v[13]*x3+ v[14]*x4;
760       tp[3] += v[15]*x0+ v[16]*x1+ v[17]*x2+ v[18]*x3+ v[19]*x4;
761       tp[4] += v[20]*x0+ v[21]*x1+ v[22]*x2+ v[23]*x3+ v[24]*x4;
762       vj++; tp = t + (*vj)*5;
763       v += 25;
764     }
765 
766     /* xk = inv(Dk)*(Dk*xk) */
767     diag  = aa+k*25;          /* ptr to inv(Dk) */
768     tp    = t + k*5;
769       tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
770       tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
771       tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
772       tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
773       tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
774   }
775 
776   /* solve U*x = y by back substitution */
777   for (k=mbs-1; k>=0; k--){
778     v  = aa + 25*ai[k];
779     vj = aj + ai[k];
780     tp    = t + k*5;
781     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];/* xk */
782     nz = ai[k+1] - ai[k];
783 
784     tp = t + (*vj)*5;
785     while (nz--) {
786       /* xk += U(k,:)*x(:) */
787       x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4];
788       x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4];
789       x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4];
790       x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4];
791       x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4];
792       vj++; tp = t + (*vj)*5;
793       v += 25;
794     }
795     tp    = t + k*5;
796     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4;
797     idx   = 5*r[k];
798     x[idx]     = x0;
799     x[idx+1]   = x1;
800     x[idx+2]   = x2;
801     x[idx+3]   = x3;
802     x[idx+4]   = x4;
803   }
804 
805   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
806   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
807   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
808   ierr = PetscLogFlops(25*(2*a->nz + mbs));CHKERRQ(ierr);
809   PetscFunctionReturn(0);
810 }
811 
812 #undef __FUNCT__
813 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private"
814 PetscErrorCode ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
815 {
816   MatScalar      *v,*diag;
817   PetscScalar    *xp,x0,x1,x2,x3,x4;
818   PetscInt       nz,*vj,k;
819 
820   PetscFunctionBegin;
821   for (k=0; k<mbs; k++){
822     v  = aa + 25*ai[k];
823     xp = x + k*5;
824     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */
825     nz = ai[k+1] - ai[k];
826     vj = aj + ai[k];
827     xp = x + (*vj)*5;
828     while (nz--) {
829       /* x(:) += U(k,:)^T*(Dk*xk) */
830       xp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4;
831       xp[1] +=  v[5]*x0 +  v[6]*x1 +  v[7]*x2 + v[8]*x3 + v[9]*x4;
832       xp[2] += v[10]*x0 + v[11]*x1 + v[12]*x2+ v[13]*x3+ v[14]*x4;
833       xp[3] += v[15]*x0 + v[16]*x1 + v[17]*x2+ v[18]*x3+ v[19]*x4;
834       xp[4] += v[20]*x0 + v[21]*x1 + v[22]*x2+ v[23]*x3+ v[24]*x4;
835       vj++; xp = x + (*vj)*5;
836       v += 25;
837     }
838     /* xk = inv(Dk)*(Dk*xk) */
839     diag = aa+k*25;          /* ptr to inv(Dk) */
840     xp   = x + k*5;
841     xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
842     xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
843     xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
844     xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
845     xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
846   }
847   PetscFunctionReturn(0);
848 }
849 
850 #undef __FUNCT__
851 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private"
852 PetscErrorCode BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
853 {
854   MatScalar      *v;
855   PetscScalar    *xp,x0,x1,x2,x3,x4;
856   PetscInt       nz,*vj,k;
857 
858   PetscFunctionBegin;
859   for (k=mbs-1; k>=0; k--){
860     v  = aa + 25*ai[k];
861     xp = x + k*5;
862     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* xk */
863     nz = ai[k+1] - ai[k];
864     vj = aj + ai[k];
865     xp = x + (*vj)*5;
866     while (nz--) {
867       /* xk += U(k,:)*x(:) */
868       x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4];
869       x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4];
870       x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4];
871       x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4];
872       x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4];
873       vj++;
874       v += 25; xp = x + (*vj)*5;
875     }
876     xp = x + k*5;
877     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4;
878   }
879   PetscFunctionReturn(0);
880 }
881 
882 #undef __FUNCT__
883 #define __FUNCT__ "MatSolve_SeqSBAIJ_5_NaturalOrdering"
884 PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
885 {
886   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
887   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
888   MatScalar      *aa=a->a;
889   PetscScalar    *x,*b;
890   PetscErrorCode ierr;
891 
892   PetscFunctionBegin;
893   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
894   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
895 
896   /* solve U^T * D * y = b by forward substitution */
897   ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
898   ierr = ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
899 
900   /* solve U*x = y by back substitution */
901   ierr = BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
902 
903   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
904   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
905   ierr = PetscLogFlops(25*(2*a->nz + mbs));CHKERRQ(ierr);
906   PetscFunctionReturn(0);
907 }
908 
909 #undef __FUNCT__
910 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_5_NaturalOrdering"
911 PetscErrorCode MatForwardSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
912 {
913   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
914   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
915   MatScalar      *aa=a->a;
916   PetscScalar    *x,*b;
917   PetscErrorCode ierr;
918 
919   PetscFunctionBegin;
920   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
921   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
922   ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
923   ierr = ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
924   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
925   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
926   ierr = PetscLogFlops(25*(a->nz + mbs));CHKERRQ(ierr);
927   PetscFunctionReturn(0);
928 }
929 
930 #undef __FUNCT__
931 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering"
932 PetscErrorCode MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
933 {
934   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
935   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
936   MatScalar      *aa=a->a;
937   PetscScalar    *x,*b;
938   PetscErrorCode ierr;
939 
940   PetscFunctionBegin;
941   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
942   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
943   ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
944   ierr = BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
945   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
946   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
947   ierr = PetscLogFlops(25*a->nz);CHKERRQ(ierr);
948   PetscFunctionReturn(0);
949 }
950 
951 #undef __FUNCT__
952 #define __FUNCT__ "MatSolve_SeqSBAIJ_4"
953 PetscErrorCode MatSolve_SeqSBAIJ_4(Mat A,Vec bb,Vec xx)
954 {
955   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
956   IS             isrow=a->row;
957   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
958   PetscErrorCode ierr;
959   PetscInt       nz,*vj,k,*r,idx;
960   MatScalar      *aa=a->a,*v,*diag;
961   PetscScalar    *x,*b,x0,x1,x2,x3,*t,*tp;
962 
963   PetscFunctionBegin;
964   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
965   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
966   t  = a->solve_work;
967   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
968 
969   /* solve U^T * D * y = b by forward substitution */
970   tp = t;
971   for (k=0; k<mbs; k++) { /* t <- perm(b) */
972     idx   = 4*r[k];
973     tp[0] = b[idx];
974     tp[1] = b[idx+1];
975     tp[2] = b[idx+2];
976     tp[3] = b[idx+3];
977     tp += 4;
978   }
979 
980   for (k=0; k<mbs; k++){
981     v  = aa + 16*ai[k];
982     vj = aj + ai[k];
983     tp = t + k*4;
984     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3];
985     nz = ai[k+1] - ai[k];
986 
987     tp = t + (*vj)*4;
988     while (nz--) {
989       tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3;
990       tp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3;
991       tp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3;
992       tp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3;
993       vj++; tp = t + (*vj)*4;
994       v += 16;
995     }
996 
997     /* xk = inv(Dk)*(Dk*xk) */
998     diag  = aa+k*16;          /* ptr to inv(Dk) */
999     tp    = t + k*4;
1000     tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
1001     tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
1002     tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
1003     tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
1004   }
1005 
1006   /* solve U*x = y by back substitution */
1007   for (k=mbs-1; k>=0; k--){
1008     v  = aa + 16*ai[k];
1009     vj = aj + ai[k];
1010     tp    = t + k*4;
1011     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */
1012     nz = ai[k+1] - ai[k];
1013 
1014     tp = t + (*vj)*4;
1015     while (nz--) {
1016       /* xk += U(k,:)*x(:) */
1017       x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3];
1018       x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3];
1019       x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3];
1020       x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3];
1021       vj++; tp = t + (*vj)*4;
1022       v += 16;
1023     }
1024     tp    = t + k*4;
1025     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3;
1026     idx        = 4*r[k];
1027     x[idx]     = x0;
1028     x[idx+1]   = x1;
1029     x[idx+2]   = x2;
1030     x[idx+3]   = x3;
1031   }
1032 
1033   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1034   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1035   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1036   ierr = PetscLogFlops(16*(2*a->nz + mbs));CHKERRQ(ierr);
1037   PetscFunctionReturn(0);
1038 }
1039 
1040 #undef __FUNCT__
1041 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private"
1042 PetscErrorCode ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1043 {
1044   MatScalar      *v,*diag;
1045   PetscScalar    *xp,x0,x1,x2,x3;
1046   PetscInt       nz,*vj,k;
1047 
1048   PetscFunctionBegin;
1049   for (k=0; k<mbs; k++){
1050     v  = aa + 16*ai[k];
1051     xp = x + k*4;
1052     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */
1053     nz = ai[k+1] - ai[k];
1054     vj = aj + ai[k];
1055     xp = x + (*vj)*4;
1056     while (nz--) {
1057       /* x(:) += U(k,:)^T*(Dk*xk) */
1058       xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3;
1059       xp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3;
1060       xp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3;
1061       xp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3;
1062       vj++; xp = x + (*vj)*4;
1063       v += 16;
1064     }
1065     /* xk = inv(Dk)*(Dk*xk) */
1066     diag = aa+k*16;          /* ptr to inv(Dk) */
1067     xp   = x + k*4;
1068     xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
1069     xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
1070     xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
1071     xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
1072   }
1073   PetscFunctionReturn(0);
1074 }
1075 
1076 #undef __FUNCT__
1077 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private"
1078 PetscErrorCode BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1079 {
1080   MatScalar      *v;
1081   PetscScalar    *xp,x0,x1,x2,x3;
1082   PetscInt       nz,*vj,k;
1083 
1084   PetscFunctionBegin;
1085   for (k=mbs-1; k>=0; k--){
1086     v  = aa + 16*ai[k];
1087     xp = x + k*4;
1088     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */
1089     nz = ai[k+1] - ai[k];
1090     vj = aj + ai[k];
1091     xp = x + (*vj)*4;
1092     while (nz--) {
1093       /* xk += U(k,:)*x(:) */
1094       x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3];
1095       x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3];
1096       x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3];
1097       x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3];
1098       vj++;
1099       v += 16; xp = x + (*vj)*4;
1100     }
1101     xp = x + k*4;
1102     xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3;
1103   }
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 #undef __FUNCT__
1108 #define __FUNCT__ "MatSolve_SeqSBAIJ_4_NaturalOrdering"
1109 PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
1110 {
1111   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1112   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1113   MatScalar      *aa=a->a;
1114   PetscScalar    *x,*b;
1115   PetscErrorCode ierr;
1116 
1117   PetscFunctionBegin;
1118   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1119   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1120 
1121   /* solve U^T * D * y = b by forward substitution */
1122   ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
1123   ierr = ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1124 
1125   /* solve U*x = y by back substitution */
1126   ierr = BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1127   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1128   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1129   ierr = PetscLogFlops(16*(2*a->nz + mbs));CHKERRQ(ierr);
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 #undef __FUNCT__
1134 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_4_NaturalOrdering"
1135 PetscErrorCode MatForwardSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
1136 {
1137   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1138   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1139   MatScalar      *aa=a->a;
1140   PetscScalar    *x,*b;
1141   PetscErrorCode ierr;
1142 
1143   PetscFunctionBegin;
1144   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1145   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1146   ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
1147   ierr = ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1148   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1149   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1150   ierr = PetscLogFlops(16*a->nz + mbs);CHKERRQ(ierr);
1151   PetscFunctionReturn(0);
1152 }
1153 
1154 #undef __FUNCT__
1155 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering"
1156 PetscErrorCode MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
1157 {
1158   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1159   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1160   MatScalar      *aa=a->a;
1161   PetscScalar    *x,*b;
1162   PetscErrorCode ierr;
1163 
1164   PetscFunctionBegin;
1165   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1166   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1167   ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1168   ierr = BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1169   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1170   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1171   ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr);
1172   PetscFunctionReturn(0);
1173 }
1174 
1175 #undef __FUNCT__
1176 #define __FUNCT__ "MatSolve_SeqSBAIJ_3"
1177 PetscErrorCode MatSolve_SeqSBAIJ_3(Mat A,Vec bb,Vec xx)
1178 {
1179   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1180   IS             isrow=a->row;
1181   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1182   PetscErrorCode ierr;
1183   PetscInt       nz,*vj,k,*r,idx;
1184   MatScalar      *aa=a->a,*v,*diag;
1185   PetscScalar    *x,*b,x0,x1,x2,*t,*tp;
1186 
1187   PetscFunctionBegin;
1188   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1189   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1190   t  = a->solve_work;
1191   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1192 
1193   /* solve U^T * D * y = b by forward substitution */
1194   tp = t;
1195   for (k=0; k<mbs; k++) { /* t <- perm(b) */
1196     idx   = 3*r[k];
1197     tp[0] = b[idx];
1198     tp[1] = b[idx+1];
1199     tp[2] = b[idx+2];
1200     tp += 3;
1201   }
1202 
1203   for (k=0; k<mbs; k++){
1204     v  = aa + 9*ai[k];
1205     vj = aj + ai[k];
1206     tp = t + k*3;
1207     x0 = tp[0]; x1 = tp[1]; x2 = tp[2];
1208     nz = ai[k+1] - ai[k];
1209 
1210     tp = t + (*vj)*3;
1211     while (nz--) {
1212       tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2;
1213       tp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2;
1214       tp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2;
1215       vj++; tp = t + (*vj)*3;
1216       v += 9;
1217     }
1218 
1219     /* xk = inv(Dk)*(Dk*xk) */
1220     diag  = aa+k*9;          /* ptr to inv(Dk) */
1221     tp    = t + k*3;
1222     tp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
1223     tp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
1224     tp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
1225   }
1226 
1227   /* solve U*x = y by back substitution */
1228   for (k=mbs-1; k>=0; k--){
1229     v  = aa + 9*ai[k];
1230     vj = aj + ai[k];
1231     tp    = t + k*3;
1232     x0 = tp[0]; x1 = tp[1]; x2 = tp[2];  /* xk */
1233     nz = ai[k+1] - ai[k];
1234 
1235     tp = t + (*vj)*3;
1236     while (nz--) {
1237       /* xk += U(k,:)*x(:) */
1238       x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2];
1239       x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2];
1240       x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2];
1241       vj++; tp = t + (*vj)*3;
1242       v += 9;
1243     }
1244     tp    = t + k*3;
1245     tp[0] = x0; tp[1] = x1; tp[2] = x2;
1246     idx      = 3*r[k];
1247     x[idx]   = x0;
1248     x[idx+1] = x1;
1249     x[idx+2] = x2;
1250   }
1251 
1252   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1253   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1254   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1255   ierr = PetscLogFlops(9*(2*a->nz + mbs));CHKERRQ(ierr);
1256   PetscFunctionReturn(0);
1257 }
1258 
1259 #undef __FUNCT__
1260 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private"
1261 PetscErrorCode ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1262 {
1263   MatScalar      *v,*diag;
1264   PetscScalar    *xp,x0,x1,x2;
1265   PetscInt       nz,*vj,k;
1266 
1267   PetscFunctionBegin;
1268   for (k=0; k<mbs; k++){
1269     v  = aa + 9*ai[k];
1270     xp = x + k*3;
1271     x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* Dk*xk = k-th block of x */
1272     nz = ai[k+1] - ai[k];
1273     vj = aj + ai[k];
1274     xp = x + (*vj)*3;
1275     while (nz--) {
1276       /* x(:) += U(k,:)^T*(Dk*xk) */
1277       xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2;
1278       xp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2;
1279       xp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2;
1280       vj++; xp = x + (*vj)*3;
1281       v += 9;
1282     }
1283     /* xk = inv(Dk)*(Dk*xk) */
1284     diag = aa+k*9;          /* ptr to inv(Dk) */
1285     xp   = x + k*3;
1286     xp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
1287     xp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
1288     xp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
1289   }
1290   PetscFunctionReturn(0);
1291 }
1292 
1293 #undef __FUNCT__
1294 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private"
1295 PetscErrorCode BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1296 {
1297   MatScalar      *v;
1298   PetscScalar    *xp,x0,x1,x2;
1299   PetscInt       nz,*vj,k;
1300 
1301   PetscFunctionBegin;
1302   for (k=mbs-1; k>=0; k--){
1303     v  = aa + 9*ai[k];
1304     xp = x + k*3;
1305     x0 = xp[0]; x1 = xp[1]; x2 = xp[2];  /* xk */
1306     nz = ai[k+1] - ai[k];
1307     vj = aj + ai[k];
1308     xp = x + (*vj)*3;
1309     while (nz--) {
1310       /* xk += U(k,:)*x(:) */
1311       x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2];
1312       x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2];
1313       x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2];
1314       vj++;
1315       v += 9; xp = x + (*vj)*3;
1316     }
1317     xp = x + k*3;
1318     xp[0] = x0; xp[1] = x1; xp[2] = x2;
1319   }
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 #undef __FUNCT__
1324 #define __FUNCT__ "MatSolve_SeqSBAIJ_3_NaturalOrdering"
1325 PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
1326 {
1327   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1328   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1329   MatScalar      *aa=a->a;
1330   PetscScalar    *x,*b;
1331   PetscErrorCode ierr;
1332 
1333   PetscFunctionBegin;
1334   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1335   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1336 
1337   /* solve U^T * D * y = b by forward substitution */
1338   ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1339   ierr = ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1340 
1341   /* solve U*x = y by back substitution */
1342   ierr = BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1343 
1344   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1345   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1346   ierr = PetscLogFlops(9*(2*a->nz + mbs));CHKERRQ(ierr);
1347   PetscFunctionReturn(0);
1348 }
1349 
1350 #undef __FUNCT__
1351 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_3_NaturalOrdering"
1352 PetscErrorCode MatForwardSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
1353 {
1354   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1355   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1356   MatScalar      *aa=a->a;
1357   PetscScalar    *x,*b;
1358   PetscErrorCode ierr;
1359 
1360   PetscFunctionBegin;
1361   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1362   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1363   ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1364   ierr = ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1365   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1366   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1367   ierr = PetscLogFlops(9*(a->nz + mbs));CHKERRQ(ierr);
1368   PetscFunctionReturn(0);
1369 }
1370 
1371 #undef __FUNCT__
1372 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering"
1373 PetscErrorCode MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
1374 {
1375   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1376   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1377   MatScalar      *aa=a->a;
1378   PetscScalar    *x,*b;
1379   PetscErrorCode ierr;
1380 
1381   PetscFunctionBegin;
1382   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1383   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1384   ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1385   ierr = BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1386   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1387   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1388   ierr = PetscLogFlops(9*a->nz);CHKERRQ(ierr);
1389   PetscFunctionReturn(0);
1390 }
1391 
1392 #undef __FUNCT__
1393 #define __FUNCT__ "MatSolve_SeqSBAIJ_2"
1394 PetscErrorCode MatSolve_SeqSBAIJ_2(Mat A,Vec bb,Vec xx)
1395 {
1396   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ *)A->data;
1397   IS             isrow=a->row;
1398   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1399   PetscErrorCode ierr;
1400   PetscInt       nz,*vj,k,k2,*r,idx;
1401   MatScalar      *aa=a->a,*v,*diag;
1402   PetscScalar    *x,*b,x0,x1,*t;
1403 
1404   PetscFunctionBegin;
1405   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1406   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1407   t  = a->solve_work;
1408   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1409 
1410   /* solve U^T * D * y = perm(b) by forward substitution */
1411   for (k=0; k<mbs; k++) {  /* t <- perm(b) */
1412     idx = 2*r[k];
1413     t[k*2]   = b[idx];
1414     t[k*2+1] = b[idx+1];
1415   }
1416   for (k=0; k<mbs; k++){
1417     v  = aa + 4*ai[k];
1418     vj = aj + ai[k];
1419     k2 = k*2;
1420     x0 = t[k2]; x1 = t[k2+1];
1421     nz = ai[k+1] - ai[k];
1422     while (nz--) {
1423       t[(*vj)*2]   += v[0]*x0 + v[1]*x1;
1424       t[(*vj)*2+1] += v[2]*x0 + v[3]*x1;
1425       vj++; v += 4;
1426     }
1427     diag = aa+k*4;  /* ptr to inv(Dk) */
1428     t[k2]   = diag[0]*x0 + diag[2]*x1;
1429     t[k2+1] = diag[1]*x0 + diag[3]*x1;
1430   }
1431 
1432   /* solve U*x = y by back substitution */
1433   for (k=mbs-1; k>=0; k--){
1434     v  = aa + 4*ai[k];
1435     vj = aj + ai[k];
1436     k2 = k*2;
1437     x0 = t[k2]; x1 = t[k2+1];
1438     nz = ai[k+1] - ai[k];
1439     while (nz--) {
1440       x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1];
1441       x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1];
1442       vj++; v += 4;
1443     }
1444     t[k2]    = x0;
1445     t[k2+1]  = x1;
1446     idx      = 2*r[k];
1447     x[idx]   = x0;
1448     x[idx+1] = x1;
1449   }
1450 
1451   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1452   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1453   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1454   ierr = PetscLogFlops(4*(2*a->nz + mbs));CHKERRQ(ierr);
1455   PetscFunctionReturn(0);
1456 }
1457 
1458 #undef __FUNCT__
1459 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private"
1460 PetscErrorCode ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1461 {
1462   MatScalar      *v,*diag;
1463   PetscScalar    x0,x1;
1464   PetscInt       nz,*vj,k,k2;
1465 
1466   PetscFunctionBegin;
1467   for (k=0; k<mbs; k++){
1468     v  = aa + 4*ai[k];
1469     vj = aj + ai[k];
1470     k2 = k*2;
1471     x0 = x[k2]; x1 = x[k2+1];  /* Dk*xk = k-th block of x */
1472     nz = ai[k+1] - ai[k];
1473 
1474     while (nz--) {
1475       /* x(:) += U(k,:)^T*(Dk*xk) */
1476       x[(*vj)*2]   += v[0]*x0 + v[1]*x1;
1477       x[(*vj)*2+1] += v[2]*x0 + v[3]*x1;
1478       vj++; v += 4;
1479     }
1480     /* xk = inv(Dk)*(Dk*xk) */
1481     diag = aa+k*4;          /* ptr to inv(Dk) */
1482     x[k2]   = diag[0]*x0 + diag[2]*x1;
1483     x[k2+1] = diag[1]*x0 + diag[3]*x1;
1484   }
1485   PetscFunctionReturn(0);
1486 }
1487 
1488 #undef __FUNCT__
1489 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private"
1490 PetscErrorCode BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1491 {
1492   MatScalar      *v;
1493   PetscScalar    x0,x1;
1494   PetscInt       nz,*vj,k,k2;
1495 
1496   PetscFunctionBegin;
1497   for (k=mbs-1; k>=0; k--){
1498     v  = aa + 4*ai[k];
1499     vj = aj + ai[k];
1500     k2 = k*2;
1501     x0 = x[k2]; x1 = x[k2+1];  /* xk */
1502     nz = ai[k+1] - ai[k];
1503     while (nz--) {
1504       /* xk += U(k,:)*x(:) */
1505       x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1];
1506       x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1];
1507       vj++; v += 4;
1508     }
1509     x[k2]     = x0;
1510     x[k2+1]   = x1;
1511   }
1512   PetscFunctionReturn(0);
1513 }
1514 
1515 #undef __FUNCT__
1516 #define __FUNCT__ "MatSolve_SeqSBAIJ_2_NaturalOrdering"
1517 PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
1518 {
1519   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1520   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1521   MatScalar      *aa=a->a;
1522   PetscScalar    *x,*b;
1523   PetscErrorCode ierr;
1524 
1525   PetscFunctionBegin;
1526   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1527   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1528 
1529   /* solve U^T * D * y = b by forward substitution */
1530   ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1531   ierr = ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1532 
1533   /* solve U*x = y by back substitution */
1534   ierr = BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1535 
1536   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1537   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1538   ierr = PetscLogFlops(4*(2*a->nz + mbs));CHKERRQ(ierr); /* bs2*(2*a->nz + mbs) */
1539   PetscFunctionReturn(0);
1540 }
1541 
1542 #undef __FUNCT__
1543 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_2_NaturalOrdering"
1544 PetscErrorCode MatForwardSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
1545 {
1546   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1547   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1548   MatScalar      *aa=a->a;
1549   PetscScalar    *x,*b;
1550   PetscErrorCode ierr;
1551 
1552   PetscFunctionBegin;
1553   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1554   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1555   ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1556   ierr = ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1557   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1558   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1559   ierr = PetscLogFlops(4*(a->nz + mbs));CHKERRQ(ierr);
1560   PetscFunctionReturn(0);
1561 }
1562 
1563 #undef __FUNCT__
1564 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering"
1565 PetscErrorCode MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
1566 {
1567   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1568   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1569   MatScalar      *aa=a->a;
1570   PetscScalar    *x,*b;
1571   PetscErrorCode ierr;
1572 
1573   PetscFunctionBegin;
1574   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1575   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1576   ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1577   ierr = BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1578   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1579   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1580   ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr);
1581   PetscFunctionReturn(0);
1582 }
1583 
1584 #undef __FUNCT__
1585 #define __FUNCT__ "MatSolve_SeqSBAIJ_1"
1586 PetscErrorCode MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx)
1587 {
1588   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1589   IS             isrow=a->row;
1590   PetscErrorCode ierr;
1591   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j,*rp;
1592   MatScalar      *aa=a->a,*v;
1593   PetscScalar    *x,*b,xk,*t;
1594   PetscInt       nz,*vj,k;
1595 
1596   PetscFunctionBegin;
1597   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1598   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1599   t    = a->solve_work;
1600   ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr);
1601 
1602   /* solve U^T*D^(1/2)*y = perm(b) by forward substitution */
1603   for (k=0; k<mbs; k++) t[k] = b[rp[k]];
1604   for (k=0; k<mbs; k++){
1605     v  = aa + ai[k] + 1;
1606     vj = aj + ai[k] + 1;
1607     xk = t[k];
1608     nz = ai[k+1] - ai[k] - 1;
1609     while (nz--) t[*vj++] += (*v++) * xk;
1610     t[k] = xk*aa[ai[k]];  /* aa[k] = 1/D(k) */
1611   }
1612 
1613   /* solve U*perm(x) = y by back substitution */
1614   for (k=mbs-1; k>=0; k--){
1615     v  = aa + ai[k] + 1;
1616     vj = aj + ai[k] + 1;
1617     nz = ai[k+1] - ai[k] - 1;
1618     while (nz--) t[k] += (*v++) * t[*vj++];
1619     x[rp[k]] = t[k];
1620   }
1621 
1622   ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr);
1623   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1624   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1625   ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr);
1626   PetscFunctionReturn(0);
1627 }
1628 
1629 #undef __FUNCT__
1630 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1"
1631 PetscErrorCode MatForwardSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx)
1632 {
1633   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1634   IS             isrow=a->row;
1635   PetscErrorCode ierr;
1636   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j,*rp;
1637   MatScalar      *aa=a->a,*v;
1638   PetscReal      diagk;
1639   PetscScalar    *x,*b,xk;
1640   PetscInt       nz,*vj,k;
1641 
1642   PetscFunctionBegin;
1643   /* solve U^T*D^(1/2)*x = perm(b) by forward substitution */
1644   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1645   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1646   ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr);
1647 
1648   for (k=0; k<mbs; k++) x[k] = b[rp[k]];
1649   for (k=0; k<mbs; k++){
1650     v  = aa + ai[k] + 1;
1651     vj = aj + ai[k] + 1;
1652     xk = x[k];
1653     nz = ai[k+1] - ai[k] - 1;
1654     while (nz--) x[*vj++] += (*v++) * xk;
1655 
1656     diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */
1657     if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative");
1658     x[k] = xk*sqrt(diagk);
1659   }
1660   ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr);
1661   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1662   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1663   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
1664   PetscFunctionReturn(0);
1665 }
1666 
1667 #undef __FUNCT__
1668 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1"
1669 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx)
1670 {
1671   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1672   IS             isrow=a->row;
1673   PetscErrorCode ierr;
1674   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j,*rp;
1675   MatScalar      *aa=a->a,*v;
1676   PetscReal      diagk;
1677   PetscScalar    *x,*b,*t;
1678   PetscInt       nz,*vj,k;
1679 
1680   PetscFunctionBegin;
1681   /* solve D^(1/2)*U*perm(x) = b by back substitution */
1682   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1683   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1684   t    = a->solve_work;
1685   ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr);
1686 
1687   for (k=mbs-1; k>=0; k--){
1688     v  = aa + ai[k] + 1;
1689     vj = aj + ai[k] + 1;
1690     diagk = PetscRealPart(aa[ai[k]]);
1691     if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative");
1692     t[k] = b[k] * sqrt(diagk);
1693     nz = ai[k+1] - ai[k] - 1;
1694     while (nz--) t[k] += (*v++) * t[*vj++];
1695     x[rp[k]] = t[k];
1696   }
1697   ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr);
1698   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1699   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1700   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
1701   PetscFunctionReturn(0);
1702 }
1703 
1704 #undef __FUNCT__
1705 #define __FUNCT__ "MatSolves_SeqSBAIJ_1"
1706 PetscErrorCode MatSolves_SeqSBAIJ_1(Mat A,Vecs bb,Vecs xx)
1707 {
1708   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1709   PetscErrorCode ierr;
1710 
1711   PetscFunctionBegin;
1712   if (A->rmap.bs == 1) {
1713     ierr = MatSolve_SeqSBAIJ_1(A,bb->v,xx->v);CHKERRQ(ierr);
1714   } else {
1715     IS              isrow=a->row;
1716     PetscInt             mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,i;
1717     MatScalar       *aa=a->a,*v;
1718     PetscScalar     *x,*b,*t;
1719     PetscInt             nz,*vj,k,n;
1720     if (bb->n > a->solves_work_n) {
1721       ierr = PetscFree(a->solves_work);CHKERRQ(ierr);
1722       ierr = PetscMalloc(bb->n*A->rmap.N*sizeof(PetscScalar),&a->solves_work);CHKERRQ(ierr);
1723       a->solves_work_n = bb->n;
1724     }
1725     n    = bb->n;
1726     ierr = VecGetArray(bb->v,&b);CHKERRQ(ierr);
1727     ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr);
1728     t    = a->solves_work;
1729 
1730     ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr);
1731 
1732     /* solve U^T*D*y = perm(b) by forward substitution */
1733     for (k=0; k<mbs; k++) {for (i=0; i<n; i++) t[n*k+i] = b[rp[k]+i*mbs];} /* values are stored interlaced in t */
1734     for (k=0; k<mbs; k++){
1735       v  = aa + ai[k];
1736       vj = aj + ai[k];
1737       nz = ai[k+1] - ai[k];
1738       while (nz--) {
1739         for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i];
1740         v++;vj++;
1741       }
1742       for (i=0; i<n; i++) t[n*k+i] *= aa[k];  /* note: aa[k] = 1/D(k) */
1743     }
1744 
1745     /* solve U*perm(x) = y by back substitution */
1746     for (k=mbs-1; k>=0; k--){
1747       v  = aa + ai[k];
1748       vj = aj + ai[k];
1749       nz = ai[k+1] - ai[k];
1750       while (nz--) {
1751         for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i];
1752         v++;vj++;
1753       }
1754       for (i=0; i<n; i++) x[rp[k]+i*mbs] = t[n*k+i];
1755     }
1756 
1757     ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr);
1758     ierr = VecRestoreArray(bb->v,&b);CHKERRQ(ierr);
1759     ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr);
1760     ierr = PetscLogFlops(bb->n*(4*a->nz + A->rmap.N));CHKERRQ(ierr);
1761   }
1762   PetscFunctionReturn(0);
1763 }
1764 
1765 /*
1766       Special case where the matrix was ILU(0) factored in the natural
1767    ordering. This eliminates the need for the column and row permutation.
1768 */
1769 #undef __FUNCT__
1770 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering"
1771 PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
1772 {
1773   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1774   PetscErrorCode ierr;
1775   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1776   MatScalar      *aa=a->a,*v;
1777   PetscScalar    *x,*b,xk;
1778   PetscInt       nz,*vj,k;
1779 
1780   PetscFunctionBegin;
1781   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1782   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1783 
1784   /* solve U^T*D*y = b by forward substitution */
1785   ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1786   for (k=0; k<mbs; k++){
1787     v  = aa + ai[k] + 1;
1788     vj = aj + ai[k] + 1;
1789     xk = x[k];
1790     nz = ai[k+1] - ai[k] - 1;     /* exclude diag[k] */
1791     while (nz--) x[*vj++] += (*v++) * xk;
1792     x[k] = xk*aa[ai[k]];  /* note: aa[diag[k]] = 1/D(k) */
1793   }
1794 
1795   /* solve U*x = y by back substitution */
1796   for (k=mbs-2; k>=0; k--){
1797     v  = aa + ai[k] + 1;
1798     vj = aj + ai[k] + 1;
1799     xk = x[k];
1800     nz = ai[k+1] - ai[k] - 1;
1801     while (nz--) xk += (*v++) * x[*vj++];
1802     x[k] = xk;
1803   }
1804 
1805   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1806   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1807   ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr);
1808   PetscFunctionReturn(0);
1809 }
1810 
1811 #undef __FUNCT__
1812 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1_NaturalOrdering"
1813 PetscErrorCode MatForwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
1814 {
1815   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1816   PetscErrorCode ierr;
1817   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1818   MatScalar      *aa=a->a,*v;
1819   PetscReal      diagk;
1820   PetscScalar    *x,*b;
1821   PetscInt       nz,*vj,k;
1822 
1823   PetscFunctionBegin;
1824   /* solve U^T*D^(1/2)*x = b by forward substitution */
1825   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1826   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1827   ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1828   for (k=0; k<mbs; k++){
1829     v  = aa + ai[k] + 1;
1830     vj = aj + ai[k] + 1;
1831     nz = ai[k+1] - ai[k] - 1;     /* exclude diag[k] */
1832     while (nz--) x[*vj++] += (*v++) * x[k];
1833     diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */
1834     if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ2(PETSC_ERR_SUP,"Diagonal (%g,%g) must be real and nonnegative",PetscRealPart(aa[ai[k]]),PetscImaginaryPart(aa[ai[k]]));
1835     x[k] *= sqrt(diagk);
1836   }
1837   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1838   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1839   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
1840   PetscFunctionReturn(0);
1841 }
1842 
1843 #undef __FUNCT__
1844 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering"
1845 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
1846 {
1847   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1848   PetscErrorCode ierr;
1849   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1850   MatScalar      *aa=a->a,*v;
1851   PetscReal      diagk;
1852   PetscScalar    *x,*b;
1853   PetscInt       nz,*vj,k;
1854 
1855   PetscFunctionBegin;
1856   /* solve D^(1/2)*U*x = b by back substitution */
1857   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1858   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1859 
1860   for (k=mbs-1; k>=0; k--){
1861     v  = aa + ai[k] + 1;
1862     vj = aj + ai[k] + 1;
1863     diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */
1864     if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative");
1865     x[k] = sqrt(diagk)*b[k];
1866     nz = ai[k+1] - ai[k] - 1;
1867     while (nz--) x[k] += (*v++) * x[*vj++];
1868   }
1869   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1870   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1871   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
1872   PetscFunctionReturn(0);
1873 }
1874 
1875 /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */
1876 #undef __FUNCT__
1877 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ_MSR"
1878 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ_MSR(Mat A,IS perm,MatFactorInfo *info,Mat *B)
1879 {
1880   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b;
1881   PetscErrorCode ierr;
1882   PetscInt       *rip,i,mbs = a->mbs,*ai = a->i,*aj = a->j;
1883   PetscInt       *jutmp,bs = A->rmap.bs,bs2=a->bs2;
1884   PetscInt       m,reallocs = 0,*levtmp;
1885   PetscInt       *prowl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd;
1886   PetscInt       incrlev,*lev,shift,prow,nz;
1887   PetscReal      f = info->fill,levels = info->levels;
1888   PetscTruth     perm_identity;
1889 
1890   PetscFunctionBegin;
1891   /* check whether perm is the identity mapping */
1892   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1893 
1894   if (perm_identity){
1895     a->permute = PETSC_FALSE;
1896     ai = a->i; aj = a->j;
1897   } else { /*  non-trivial permutation */
1898     a->permute = PETSC_TRUE;
1899     ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr);
1900     ai = a->inew; aj = a->jnew;
1901   }
1902 
1903   /* initialization */
1904   ierr  = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1905   umax  = (PetscInt)(f*ai[mbs] + 1);
1906   ierr  = PetscMalloc(umax*sizeof(PetscInt),&lev);CHKERRQ(ierr);
1907   umax += mbs + 1;
1908   shift = mbs + 1;
1909   ierr  = PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);CHKERRQ(ierr);
1910   ierr  = PetscMalloc(umax*sizeof(PetscInt),&ju);CHKERRQ(ierr);
1911   iu[0] = mbs + 1;
1912   juidx = mbs + 1;
1913   /* prowl: linked list for pivot row */
1914   ierr    = PetscMalloc((3*mbs+1)*sizeof(PetscInt),&prowl);CHKERRQ(ierr);
1915   /* q: linked list for col index */
1916   q       = prowl + mbs;
1917   levtmp  = q     + mbs;
1918 
1919   for (i=0; i<mbs; i++){
1920     prowl[i] = mbs;
1921     q[i] = 0;
1922   }
1923 
1924   /* for each row k */
1925   for (k=0; k<mbs; k++){
1926     nzk  = 0;
1927     q[k] = mbs;
1928     /* copy current row into linked list */
1929     nz = ai[rip[k]+1] - ai[rip[k]];
1930     j = ai[rip[k]];
1931     while (nz--){
1932       vj = rip[aj[j++]];
1933       if (vj > k){
1934         qm = k;
1935         do {
1936           m  = qm; qm = q[m];
1937         } while(qm < vj);
1938         if (qm == vj) {
1939           SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n");
1940         }
1941         nzk++;
1942         q[m]       = vj;
1943         q[vj]      = qm;
1944         levtmp[vj] = 0;   /* initialize lev for nonzero element */
1945       }
1946     }
1947 
1948     /* modify nonzero structure of k-th row by computing fill-in
1949        for each row prow to be merged in */
1950     prow = k;
1951     prow = prowl[prow]; /* next pivot row (== 0 for symbolic factorization) */
1952 
1953     while (prow < k){
1954       /* merge row prow into k-th row */
1955       jmin = iu[prow] + 1;
1956       jmax = iu[prow+1];
1957       qm = k;
1958       for (j=jmin; j<jmax; j++){
1959         incrlev = lev[j-shift] + 1;
1960 	if (incrlev > levels) continue;
1961         vj      = ju[j];
1962         do {
1963           m = qm; qm = q[m];
1964         } while (qm < vj);
1965         if (qm != vj){      /* a fill */
1966           nzk++; q[m] = vj; q[vj] = qm; qm = vj;
1967           levtmp[vj] = incrlev;
1968         } else {
1969           if (levtmp[vj] > incrlev) levtmp[vj] = incrlev;
1970         }
1971       }
1972       prow = prowl[prow]; /* next pivot row */
1973     }
1974 
1975     /* add k to row list for first nonzero element in k-th row */
1976     if (nzk > 1){
1977       i = q[k]; /* col value of first nonzero element in k_th row of U */
1978       prowl[k] = prowl[i]; prowl[i] = k;
1979     }
1980     iu[k+1] = iu[k] + nzk;
1981 
1982     /* allocate more space to ju and lev if needed */
1983     if (iu[k+1] > umax) {
1984       /* estimate how much additional space we will need */
1985       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1986       /* just double the memory each time */
1987       maxadd = umax;
1988       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
1989       umax += maxadd;
1990 
1991       /* allocate a longer ju */
1992       ierr     = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr);
1993       ierr     = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr);
1994       ierr     = PetscFree(ju);CHKERRQ(ierr);
1995       ju       = jutmp;
1996 
1997       ierr     = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr);
1998       ierr     = PetscMemcpy(jutmp,lev,(iu[k]-shift)*sizeof(PetscInt));CHKERRQ(ierr);
1999       ierr     = PetscFree(lev);CHKERRQ(ierr);
2000       lev      = jutmp;
2001       reallocs += 2; /* count how many times we realloc */
2002     }
2003 
2004     /* save nonzero structure of k-th row in ju */
2005     i=k;
2006     while (nzk --) {
2007       i                = q[i];
2008       ju[juidx]        = i;
2009       lev[juidx-shift] = levtmp[i];
2010       juidx++;
2011     }
2012   }
2013 
2014 #if defined(PETSC_USE_INFO)
2015   if (ai[mbs] != 0) {
2016     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
2017     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
2018     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2019     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
2020     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
2021   } else {
2022     ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2023   }
2024 #endif
2025 
2026   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2027   ierr = PetscFree(prowl);CHKERRQ(ierr);
2028   ierr = PetscFree(lev);CHKERRQ(ierr);
2029 
2030   /* put together the new matrix */
2031   ierr = MatCreate(A->comm,B);CHKERRQ(ierr);
2032   ierr = MatSetSizes(*B,bs*mbs,bs*mbs,bs*mbs,bs*mbs);CHKERRQ(ierr);
2033   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
2034   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*B,bs,0,PETSC_NULL);CHKERRQ(ierr);
2035 
2036   /* ierr = PetscLogObjectParent(*B,iperm);CHKERRQ(ierr); */
2037   b    = (Mat_SeqSBAIJ*)(*B)->data;
2038   ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr);
2039   b->singlemalloc = PETSC_FALSE;
2040   b->free_a       = PETSC_TRUE;
2041   b->free_ij      = PETSC_TRUE;
2042   /* the next line frees the default space generated by the Create() */
2043   ierr    = PetscFree3(b->a,b->j,b->i);CHKERRQ(ierr);
2044   ierr    = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
2045   b->j    = ju;
2046   b->i    = iu;
2047   b->diag = 0;
2048   b->ilen = 0;
2049   b->imax = 0;
2050 
2051   if (b->row) {
2052     ierr = ISDestroy(b->row);CHKERRQ(ierr);
2053   }
2054   if (b->icol) {
2055     ierr = ISDestroy(b->icol);CHKERRQ(ierr);
2056   }
2057   b->row  = perm;
2058   b->icol = perm;
2059   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2060   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2061   ierr    = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2062   /* In b structure:  Free imax, ilen, old a, old j.
2063      Allocate idnew, solve_work, new a, new j */
2064   ierr    = PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2065   b->maxnz = b->nz = iu[mbs];
2066 
2067   (*B)->factor                 = FACTOR_CHOLESKY;
2068   (*B)->info.factor_mallocs    = reallocs;
2069   (*B)->info.fill_ratio_given  = f;
2070   if (ai[mbs] != 0) {
2071     (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
2072   } else {
2073     (*B)->info.fill_ratio_needed = 0.0;
2074   }
2075 
2076   if (perm_identity){
2077     switch (bs) {
2078       case 1:
2079         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
2080         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2081         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2082         (*B)->ops->solves                = MatSolves_SeqSBAIJ_1;
2083         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=1\n");CHKERRQ(ierr);
2084         break;
2085       case 2:
2086         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
2087         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_2_NaturalOrdering;
2088         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_2_NaturalOrdering;
2089         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr);
2090         break;
2091       case 3:
2092         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
2093         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_3_NaturalOrdering;
2094         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_3_NaturalOrdering;
2095         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr);
2096         break;
2097       case 4:
2098         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
2099         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_4_NaturalOrdering;
2100         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_4_NaturalOrdering;
2101         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr);
2102         break;
2103       case 5:
2104         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
2105         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_5_NaturalOrdering;
2106         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_5_NaturalOrdering;
2107         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr);
2108         break;
2109       case 6:
2110         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
2111         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_6_NaturalOrdering;
2112         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_6_NaturalOrdering;
2113         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr);
2114         break;
2115       case 7:
2116         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
2117         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_7_NaturalOrdering;
2118         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_7_NaturalOrdering;
2119         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr);
2120       break;
2121       default:
2122         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
2123         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_N_NaturalOrdering;
2124         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_N_NaturalOrdering;
2125         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS>7\n");CHKERRQ(ierr);
2126       break;
2127     }
2128   }
2129 
2130   PetscFunctionReturn(0);
2131 }
2132 
2133 #include "petscbt.h"
2134 #include "src/mat/utils/freespace.h"
2135 #undef __FUNCT__
2136 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ"
2137 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
2138 {
2139   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data;
2140   Mat_SeqSBAIJ       *b;
2141   Mat                B;
2142   PetscErrorCode     ierr;
2143   PetscTruth         perm_identity,free_ij = PETSC_TRUE;
2144   PetscInt           bs=A->rmap.bs,am=a->mbs;
2145   PetscInt           reallocs=0,*rip,i,*ai,*aj,*ui;
2146   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2147   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,ncols,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr;
2148   PetscReal          fill=info->fill,levels=info->levels,ratio_needed;
2149   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2150   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2151   PetscBT            lnkbt;
2152   PetscScalar        *ua;
2153 
2154   PetscFunctionBegin;
2155   /*
2156    This code originally uses Modified Sparse Row (MSR) storage
2157    (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice!
2158    Then it is rewritten so the factor B takes seqsbaij format. However the associated
2159    MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1,
2160    thus the original code in MSR format is still used for these cases.
2161    The code below should replace MatICCFactorSymbolic_SeqSBAIJ_MSR() whenever
2162    MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor.
2163   */
2164   if (bs > 1){
2165     ierr = MatICCFactorSymbolic_SeqSBAIJ_MSR(A,perm,info,fact);CHKERRQ(ierr);
2166     PetscFunctionReturn(0);
2167   }
2168 
2169   /* check whether perm is the identity mapping */
2170   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2171 
2172   /* special case that simply copies fill pattern */
2173   if (!levels && perm_identity) {
2174     a->permute = PETSC_FALSE;
2175     /* reuse the column pointers and row offsets for memory savings */
2176     ui           = a->i;
2177     uj           = a->j;
2178     free_ij      = PETSC_FALSE;
2179     ratio_needed = 1.0;
2180   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2181     if (perm_identity){
2182       a->permute = PETSC_FALSE;
2183       ai = a->i; aj = a->j;
2184     } else { /*  non-trivial permutation */
2185       a->permute = PETSC_TRUE;
2186       ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr);
2187       ai   = a->inew; aj = a->jnew;
2188     }
2189     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2190 
2191     /* initialization */
2192     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2193     ui[0] = 0;
2194 
2195     /* jl: linked list for storing indices of the pivot rows
2196        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2197     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
2198     il         = jl + am;
2199     uj_ptr     = (PetscInt**)(il + am);
2200     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
2201     for (i=0; i<am; i++){
2202       jl[i] = am; il[i] = 0;
2203     }
2204 
2205     /* create and initialize a linked list for storing column indices of the active row k */
2206     nlnk = am + 1;
2207     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2208 
2209     /* initial FreeSpace size is fill*(ai[am]+1) */
2210     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
2211     current_space = free_space;
2212     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
2213     current_space_lvl = free_space_lvl;
2214 
2215     for (k=0; k<am; k++){  /* for each active row k */
2216       /* initialize lnk by the column indices of row rip[k] */
2217       nzk   = 0;
2218       ncols = ai[rip[k]+1] - ai[rip[k]];
2219       cols  = aj+ai[rip[k]];
2220       ierr = PetscIncompleteLLInit(ncols,cols,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2221       nzk += nlnk;
2222 
2223       /* update lnk by computing fill-in for each pivot row to be merged in */
2224       prow = jl[k]; /* 1st pivot row */
2225 
2226       while (prow < k){
2227         nextprow = jl[prow];
2228 
2229         /* merge prow into k-th row */
2230         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2231         jmax = ui[prow+1];
2232         ncols = jmax-jmin;
2233         i     = jmin - ui[prow];
2234         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2235         j     = *(uj_lvl_ptr[prow] + i - 1);
2236         cols_lvl = uj_lvl_ptr[prow]+i;
2237         ierr = PetscICCLLAddSorted(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2238         nzk += nlnk;
2239 
2240         /* update il and jl for prow */
2241         if (jmin < jmax){
2242           il[prow] = jmin;
2243           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2244         }
2245         prow = nextprow;
2246       }
2247 
2248       /* if free space is not available, make more free space */
2249       if (current_space->local_remaining<nzk) {
2250         i = am - k + 1; /* num of unfactored rows */
2251         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2252         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2253         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2254         reallocs++;
2255       }
2256 
2257       /* copy data into free_space and free_space_lvl, then initialize lnk */
2258       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2259 
2260       /* add the k-th row into il and jl */
2261       if (nzk-1 > 0){
2262         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2263         jl[k] = jl[i]; jl[i] = k;
2264         il[k] = ui[k] + 1;
2265       }
2266       uj_ptr[k]     = current_space->array;
2267       uj_lvl_ptr[k] = current_space_lvl->array;
2268 
2269       current_space->array           += nzk;
2270       current_space->local_used      += nzk;
2271       current_space->local_remaining -= nzk;
2272       current_space_lvl->array           += nzk;
2273       current_space_lvl->local_used      += nzk;
2274       current_space_lvl->local_remaining -= nzk;
2275 
2276       ui[k+1] = ui[k] + nzk;
2277     }
2278 
2279 #if defined(PETSC_USE_INFO)
2280     if (ai[am] != 0) {
2281       PetscReal af = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2282       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2283       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2284       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2285     } else {
2286       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2287     }
2288 #endif
2289 
2290     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2291     ierr = PetscFree(jl);CHKERRQ(ierr);
2292 
2293     /* destroy list of free space and other temporary array(s) */
2294     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2295     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
2296     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2297     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2298     if (ai[am] != 0) {
2299       ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2300     } else {
2301       ratio_needed = 0.0;
2302     }
2303   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2304 
2305   /* put together the new matrix in MATSEQSBAIJ format */
2306   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&ua);CHKERRQ(ierr);
2307   ierr = MatCreateSeqSBAIJWithArrays(PETSC_COMM_SELF,1,am,am,ui,uj,ua,fact);CHKERRQ(ierr);
2308   B = *fact;
2309   b = (Mat_SeqSBAIJ*)B->data;
2310   ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr);
2311   b->singlemalloc = PETSC_FALSE;
2312   b->free_a  = PETSC_TRUE;
2313   b->free_ij = free_ij;
2314   b->diag    = 0;
2315   b->ilen    = 0;
2316   b->imax    = 0;
2317   b->row     = perm;
2318   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2319   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2320   b->icol = perm;
2321   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2322   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2323   b->maxnz = b->nz = ui[am];
2324 
2325   B->factor                 = FACTOR_CHOLESKY;
2326   B->info.factor_mallocs    = reallocs;
2327   B->info.fill_ratio_given  = fill;
2328   B->info.fill_ratio_needed = ratio_needed;
2329 
2330   if (perm_identity){
2331     switch (bs) {
2332       case 1:
2333         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
2334         B->ops->solve                 = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2335         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2336         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=1\n");CHKERRQ(ierr);
2337         break;
2338       case 2:
2339         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
2340         B->ops->solve                 = MatSolve_SeqSBAIJ_2_NaturalOrdering;
2341         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_2_NaturalOrdering;
2342         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr);
2343         break;
2344       case 3:
2345         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
2346         B->ops->solve                 = MatSolve_SeqSBAIJ_3_NaturalOrdering;
2347         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_3_NaturalOrdering;
2348         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr);
2349         break;
2350       case 4:
2351         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
2352         B->ops->solve                 = MatSolve_SeqSBAIJ_4_NaturalOrdering;
2353         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_4_NaturalOrdering;
2354         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr);
2355         break;
2356       case 5:
2357         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
2358         B->ops->solve                 = MatSolve_SeqSBAIJ_5_NaturalOrdering;
2359         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_5_NaturalOrdering;
2360         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr);
2361         break;
2362       case 6:
2363         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
2364         B->ops->solve                 = MatSolve_SeqSBAIJ_6_NaturalOrdering;
2365         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_6_NaturalOrdering;
2366         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr);
2367         break;
2368       case 7:
2369         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
2370         B->ops->solve                 = MatSolve_SeqSBAIJ_7_NaturalOrdering;
2371         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_7_NaturalOrdering;
2372         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr);
2373       break;
2374       default:
2375         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
2376         B->ops->solve                 = MatSolve_SeqSBAIJ_N_NaturalOrdering;
2377         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_N_NaturalOrdering;
2378         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS>7\n");CHKERRQ(ierr);
2379       break;
2380     }
2381   }
2382   PetscFunctionReturn(0);
2383 }
2384 
2385