xref: /petsc/src/mat/impls/sbaij/seq/sbaijfact2.c (revision a4efd8ea4bb0600a5aef92b2c23cbfa314827193)
1 #define PETSCMAT_DLL
2 
3 /*
4     Factorization code for SBAIJ format.
5 */
6 
7 #include "src/mat/impls/sbaij/seq/sbaij.h"
8 #include "src/mat/impls/baij/seq/baij.h"
9 #include "src/inline/ilu.h"
10 #include "src/inline/dot.h"
11 
12 #undef __FUNCT__
13 #define __FUNCT__ "MatSolve_SeqSBAIJ_N"
14 PetscErrorCode MatSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx)
15 {
16   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
17   IS              isrow=a->row;
18   PetscInt        mbs=a->mbs,*ai=a->i,*aj=a->j;
19   PetscErrorCode  ierr;
20   PetscInt        nz,*vj,k,*r,idx,k1;
21   PetscInt        bs=A->rmap->bs,bs2 = a->bs2;
22   MatScalar       *aa=a->a,*v,*diag;
23   PetscScalar     *x,*xk,*xj,*b,*xk_tmp,*t;
24 
25   PetscFunctionBegin;
26   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
27   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
28   t  = a->solve_work;
29   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
30   ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr);
31 
32   /* solve U^T * D * y = b by forward substitution */
33   xk = t;
34   for (k=0; k<mbs; k++) { /* t <- perm(b) */
35     idx   = bs*r[k];
36     for (k1=0; k1<bs; k1++) *xk++ = b[idx+k1];
37   }
38   for (k=0; k<mbs; k++){
39     v  = aa + bs2*ai[k];
40     xk = t + k*bs;      /* Dk*xk = k-th block of x */
41     ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */
42     nz = ai[k+1] - ai[k];
43     vj = aj + ai[k];
44     xj = t + (*vj)*bs;  /* *vj-th block of x, *vj>k */
45     while (nz--) {
46       /* x(:) += U(k,:)^T*(Dk*xk) */
47       Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */
48       vj++; xj = t + (*vj)*bs;
49       v += bs2;
50     }
51     /* xk = inv(Dk)*(Dk*xk) */
52     diag = aa+k*bs2;                            /* ptr to inv(Dk) */
53     Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */
54   }
55 
56   /* solve U*x = y by back substitution */
57   for (k=mbs-1; k>=0; k--){
58     v  = aa + bs2*ai[k];
59     xk = t + k*bs;        /* xk */
60     nz = ai[k+1] - ai[k];
61     vj = aj + ai[k];
62     xj = t + (*vj)*bs;
63     while (nz--) {
64       /* xk += U(k,:)*x(:) */
65       Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */
66       vj++;
67       v += bs2; xj = t + (*vj)*bs;
68     }
69     idx   = bs*r[k];
70     for (k1=0; k1<bs; k1++) x[idx+k1] = *xk++;
71   }
72 
73   ierr = PetscFree(xk_tmp);CHKERRQ(ierr);
74   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
75   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
76   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
77   ierr = PetscLogFlops(bs2*(2*a->nz + mbs));CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 
81 #undef __FUNCT__
82 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_N"
83 PetscErrorCode MatForwardSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx)
84 {
85   PetscFunctionBegin;
86   SETERRQ(1,"not implemented yet");
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_N"
92 PetscErrorCode MatBackwardSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx)
93 {
94   PetscFunctionBegin;
95   SETERRQ(1,"not implemented yet");
96   PetscFunctionReturn(0);
97 }
98 
99 #undef __FUNCT__
100 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private"
101 PetscErrorCode ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x)
102 {
103   PetscErrorCode ierr;
104   PetscInt       nz,*vj,k;
105   PetscInt       bs2 = bs*bs;
106   MatScalar      *v,*diag;
107   PetscScalar    *xk,*xj,*xk_tmp;
108 
109   PetscFunctionBegin;
110   ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr);
111   for (k=0; k<mbs; k++){
112     v  = aa + bs2*ai[k];
113     xk = x + k*bs;      /* Dk*xk = k-th block of x */
114     ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */
115     nz = ai[k+1] - ai[k];
116     vj = aj + ai[k];
117     xj = x + (*vj)*bs;  /* *vj-th block of x, *vj>k */
118     while (nz--) {
119       /* x(:) += U(k,:)^T*(Dk*xk) */
120       Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */
121       vj++; xj = x + (*vj)*bs;
122       v += bs2;
123     }
124     /* xk = inv(Dk)*(Dk*xk) */
125     diag = aa+k*bs2;                            /* ptr to inv(Dk) */
126     Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */
127   }
128   ierr = PetscFree(xk_tmp);CHKERRQ(ierr);
129   PetscFunctionReturn(0);
130 }
131 
132 #undef __FUNCT__
133 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private"
134 PetscErrorCode BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x)
135 {
136   PetscInt       nz,*vj,k;
137   PetscInt       bs2 = bs*bs;
138   MatScalar      *v;
139   PetscScalar    *xk,*xj;
140 
141   PetscFunctionBegin;
142   for (k=mbs-1; k>=0; k--){
143     v  = aa + bs2*ai[k];
144     xk = x + k*bs;        /* xk */
145     nz = ai[k+1] - ai[k];
146     vj = aj + ai[k];
147     xj = x + (*vj)*bs;
148     while (nz--) {
149       /* xk += U(k,:)*x(:) */
150       Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */
151       vj++;
152       v += bs2; xj = x + (*vj)*bs;
153     }
154   }
155   PetscFunctionReturn(0);
156 }
157 
158 #undef __FUNCT__
159 #define __FUNCT__ "MatSolve_SeqSBAIJ_N_NaturalOrdering"
160 PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx)
161 {
162   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
163   PetscErrorCode ierr;
164   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
165   PetscInt       bs=A->rmap->bs;
166   MatScalar      *aa=a->a;
167   PetscScalar    *x,*b;
168 #if defined(PETSC_USE_LOG)
169   PetscInt       bs2 = a->bs2;
170 #endif
171 
172   PetscFunctionBegin;
173   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
174   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
175 
176   /* solve U^T * D * y = b by forward substitution */
177   ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
178   ierr = ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr);
179 
180   /* solve U*x = y by back substitution */
181   ierr = BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr);
182 
183   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
184   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
185   ierr = PetscLogFlops(bs2*(2*a->nz + mbs));CHKERRQ(ierr);
186   PetscFunctionReturn(0);
187 }
188 
189 #undef __FUNCT__
190 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_N_NaturalOrdering"
191 PetscErrorCode MatForwardSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx)
192 {
193   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
194   PetscErrorCode ierr;
195   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
196   PetscInt       bs=A->rmap->bs;
197   MatScalar      *aa=a->a;
198   PetscScalar    *x,*b;
199 #if defined(PETSC_USE_LOG)
200   PetscInt       bs2 = a->bs2;
201 #endif
202 
203   PetscFunctionBegin;
204   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
205   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
206   ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
207   ierr = ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr);
208   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
209   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
210   ierr = PetscLogFlops(bs2*a->nz + A->rmap->N);CHKERRQ(ierr);
211   PetscFunctionReturn(0);
212 }
213 
214 #undef __FUNCT__
215 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering"
216 PetscErrorCode MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx)
217 {
218   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
219   PetscErrorCode ierr;
220   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
221   PetscInt       bs=A->rmap->bs;
222   MatScalar      *aa=a->a;
223   PetscScalar    *x,*b;
224 #if defined(PETSC_USE_LOG)
225   PetscInt       bs2 = a->bs2;
226 #endif
227 
228   PetscFunctionBegin;
229   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
230   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
231   ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
232   ierr = BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr);
233   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
234   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
235   ierr = PetscLogFlops(bs2*a->nz);CHKERRQ(ierr);
236   PetscFunctionReturn(0);
237 }
238 
239 #undef __FUNCT__
240 #define __FUNCT__ "MatSolve_SeqSBAIJ_7"
241 PetscErrorCode MatSolve_SeqSBAIJ_7(Mat A,Vec bb,Vec xx)
242 {
243   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
244   IS             isrow=a->row;
245   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
246   PetscErrorCode ierr;
247   PetscInt       nz,*vj,k,*r,idx;
248   MatScalar      *aa=a->a,*v,*d;
249   PetscScalar    *x,*b,x0,x1,x2,x3,x4,x5,x6,*t,*tp;
250 
251   PetscFunctionBegin;
252   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
253   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
254   t  = a->solve_work;
255   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
256 
257   /* solve U^T * D * y = b by forward substitution */
258   tp = t;
259   for (k=0; k<mbs; k++) { /* t <- perm(b) */
260     idx   = 7*r[k];
261     tp[0] = b[idx];
262     tp[1] = b[idx+1];
263     tp[2] = b[idx+2];
264     tp[3] = b[idx+3];
265     tp[4] = b[idx+4];
266     tp[5] = b[idx+5];
267     tp[6] = b[idx+6];
268     tp += 7;
269   }
270 
271   for (k=0; k<mbs; k++){
272     v  = aa + 49*ai[k];
273     vj = aj + ai[k];
274     tp = t + k*7;
275     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6];
276     nz = ai[k+1] - ai[k];
277     tp = t + (*vj)*7;
278     while (nz--) {
279       tp[0]+=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6;
280       tp[1]+=  v[7]*x0 +  v[8]*x1 +  v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6;
281       tp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6;
282       tp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6;
283       tp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6;
284       tp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6;
285       tp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6;
286       vj++; tp = t + (*vj)*7;
287       v += 49;
288     }
289 
290     /* xk = inv(Dk)*(Dk*xk) */
291     d  = aa+k*49;          /* ptr to inv(Dk) */
292     tp    = t + k*7;
293     tp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6;
294     tp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6;
295     tp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6;
296     tp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6;
297     tp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6;
298     tp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6;
299     tp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6;
300   }
301 
302   /* solve U*x = y by back substitution */
303   for (k=mbs-1; k>=0; k--){
304     v  = aa + 49*ai[k];
305     vj = aj + ai[k];
306     tp    = t + k*7;
307     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];  x6=tp[6]; /* xk */
308     nz = ai[k+1] - ai[k];
309 
310     tp = t + (*vj)*7;
311     while (nz--) {
312       /* xk += U(k,:)*x(:) */
313       x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6];
314       x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6];
315       x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6];
316       x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6];
317       x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6];
318       x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6];
319       x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6];
320       vj++; tp = t + (*vj)*7;
321       v += 49;
322     }
323     tp    = t + k*7;
324     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6;
325     idx   = 7*r[k];
326     x[idx]     = x0;
327     x[idx+1]   = x1;
328     x[idx+2]   = x2;
329     x[idx+3]   = x3;
330     x[idx+4]   = x4;
331     x[idx+5]   = x5;
332     x[idx+6]   = x6;
333   }
334 
335   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
336   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
337   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
338   ierr = PetscLogFlops(49*(2*a->nz + mbs));CHKERRQ(ierr);
339   PetscFunctionReturn(0);
340 }
341 
342 #undef __FUNCT__
343 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private"
344 PetscErrorCode ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
345 {
346   MatScalar      *v,*d;
347   PetscScalar    *xp,x0,x1,x2,x3,x4,x5,x6;
348   PetscInt       nz,*vj,k;
349 
350   PetscFunctionBegin;
351   for (k=0; k<mbs; k++){
352     v  = aa + 49*ai[k];
353     xp = x + k*7;
354     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* Dk*xk = k-th block of x */
355     nz = ai[k+1] - ai[k];
356     vj = aj + ai[k];
357     xp = x + (*vj)*7;
358     while (nz--) {
359       /* x(:) += U(k,:)^T*(Dk*xk) */
360       xp[0]+=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6;
361       xp[1]+=  v[7]*x0 +  v[8]*x1 +  v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6;
362       xp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6;
363       xp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6;
364       xp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6;
365       xp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6;
366       xp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6;
367       vj++; xp = x + (*vj)*7;
368       v += 49;
369     }
370     /* xk = inv(Dk)*(Dk*xk) */
371     d  = aa+k*49;          /* ptr to inv(Dk) */
372     xp = x + k*7;
373     xp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6;
374     xp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6;
375     xp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6;
376     xp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6;
377     xp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6;
378     xp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6;
379     xp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6;
380   }
381   PetscFunctionReturn(0);
382 }
383 
384 #undef __FUNCT__
385 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private"
386 PetscErrorCode BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
387 {
388   MatScalar      *v;
389   PetscScalar    *xp,x0,x1,x2,x3,x4,x5,x6;
390   PetscInt       nz,*vj,k;
391 
392   PetscFunctionBegin;
393   for (k=mbs-1; k>=0; k--){
394     v  = aa + 49*ai[k];
395     xp = x + k*7;
396     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */
397     nz = ai[k+1] - ai[k];
398     vj = aj + ai[k];
399     xp = x + (*vj)*7;
400     while (nz--) {
401       /* xk += U(k,:)*x(:) */
402       x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6];
403       x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6];
404       x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6];
405       x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6];
406       x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6];
407       x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6];
408       x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6];
409       vj++;
410       v += 49; xp = x + (*vj)*7;
411     }
412     xp = x + k*7;
413     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6;
414   }
415   PetscFunctionReturn(0);
416 }
417 
418 #undef __FUNCT__
419 #define __FUNCT__ "MatSolve_SeqSBAIJ_7_NaturalOrdering"
420 PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
421 {
422   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
423   PetscErrorCode ierr;
424   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
425   MatScalar      *aa=a->a;
426   PetscScalar    *x,*b;
427 
428   PetscFunctionBegin;
429   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
430   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
431 
432   /* solve U^T * D * y = b by forward substitution */
433   ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
434   ierr = ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
435 
436   /* solve U*x = y by back substitution */
437   ierr = BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
438 
439   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
440   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
441   ierr = PetscLogFlops(49*(2*a->nz + mbs));CHKERRQ(ierr);
442   PetscFunctionReturn(0);
443 }
444 
445 #undef __FUNCT__
446 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_7_NaturalOrdering"
447 PetscErrorCode MatForwardSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
448 {
449   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
450   PetscErrorCode ierr;
451   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
452   MatScalar      *aa=a->a;
453   PetscScalar    *x,*b;
454 
455   PetscFunctionBegin;
456   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
457   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
458   ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
459   ierr = ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
460   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
461   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
462   ierr = PetscLogFlops(49*a->nz + mbs);CHKERRQ(ierr);
463   PetscFunctionReturn(0);
464 }
465 
466 #undef __FUNCT__
467 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering"
468 PetscErrorCode MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
469 {
470   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
471   PetscErrorCode ierr;
472   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
473   MatScalar      *aa=a->a;
474   PetscScalar    *x,*b;
475 
476   PetscFunctionBegin;
477   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
478   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
479   ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
480   ierr = BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
481   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
482   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
483   ierr = PetscLogFlops(49*a->nz);CHKERRQ(ierr);
484   PetscFunctionReturn(0);
485 }
486 
487 #undef __FUNCT__
488 #define __FUNCT__ "MatSolve_SeqSBAIJ_6"
489 PetscErrorCode MatSolve_SeqSBAIJ_6(Mat A,Vec bb,Vec xx)
490 {
491   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
492   IS             isrow=a->row;
493   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
494   PetscErrorCode ierr;
495   PetscInt       nz,*vj,k,*r,idx;
496   MatScalar      *aa=a->a,*v,*d;
497   PetscScalar    *x,*b,x0,x1,x2,x3,x4,x5,*t,*tp;
498 
499   PetscFunctionBegin;
500   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
501   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
502   t  = a->solve_work;
503   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
504 
505   /* solve U^T * D * y = b by forward substitution */
506   tp = t;
507   for (k=0; k<mbs; k++) { /* t <- perm(b) */
508     idx   = 6*r[k];
509     tp[0] = b[idx];
510     tp[1] = b[idx+1];
511     tp[2] = b[idx+2];
512     tp[3] = b[idx+3];
513     tp[4] = b[idx+4];
514     tp[5] = b[idx+5];
515     tp += 6;
516   }
517 
518   for (k=0; k<mbs; k++){
519     v  = aa + 36*ai[k];
520     vj = aj + ai[k];
521     tp = t + k*6;
522     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];
523     nz = ai[k+1] - ai[k];
524     tp = t + (*vj)*6;
525     while (nz--) {
526       tp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5;
527       tp[1] +=  v[6]*x0 +  v[7]*x1 +  v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5;
528       tp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5;
529       tp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5;
530       tp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5;
531       tp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5;
532       vj++; tp = t + (*vj)*6;
533       v += 36;
534     }
535 
536     /* xk = inv(Dk)*(Dk*xk) */
537     d  = aa+k*36;          /* ptr to inv(Dk) */
538     tp    = t + k*6;
539     tp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5;
540     tp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5;
541     tp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5;
542     tp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5;
543     tp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5;
544     tp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5;
545   }
546 
547   /* solve U*x = y by back substitution */
548   for (k=mbs-1; k>=0; k--){
549     v  = aa + 36*ai[k];
550     vj = aj + ai[k];
551     tp    = t + k*6;
552     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];  /* xk */
553     nz = ai[k+1] - ai[k];
554 
555     tp = t + (*vj)*6;
556     while (nz--) {
557       /* xk += U(k,:)*x(:) */
558       x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5];
559       x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5];
560       x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5];
561       x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5];
562       x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5];
563       x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5];
564       vj++; tp = t + (*vj)*6;
565       v += 36;
566     }
567     tp    = t + k*6;
568     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5;
569     idx   = 6*r[k];
570     x[idx]     = x0;
571     x[idx+1]   = x1;
572     x[idx+2]   = x2;
573     x[idx+3]   = x3;
574     x[idx+4]   = x4;
575     x[idx+5]   = x5;
576   }
577 
578   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
579   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
580   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
581   ierr = PetscLogFlops(36*(2*a->nz + mbs));CHKERRQ(ierr);
582   PetscFunctionReturn(0);
583 }
584 
585 #undef __FUNCT__
586 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private"
587 PetscErrorCode ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
588 {
589   MatScalar      *v,*d;
590   PetscScalar    *xp,x0,x1,x2,x3,x4,x5;
591   PetscInt       nz,*vj,k;
592 
593   PetscFunctionBegin;
594   for (k=0; k<mbs; k++){
595     v  = aa + 36*ai[k];
596     xp = x + k*6;
597     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* Dk*xk = k-th block of x */
598     nz = ai[k+1] - ai[k];
599     vj = aj + ai[k];
600     xp = x + (*vj)*6;
601     while (nz--) {
602       /* x(:) += U(k,:)^T*(Dk*xk) */
603       xp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5;
604       xp[1] +=  v[6]*x0 +  v[7]*x1 +  v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5;
605       xp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5;
606       xp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5;
607       xp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5;
608       xp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5;
609       vj++; xp = x + (*vj)*6;
610       v += 36;
611     }
612     /* xk = inv(Dk)*(Dk*xk) */
613     d  = aa+k*36;          /* ptr to inv(Dk) */
614     xp = x + k*6;
615     xp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5;
616     xp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5;
617     xp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5;
618     xp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5;
619     xp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5;
620     xp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5;
621   }
622   PetscFunctionReturn(0);
623 }
624 #undef __FUNCT__
625 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private"
626 PetscErrorCode BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
627 {
628   MatScalar      *v;
629   PetscScalar    *xp,x0,x1,x2,x3,x4,x5;
630   PetscInt       nz,*vj,k;
631 
632   PetscFunctionBegin;
633   for (k=mbs-1; k>=0; k--){
634     v  = aa + 36*ai[k];
635     xp = x + k*6;
636     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */
637     nz = ai[k+1] - ai[k];
638     vj = aj + ai[k];
639     xp = x + (*vj)*6;
640     while (nz--) {
641       /* xk += U(k,:)*x(:) */
642       x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5];
643       x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5];
644       x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5];
645       x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5];
646       x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5];
647       x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5];
648       vj++;
649       v += 36; xp = x + (*vj)*6;
650     }
651     xp = x + k*6;
652     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5;
653   }
654   PetscFunctionReturn(0);
655 }
656 
657 
658 #undef __FUNCT__
659 #define __FUNCT__ "MatSolve_SeqSBAIJ_6_NaturalOrdering"
660 PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
661 {
662   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
663   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
664   MatScalar      *aa=a->a;
665   PetscScalar    *x,*b;
666   PetscErrorCode ierr;
667 
668   PetscFunctionBegin;
669   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
670   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
671 
672   /* solve U^T * D * y = b by forward substitution */
673   ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
674   ierr = ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
675 
676   /* solve U*x = y by back substitution */
677   ierr = BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
678 
679   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
680   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
681   ierr = PetscLogFlops(36*(2*a->nz + mbs));CHKERRQ(ierr);
682   PetscFunctionReturn(0);
683 }
684 
685 #undef __FUNCT__
686 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_6_NaturalOrdering"
687 PetscErrorCode MatForwardSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
688 {
689   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
690   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
691   MatScalar      *aa=a->a;
692   PetscScalar    *x,*b;
693   PetscErrorCode ierr;
694 
695   PetscFunctionBegin;
696   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
697   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
698   ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
699   ierr = ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
700   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
701   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
702   ierr = PetscLogFlops(36*a->nz + mbs);CHKERRQ(ierr);
703   PetscFunctionReturn(0);
704 }
705 
706 #undef __FUNCT__
707 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering"
708 PetscErrorCode MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
709 {
710   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
711   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
712   MatScalar      *aa=a->a;
713   PetscScalar    *x,*b;
714   PetscErrorCode ierr;
715 
716   PetscFunctionBegin;
717   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
718   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
719   ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
720   ierr = BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
721   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
722   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
723   ierr = PetscLogFlops(36*a->nz);CHKERRQ(ierr);
724   PetscFunctionReturn(0);
725 }
726 
727 #undef __FUNCT__
728 #define __FUNCT__ "MatSolve_SeqSBAIJ_5"
729 PetscErrorCode MatSolve_SeqSBAIJ_5(Mat A,Vec bb,Vec xx)
730 {
731   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
732   IS             isrow=a->row;
733   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
734   PetscErrorCode ierr;
735   PetscInt       nz,*vj,k,*r,idx;
736   MatScalar      *aa=a->a,*v,*diag;
737   PetscScalar    *x,*b,x0,x1,x2,x3,x4,*t,*tp;
738 
739   PetscFunctionBegin;
740   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
741   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
742   t  = a->solve_work;
743   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
744 
745   /* solve U^T * D * y = b by forward substitution */
746   tp = t;
747   for (k=0; k<mbs; k++) { /* t <- perm(b) */
748     idx   = 5*r[k];
749     tp[0] = b[idx];
750     tp[1] = b[idx+1];
751     tp[2] = b[idx+2];
752     tp[3] = b[idx+3];
753     tp[4] = b[idx+4];
754     tp += 5;
755   }
756 
757   for (k=0; k<mbs; k++){
758     v  = aa + 25*ai[k];
759     vj = aj + ai[k];
760     tp = t + k*5;
761     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];
762     nz = ai[k+1] - ai[k];
763 
764     tp = t + (*vj)*5;
765     while (nz--) {
766       tp[0] +=  v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4;
767       tp[1] +=  v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4;
768       tp[2] += v[10]*x0+ v[11]*x1+ v[12]*x2+ v[13]*x3+ v[14]*x4;
769       tp[3] += v[15]*x0+ v[16]*x1+ v[17]*x2+ v[18]*x3+ v[19]*x4;
770       tp[4] += v[20]*x0+ v[21]*x1+ v[22]*x2+ v[23]*x3+ v[24]*x4;
771       vj++; tp = t + (*vj)*5;
772       v += 25;
773     }
774 
775     /* xk = inv(Dk)*(Dk*xk) */
776     diag  = aa+k*25;          /* ptr to inv(Dk) */
777     tp    = t + k*5;
778       tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
779       tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
780       tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
781       tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
782       tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
783   }
784 
785   /* solve U*x = y by back substitution */
786   for (k=mbs-1; k>=0; k--){
787     v  = aa + 25*ai[k];
788     vj = aj + ai[k];
789     tp    = t + k*5;
790     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];/* xk */
791     nz = ai[k+1] - ai[k];
792 
793     tp = t + (*vj)*5;
794     while (nz--) {
795       /* xk += U(k,:)*x(:) */
796       x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4];
797       x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4];
798       x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4];
799       x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4];
800       x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4];
801       vj++; tp = t + (*vj)*5;
802       v += 25;
803     }
804     tp    = t + k*5;
805     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4;
806     idx   = 5*r[k];
807     x[idx]     = x0;
808     x[idx+1]   = x1;
809     x[idx+2]   = x2;
810     x[idx+3]   = x3;
811     x[idx+4]   = x4;
812   }
813 
814   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
815   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
816   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
817   ierr = PetscLogFlops(25*(2*a->nz + mbs));CHKERRQ(ierr);
818   PetscFunctionReturn(0);
819 }
820 
821 #undef __FUNCT__
822 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private"
823 PetscErrorCode ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
824 {
825   MatScalar      *v,*diag;
826   PetscScalar    *xp,x0,x1,x2,x3,x4;
827   PetscInt       nz,*vj,k;
828 
829   PetscFunctionBegin;
830   for (k=0; k<mbs; k++){
831     v  = aa + 25*ai[k];
832     xp = x + k*5;
833     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */
834     nz = ai[k+1] - ai[k];
835     vj = aj + ai[k];
836     xp = x + (*vj)*5;
837     while (nz--) {
838       /* x(:) += U(k,:)^T*(Dk*xk) */
839       xp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4;
840       xp[1] +=  v[5]*x0 +  v[6]*x1 +  v[7]*x2 + v[8]*x3 + v[9]*x4;
841       xp[2] += v[10]*x0 + v[11]*x1 + v[12]*x2+ v[13]*x3+ v[14]*x4;
842       xp[3] += v[15]*x0 + v[16]*x1 + v[17]*x2+ v[18]*x3+ v[19]*x4;
843       xp[4] += v[20]*x0 + v[21]*x1 + v[22]*x2+ v[23]*x3+ v[24]*x4;
844       vj++; xp = x + (*vj)*5;
845       v += 25;
846     }
847     /* xk = inv(Dk)*(Dk*xk) */
848     diag = aa+k*25;          /* ptr to inv(Dk) */
849     xp   = x + k*5;
850     xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
851     xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
852     xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
853     xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
854     xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
855   }
856   PetscFunctionReturn(0);
857 }
858 
859 #undef __FUNCT__
860 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private"
861 PetscErrorCode BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
862 {
863   MatScalar      *v;
864   PetscScalar    *xp,x0,x1,x2,x3,x4;
865   PetscInt       nz,*vj,k;
866 
867   PetscFunctionBegin;
868   for (k=mbs-1; k>=0; k--){
869     v  = aa + 25*ai[k];
870     xp = x + k*5;
871     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* xk */
872     nz = ai[k+1] - ai[k];
873     vj = aj + ai[k];
874     xp = x + (*vj)*5;
875     while (nz--) {
876       /* xk += U(k,:)*x(:) */
877       x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4];
878       x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4];
879       x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4];
880       x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4];
881       x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4];
882       vj++;
883       v += 25; xp = x + (*vj)*5;
884     }
885     xp = x + k*5;
886     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4;
887   }
888   PetscFunctionReturn(0);
889 }
890 
891 #undef __FUNCT__
892 #define __FUNCT__ "MatSolve_SeqSBAIJ_5_NaturalOrdering"
893 PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
894 {
895   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
896   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
897   MatScalar      *aa=a->a;
898   PetscScalar    *x,*b;
899   PetscErrorCode ierr;
900 
901   PetscFunctionBegin;
902   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
903   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
904 
905   /* solve U^T * D * y = b by forward substitution */
906   ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
907   ierr = ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
908 
909   /* solve U*x = y by back substitution */
910   ierr = BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
911 
912   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
913   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
914   ierr = PetscLogFlops(25*(2*a->nz + mbs));CHKERRQ(ierr);
915   PetscFunctionReturn(0);
916 }
917 
918 #undef __FUNCT__
919 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_5_NaturalOrdering"
920 PetscErrorCode MatForwardSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
921 {
922   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
923   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
924   MatScalar      *aa=a->a;
925   PetscScalar    *x,*b;
926   PetscErrorCode ierr;
927 
928   PetscFunctionBegin;
929   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
930   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
931   ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
932   ierr = ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
933   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
934   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
935   ierr = PetscLogFlops(25*(a->nz + mbs));CHKERRQ(ierr);
936   PetscFunctionReturn(0);
937 }
938 
939 #undef __FUNCT__
940 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering"
941 PetscErrorCode MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
942 {
943   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
944   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
945   MatScalar      *aa=a->a;
946   PetscScalar    *x,*b;
947   PetscErrorCode ierr;
948 
949   PetscFunctionBegin;
950   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
951   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
952   ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
953   ierr = BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
954   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
955   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
956   ierr = PetscLogFlops(25*a->nz);CHKERRQ(ierr);
957   PetscFunctionReturn(0);
958 }
959 
960 #undef __FUNCT__
961 #define __FUNCT__ "MatSolve_SeqSBAIJ_4"
962 PetscErrorCode MatSolve_SeqSBAIJ_4(Mat A,Vec bb,Vec xx)
963 {
964   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
965   IS             isrow=a->row;
966   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
967   PetscErrorCode ierr;
968   PetscInt       nz,*vj,k,*r,idx;
969   MatScalar      *aa=a->a,*v,*diag;
970   PetscScalar    *x,*b,x0,x1,x2,x3,*t,*tp;
971 
972   PetscFunctionBegin;
973   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
974   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
975   t  = a->solve_work;
976   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
977 
978   /* solve U^T * D * y = b by forward substitution */
979   tp = t;
980   for (k=0; k<mbs; k++) { /* t <- perm(b) */
981     idx   = 4*r[k];
982     tp[0] = b[idx];
983     tp[1] = b[idx+1];
984     tp[2] = b[idx+2];
985     tp[3] = b[idx+3];
986     tp += 4;
987   }
988 
989   for (k=0; k<mbs; k++){
990     v  = aa + 16*ai[k];
991     vj = aj + ai[k];
992     tp = t + k*4;
993     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3];
994     nz = ai[k+1] - ai[k];
995 
996     tp = t + (*vj)*4;
997     while (nz--) {
998       tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3;
999       tp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3;
1000       tp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3;
1001       tp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3;
1002       vj++; tp = t + (*vj)*4;
1003       v += 16;
1004     }
1005 
1006     /* xk = inv(Dk)*(Dk*xk) */
1007     diag  = aa+k*16;          /* ptr to inv(Dk) */
1008     tp    = t + k*4;
1009     tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
1010     tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
1011     tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
1012     tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
1013   }
1014 
1015   /* solve U*x = y by back substitution */
1016   for (k=mbs-1; k>=0; k--){
1017     v  = aa + 16*ai[k];
1018     vj = aj + ai[k];
1019     tp    = t + k*4;
1020     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */
1021     nz = ai[k+1] - ai[k];
1022 
1023     tp = t + (*vj)*4;
1024     while (nz--) {
1025       /* xk += U(k,:)*x(:) */
1026       x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3];
1027       x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3];
1028       x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3];
1029       x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3];
1030       vj++; tp = t + (*vj)*4;
1031       v += 16;
1032     }
1033     tp    = t + k*4;
1034     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3;
1035     idx        = 4*r[k];
1036     x[idx]     = x0;
1037     x[idx+1]   = x1;
1038     x[idx+2]   = x2;
1039     x[idx+3]   = x3;
1040   }
1041 
1042   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1043   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1044   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1045   ierr = PetscLogFlops(16*(2*a->nz + mbs));CHKERRQ(ierr);
1046   PetscFunctionReturn(0);
1047 }
1048 
1049 #undef __FUNCT__
1050 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private"
1051 PetscErrorCode ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1052 {
1053   MatScalar      *v,*diag;
1054   PetscScalar    *xp,x0,x1,x2,x3;
1055   PetscInt       nz,*vj,k;
1056 
1057   PetscFunctionBegin;
1058   for (k=0; k<mbs; k++){
1059     v  = aa + 16*ai[k];
1060     xp = x + k*4;
1061     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */
1062     nz = ai[k+1] - ai[k];
1063     vj = aj + ai[k];
1064     xp = x + (*vj)*4;
1065     while (nz--) {
1066       /* x(:) += U(k,:)^T*(Dk*xk) */
1067       xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3;
1068       xp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3;
1069       xp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3;
1070       xp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3;
1071       vj++; xp = x + (*vj)*4;
1072       v += 16;
1073     }
1074     /* xk = inv(Dk)*(Dk*xk) */
1075     diag = aa+k*16;          /* ptr to inv(Dk) */
1076     xp   = x + k*4;
1077     xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
1078     xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
1079     xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
1080     xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
1081   }
1082   PetscFunctionReturn(0);
1083 }
1084 
1085 #undef __FUNCT__
1086 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private"
1087 PetscErrorCode BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1088 {
1089   MatScalar      *v;
1090   PetscScalar    *xp,x0,x1,x2,x3;
1091   PetscInt       nz,*vj,k;
1092 
1093   PetscFunctionBegin;
1094   for (k=mbs-1; k>=0; k--){
1095     v  = aa + 16*ai[k];
1096     xp = x + k*4;
1097     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */
1098     nz = ai[k+1] - ai[k];
1099     vj = aj + ai[k];
1100     xp = x + (*vj)*4;
1101     while (nz--) {
1102       /* xk += U(k,:)*x(:) */
1103       x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3];
1104       x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3];
1105       x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3];
1106       x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3];
1107       vj++;
1108       v += 16; xp = x + (*vj)*4;
1109     }
1110     xp = x + k*4;
1111     xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3;
1112   }
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 #undef __FUNCT__
1117 #define __FUNCT__ "MatSolve_SeqSBAIJ_4_NaturalOrdering"
1118 PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
1119 {
1120   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1121   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1122   MatScalar      *aa=a->a;
1123   PetscScalar    *x,*b;
1124   PetscErrorCode ierr;
1125 
1126   PetscFunctionBegin;
1127   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1128   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1129 
1130   /* solve U^T * D * y = b by forward substitution */
1131   ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
1132   ierr = ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1133 
1134   /* solve U*x = y by back substitution */
1135   ierr = BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1136   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1137   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1138   ierr = PetscLogFlops(16*(2*a->nz + mbs));CHKERRQ(ierr);
1139   PetscFunctionReturn(0);
1140 }
1141 
1142 #undef __FUNCT__
1143 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_4_NaturalOrdering"
1144 PetscErrorCode MatForwardSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
1145 {
1146   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1147   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1148   MatScalar      *aa=a->a;
1149   PetscScalar    *x,*b;
1150   PetscErrorCode ierr;
1151 
1152   PetscFunctionBegin;
1153   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1154   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1155   ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
1156   ierr = ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1157   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1158   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1159   ierr = PetscLogFlops(16*a->nz + mbs);CHKERRQ(ierr);
1160   PetscFunctionReturn(0);
1161 }
1162 
1163 #undef __FUNCT__
1164 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering"
1165 PetscErrorCode MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
1166 {
1167   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1168   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1169   MatScalar      *aa=a->a;
1170   PetscScalar    *x,*b;
1171   PetscErrorCode ierr;
1172 
1173   PetscFunctionBegin;
1174   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1175   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1176   ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1177   ierr = BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1178   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1179   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1180   ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr);
1181   PetscFunctionReturn(0);
1182 }
1183 
1184 #undef __FUNCT__
1185 #define __FUNCT__ "MatSolve_SeqSBAIJ_3"
1186 PetscErrorCode MatSolve_SeqSBAIJ_3(Mat A,Vec bb,Vec xx)
1187 {
1188   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1189   IS             isrow=a->row;
1190   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1191   PetscErrorCode ierr;
1192   PetscInt       nz,*vj,k,*r,idx;
1193   MatScalar      *aa=a->a,*v,*diag;
1194   PetscScalar    *x,*b,x0,x1,x2,*t,*tp;
1195 
1196   PetscFunctionBegin;
1197   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1198   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1199   t  = a->solve_work;
1200   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1201 
1202   /* solve U^T * D * y = b by forward substitution */
1203   tp = t;
1204   for (k=0; k<mbs; k++) { /* t <- perm(b) */
1205     idx   = 3*r[k];
1206     tp[0] = b[idx];
1207     tp[1] = b[idx+1];
1208     tp[2] = b[idx+2];
1209     tp += 3;
1210   }
1211 
1212   for (k=0; k<mbs; k++){
1213     v  = aa + 9*ai[k];
1214     vj = aj + ai[k];
1215     tp = t + k*3;
1216     x0 = tp[0]; x1 = tp[1]; x2 = tp[2];
1217     nz = ai[k+1] - ai[k];
1218 
1219     tp = t + (*vj)*3;
1220     while (nz--) {
1221       tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2;
1222       tp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2;
1223       tp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2;
1224       vj++; tp = t + (*vj)*3;
1225       v += 9;
1226     }
1227 
1228     /* xk = inv(Dk)*(Dk*xk) */
1229     diag  = aa+k*9;          /* ptr to inv(Dk) */
1230     tp    = t + k*3;
1231     tp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
1232     tp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
1233     tp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
1234   }
1235 
1236   /* solve U*x = y by back substitution */
1237   for (k=mbs-1; k>=0; k--){
1238     v  = aa + 9*ai[k];
1239     vj = aj + ai[k];
1240     tp    = t + k*3;
1241     x0 = tp[0]; x1 = tp[1]; x2 = tp[2];  /* xk */
1242     nz = ai[k+1] - ai[k];
1243 
1244     tp = t + (*vj)*3;
1245     while (nz--) {
1246       /* xk += U(k,:)*x(:) */
1247       x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2];
1248       x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2];
1249       x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2];
1250       vj++; tp = t + (*vj)*3;
1251       v += 9;
1252     }
1253     tp    = t + k*3;
1254     tp[0] = x0; tp[1] = x1; tp[2] = x2;
1255     idx      = 3*r[k];
1256     x[idx]   = x0;
1257     x[idx+1] = x1;
1258     x[idx+2] = x2;
1259   }
1260 
1261   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1262   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1263   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1264   ierr = PetscLogFlops(9*(2*a->nz + mbs));CHKERRQ(ierr);
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 #undef __FUNCT__
1269 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private"
1270 PetscErrorCode ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1271 {
1272   MatScalar      *v,*diag;
1273   PetscScalar    *xp,x0,x1,x2;
1274   PetscInt       nz,*vj,k;
1275 
1276   PetscFunctionBegin;
1277   for (k=0; k<mbs; k++){
1278     v  = aa + 9*ai[k];
1279     xp = x + k*3;
1280     x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* Dk*xk = k-th block of x */
1281     nz = ai[k+1] - ai[k];
1282     vj = aj + ai[k];
1283     xp = x + (*vj)*3;
1284     while (nz--) {
1285       /* x(:) += U(k,:)^T*(Dk*xk) */
1286       xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2;
1287       xp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2;
1288       xp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2;
1289       vj++; xp = x + (*vj)*3;
1290       v += 9;
1291     }
1292     /* xk = inv(Dk)*(Dk*xk) */
1293     diag = aa+k*9;          /* ptr to inv(Dk) */
1294     xp   = x + k*3;
1295     xp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
1296     xp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
1297     xp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
1298   }
1299   PetscFunctionReturn(0);
1300 }
1301 
1302 #undef __FUNCT__
1303 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private"
1304 PetscErrorCode BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1305 {
1306   MatScalar      *v;
1307   PetscScalar    *xp,x0,x1,x2;
1308   PetscInt       nz,*vj,k;
1309 
1310   PetscFunctionBegin;
1311   for (k=mbs-1; k>=0; k--){
1312     v  = aa + 9*ai[k];
1313     xp = x + k*3;
1314     x0 = xp[0]; x1 = xp[1]; x2 = xp[2];  /* xk */
1315     nz = ai[k+1] - ai[k];
1316     vj = aj + ai[k];
1317     xp = x + (*vj)*3;
1318     while (nz--) {
1319       /* xk += U(k,:)*x(:) */
1320       x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2];
1321       x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2];
1322       x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2];
1323       vj++;
1324       v += 9; xp = x + (*vj)*3;
1325     }
1326     xp = x + k*3;
1327     xp[0] = x0; xp[1] = x1; xp[2] = x2;
1328   }
1329   PetscFunctionReturn(0);
1330 }
1331 
1332 #undef __FUNCT__
1333 #define __FUNCT__ "MatSolve_SeqSBAIJ_3_NaturalOrdering"
1334 PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
1335 {
1336   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1337   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1338   MatScalar      *aa=a->a;
1339   PetscScalar    *x,*b;
1340   PetscErrorCode ierr;
1341 
1342   PetscFunctionBegin;
1343   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1344   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1345 
1346   /* solve U^T * D * y = b by forward substitution */
1347   ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1348   ierr = ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1349 
1350   /* solve U*x = y by back substitution */
1351   ierr = BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1352 
1353   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1354   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1355   ierr = PetscLogFlops(9*(2*a->nz + mbs));CHKERRQ(ierr);
1356   PetscFunctionReturn(0);
1357 }
1358 
1359 #undef __FUNCT__
1360 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_3_NaturalOrdering"
1361 PetscErrorCode MatForwardSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
1362 {
1363   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1364   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1365   MatScalar      *aa=a->a;
1366   PetscScalar    *x,*b;
1367   PetscErrorCode ierr;
1368 
1369   PetscFunctionBegin;
1370   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1371   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1372   ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1373   ierr = ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1374   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1375   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1376   ierr = PetscLogFlops(9*(a->nz + mbs));CHKERRQ(ierr);
1377   PetscFunctionReturn(0);
1378 }
1379 
1380 #undef __FUNCT__
1381 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering"
1382 PetscErrorCode MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
1383 {
1384   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1385   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1386   MatScalar      *aa=a->a;
1387   PetscScalar    *x,*b;
1388   PetscErrorCode ierr;
1389 
1390   PetscFunctionBegin;
1391   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1392   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1393   ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1394   ierr = BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1395   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1396   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1397   ierr = PetscLogFlops(9*a->nz);CHKERRQ(ierr);
1398   PetscFunctionReturn(0);
1399 }
1400 
1401 #undef __FUNCT__
1402 #define __FUNCT__ "MatSolve_SeqSBAIJ_2"
1403 PetscErrorCode MatSolve_SeqSBAIJ_2(Mat A,Vec bb,Vec xx)
1404 {
1405   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ *)A->data;
1406   IS             isrow=a->row;
1407   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1408   PetscErrorCode ierr;
1409   PetscInt       nz,*vj,k,k2,*r,idx;
1410   MatScalar      *aa=a->a,*v,*diag;
1411   PetscScalar    *x,*b,x0,x1,*t;
1412 
1413   PetscFunctionBegin;
1414   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1415   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1416   t  = a->solve_work;
1417   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1418 
1419   /* solve U^T * D * y = perm(b) by forward substitution */
1420   for (k=0; k<mbs; k++) {  /* t <- perm(b) */
1421     idx = 2*r[k];
1422     t[k*2]   = b[idx];
1423     t[k*2+1] = b[idx+1];
1424   }
1425   for (k=0; k<mbs; k++){
1426     v  = aa + 4*ai[k];
1427     vj = aj + ai[k];
1428     k2 = k*2;
1429     x0 = t[k2]; x1 = t[k2+1];
1430     nz = ai[k+1] - ai[k];
1431     while (nz--) {
1432       t[(*vj)*2]   += v[0]*x0 + v[1]*x1;
1433       t[(*vj)*2+1] += v[2]*x0 + v[3]*x1;
1434       vj++; v += 4;
1435     }
1436     diag = aa+k*4;  /* ptr to inv(Dk) */
1437     t[k2]   = diag[0]*x0 + diag[2]*x1;
1438     t[k2+1] = diag[1]*x0 + diag[3]*x1;
1439   }
1440 
1441   /* solve U*x = y by back substitution */
1442   for (k=mbs-1; k>=0; k--){
1443     v  = aa + 4*ai[k];
1444     vj = aj + ai[k];
1445     k2 = k*2;
1446     x0 = t[k2]; x1 = t[k2+1];
1447     nz = ai[k+1] - ai[k];
1448     while (nz--) {
1449       x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1];
1450       x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1];
1451       vj++; v += 4;
1452     }
1453     t[k2]    = x0;
1454     t[k2+1]  = x1;
1455     idx      = 2*r[k];
1456     x[idx]   = x0;
1457     x[idx+1] = x1;
1458   }
1459 
1460   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1461   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1462   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1463   ierr = PetscLogFlops(4*(2*a->nz + mbs));CHKERRQ(ierr);
1464   PetscFunctionReturn(0);
1465 }
1466 
1467 #undef __FUNCT__
1468 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private"
1469 PetscErrorCode ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1470 {
1471   MatScalar      *v,*diag;
1472   PetscScalar    x0,x1;
1473   PetscInt       nz,*vj,k,k2;
1474 
1475   PetscFunctionBegin;
1476   for (k=0; k<mbs; k++){
1477     v  = aa + 4*ai[k];
1478     vj = aj + ai[k];
1479     k2 = k*2;
1480     x0 = x[k2]; x1 = x[k2+1];  /* Dk*xk = k-th block of x */
1481     nz = ai[k+1] - ai[k];
1482 
1483     while (nz--) {
1484       /* x(:) += U(k,:)^T*(Dk*xk) */
1485       x[(*vj)*2]   += v[0]*x0 + v[1]*x1;
1486       x[(*vj)*2+1] += v[2]*x0 + v[3]*x1;
1487       vj++; v += 4;
1488     }
1489     /* xk = inv(Dk)*(Dk*xk) */
1490     diag = aa+k*4;          /* ptr to inv(Dk) */
1491     x[k2]   = diag[0]*x0 + diag[2]*x1;
1492     x[k2+1] = diag[1]*x0 + diag[3]*x1;
1493   }
1494   PetscFunctionReturn(0);
1495 }
1496 
1497 #undef __FUNCT__
1498 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private"
1499 PetscErrorCode BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1500 {
1501   MatScalar      *v;
1502   PetscScalar    x0,x1;
1503   PetscInt       nz,*vj,k,k2;
1504 
1505   PetscFunctionBegin;
1506   for (k=mbs-1; k>=0; k--){
1507     v  = aa + 4*ai[k];
1508     vj = aj + ai[k];
1509     k2 = k*2;
1510     x0 = x[k2]; x1 = x[k2+1];  /* xk */
1511     nz = ai[k+1] - ai[k];
1512     while (nz--) {
1513       /* xk += U(k,:)*x(:) */
1514       x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1];
1515       x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1];
1516       vj++; v += 4;
1517     }
1518     x[k2]     = x0;
1519     x[k2+1]   = x1;
1520   }
1521   PetscFunctionReturn(0);
1522 }
1523 
1524 #undef __FUNCT__
1525 #define __FUNCT__ "MatSolve_SeqSBAIJ_2_NaturalOrdering"
1526 PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
1527 {
1528   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1529   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1530   MatScalar      *aa=a->a;
1531   PetscScalar    *x,*b;
1532   PetscErrorCode ierr;
1533 
1534   PetscFunctionBegin;
1535   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1536   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1537 
1538   /* solve U^T * D * y = b by forward substitution */
1539   ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1540   ierr = ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1541 
1542   /* solve U*x = y by back substitution */
1543   ierr = BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1544 
1545   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1546   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1547   ierr = PetscLogFlops(4*(2*a->nz + mbs));CHKERRQ(ierr); /* bs2*(2*a->nz + mbs) */
1548   PetscFunctionReturn(0);
1549 }
1550 
1551 #undef __FUNCT__
1552 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_2_NaturalOrdering"
1553 PetscErrorCode MatForwardSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
1554 {
1555   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1556   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1557   MatScalar      *aa=a->a;
1558   PetscScalar    *x,*b;
1559   PetscErrorCode ierr;
1560 
1561   PetscFunctionBegin;
1562   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1563   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1564   ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1565   ierr = ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1566   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1567   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1568   ierr = PetscLogFlops(4*(a->nz + mbs));CHKERRQ(ierr);
1569   PetscFunctionReturn(0);
1570 }
1571 
1572 #undef __FUNCT__
1573 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering"
1574 PetscErrorCode MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
1575 {
1576   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1577   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1578   MatScalar      *aa=a->a;
1579   PetscScalar    *x,*b;
1580   PetscErrorCode ierr;
1581 
1582   PetscFunctionBegin;
1583   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1584   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1585   ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1586   ierr = BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1587   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1588   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1589   ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr);
1590   PetscFunctionReturn(0);
1591 }
1592 
1593 #undef __FUNCT__
1594 #define __FUNCT__ "MatSolve_SeqSBAIJ_1"
1595 PetscErrorCode MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx)
1596 {
1597   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1598   IS             isrow=a->row;
1599   PetscErrorCode ierr;
1600   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j,*rp;
1601   MatScalar      *aa=a->a,*v;
1602   PetscScalar    *x,*b,xk,*t;
1603   PetscInt       nz,*vj,k;
1604 
1605   PetscFunctionBegin;
1606   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1607   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1608   t    = a->solve_work;
1609   ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr);
1610 
1611   /* solve U^T*D^(1/2)*y = perm(b) by forward substitution */
1612   for (k=0; k<mbs; k++) t[k] = b[rp[k]];
1613   for (k=0; k<mbs; k++){
1614     v  = aa + ai[k] + 1;
1615     vj = aj + ai[k] + 1;
1616     xk = t[k];
1617     nz = ai[k+1] - ai[k] - 1;
1618     while (nz--) t[*vj++] += (*v++) * xk;
1619     t[k] = xk*aa[ai[k]];  /* aa[k] = 1/D(k) */
1620   }
1621 
1622   /* solve U*perm(x) = y by back substitution */
1623   for (k=mbs-1; k>=0; k--){
1624     v  = aa + ai[k] + 1;
1625     vj = aj + ai[k] + 1;
1626     nz = ai[k+1] - ai[k] - 1;
1627     while (nz--) t[k] += (*v++) * t[*vj++];
1628     x[rp[k]] = t[k];
1629   }
1630 
1631   ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr);
1632   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1633   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1634   ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr);
1635   PetscFunctionReturn(0);
1636 }
1637 
1638 #undef __FUNCT__
1639 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1"
1640 PetscErrorCode MatForwardSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx)
1641 {
1642   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1643   IS             isrow=a->row;
1644   PetscErrorCode ierr;
1645   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j,*rp;
1646   MatScalar      *aa=a->a,*v;
1647   PetscReal      diagk;
1648   PetscScalar    *x,*b,xk;
1649   PetscInt       nz,*vj,k;
1650 
1651   PetscFunctionBegin;
1652   /* solve U^T*D^(1/2)*x = perm(b) by forward substitution */
1653   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1654   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1655   ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr);
1656 
1657   for (k=0; k<mbs; k++) x[k] = b[rp[k]];
1658   for (k=0; k<mbs; k++){
1659     v  = aa + ai[k] + 1;
1660     vj = aj + ai[k] + 1;
1661     xk = x[k];
1662     nz = ai[k+1] - ai[k] - 1;
1663     while (nz--) x[*vj++] += (*v++) * xk;
1664 
1665     diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */
1666     if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative");
1667     x[k] = xk*sqrt(diagk);
1668   }
1669   ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr);
1670   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1671   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1672   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
1673   PetscFunctionReturn(0);
1674 }
1675 
1676 #undef __FUNCT__
1677 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1"
1678 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx)
1679 {
1680   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1681   IS             isrow=a->row;
1682   PetscErrorCode ierr;
1683   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j,*rp;
1684   MatScalar      *aa=a->a,*v;
1685   PetscReal      diagk;
1686   PetscScalar    *x,*b,*t;
1687   PetscInt       nz,*vj,k;
1688 
1689   PetscFunctionBegin;
1690   /* solve D^(1/2)*U*perm(x) = b by back substitution */
1691   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1692   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1693   t    = a->solve_work;
1694   ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr);
1695 
1696   for (k=mbs-1; k>=0; k--){
1697     v  = aa + ai[k] + 1;
1698     vj = aj + ai[k] + 1;
1699     diagk = PetscRealPart(aa[ai[k]]);
1700     if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative");
1701     t[k] = b[k] * sqrt(diagk);
1702     nz = ai[k+1] - ai[k] - 1;
1703     while (nz--) t[k] += (*v++) * t[*vj++];
1704     x[rp[k]] = t[k];
1705   }
1706   ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr);
1707   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1708   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1709   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
1710   PetscFunctionReturn(0);
1711 }
1712 
1713 #undef __FUNCT__
1714 #define __FUNCT__ "MatSolves_SeqSBAIJ_1"
1715 PetscErrorCode MatSolves_SeqSBAIJ_1(Mat A,Vecs bb,Vecs xx)
1716 {
1717   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1718   PetscErrorCode ierr;
1719 
1720   PetscFunctionBegin;
1721   if (A->rmap->bs == 1) {
1722     ierr = MatSolve_SeqSBAIJ_1(A,bb->v,xx->v);CHKERRQ(ierr);
1723   } else {
1724     IS              isrow=a->row;
1725     PetscInt             mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,i;
1726     MatScalar       *aa=a->a,*v;
1727     PetscScalar     *x,*b,*t;
1728     PetscInt             nz,*vj,k,n;
1729     if (bb->n > a->solves_work_n) {
1730       ierr = PetscFree(a->solves_work);CHKERRQ(ierr);
1731       ierr = PetscMalloc(bb->n*A->rmap->N*sizeof(PetscScalar),&a->solves_work);CHKERRQ(ierr);
1732       a->solves_work_n = bb->n;
1733     }
1734     n    = bb->n;
1735     ierr = VecGetArray(bb->v,&b);CHKERRQ(ierr);
1736     ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr);
1737     t    = a->solves_work;
1738 
1739     ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr);
1740 
1741     /* solve U^T*D*y = perm(b) by forward substitution */
1742     for (k=0; k<mbs; k++) {for (i=0; i<n; i++) t[n*k+i] = b[rp[k]+i*mbs];} /* values are stored interlaced in t */
1743     for (k=0; k<mbs; k++){
1744       v  = aa + ai[k];
1745       vj = aj + ai[k];
1746       nz = ai[k+1] - ai[k];
1747       while (nz--) {
1748         for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i];
1749         v++;vj++;
1750       }
1751       for (i=0; i<n; i++) t[n*k+i] *= aa[k];  /* note: aa[k] = 1/D(k) */
1752     }
1753 
1754     /* solve U*perm(x) = y by back substitution */
1755     for (k=mbs-1; k>=0; k--){
1756       v  = aa + ai[k];
1757       vj = aj + ai[k];
1758       nz = ai[k+1] - ai[k];
1759       while (nz--) {
1760         for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i];
1761         v++;vj++;
1762       }
1763       for (i=0; i<n; i++) x[rp[k]+i*mbs] = t[n*k+i];
1764     }
1765 
1766     ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr);
1767     ierr = VecRestoreArray(bb->v,&b);CHKERRQ(ierr);
1768     ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr);
1769     ierr = PetscLogFlops(bb->n*(4*a->nz + A->rmap->N));CHKERRQ(ierr);
1770   }
1771   PetscFunctionReturn(0);
1772 }
1773 
1774 /*
1775       Special case where the matrix was ILU(0) factored in the natural
1776    ordering. This eliminates the need for the column and row permutation.
1777 */
1778 #undef __FUNCT__
1779 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering"
1780 PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
1781 {
1782   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1783   PetscErrorCode ierr;
1784   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1785   MatScalar      *aa=a->a,*v;
1786   PetscScalar    *x,*b,xk;
1787   PetscInt       nz,*vj,k;
1788 
1789   PetscFunctionBegin;
1790   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1791   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1792 
1793   /* solve U^T*D*y = b by forward substitution */
1794   ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1795   for (k=0; k<mbs; k++){
1796     v  = aa + ai[k] + 1;
1797     vj = aj + ai[k] + 1;
1798     xk = x[k];
1799     nz = ai[k+1] - ai[k] - 1;     /* exclude diag[k] */
1800     while (nz--) x[*vj++] += (*v++) * xk;
1801     x[k] = xk*aa[ai[k]];  /* note: aa[diag[k]] = 1/D(k) */
1802   }
1803 
1804   /* solve U*x = y by back substitution */
1805   for (k=mbs-2; k>=0; k--){
1806     v  = aa + ai[k] + 1;
1807     vj = aj + ai[k] + 1;
1808     xk = x[k];
1809     nz = ai[k+1] - ai[k] - 1;
1810     while (nz--) xk += (*v++) * x[*vj++];
1811     x[k] = xk;
1812   }
1813 
1814   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1815   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1816   ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr);
1817   PetscFunctionReturn(0);
1818 }
1819 
1820 #undef __FUNCT__
1821 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1_NaturalOrdering"
1822 PetscErrorCode MatForwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
1823 {
1824   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1825   PetscErrorCode ierr;
1826   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1827   MatScalar      *aa=a->a,*v;
1828   PetscReal      diagk;
1829   PetscScalar    *x,*b;
1830   PetscInt       nz,*vj,k;
1831 
1832   PetscFunctionBegin;
1833   /* solve U^T*D^(1/2)*x = b by forward substitution */
1834   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1835   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1836   ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1837   for (k=0; k<mbs; k++){
1838     v  = aa + ai[k] + 1;
1839     vj = aj + ai[k] + 1;
1840     nz = ai[k+1] - ai[k] - 1;     /* exclude diag[k] */
1841     while (nz--) x[*vj++] += (*v++) * x[k];
1842     diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */
1843     if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ2(PETSC_ERR_SUP,"Diagonal (%g,%g) must be real and nonnegative",PetscRealPart(aa[ai[k]]),PetscImaginaryPart(aa[ai[k]]));
1844     x[k] *= sqrt(diagk);
1845   }
1846   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1847   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1848   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
1849   PetscFunctionReturn(0);
1850 }
1851 
1852 #undef __FUNCT__
1853 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering"
1854 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
1855 {
1856   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1857   PetscErrorCode ierr;
1858   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1859   MatScalar      *aa=a->a,*v;
1860   PetscReal      diagk;
1861   PetscScalar    *x,*b;
1862   PetscInt       nz,*vj,k;
1863 
1864   PetscFunctionBegin;
1865   /* solve D^(1/2)*U*x = b by back substitution */
1866   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1867   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1868 
1869   for (k=mbs-1; k>=0; k--){
1870     v  = aa + ai[k] + 1;
1871     vj = aj + ai[k] + 1;
1872     diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */
1873     if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative");
1874     x[k] = sqrt(diagk)*b[k];
1875     nz = ai[k+1] - ai[k] - 1;
1876     while (nz--) x[k] += (*v++) * x[*vj++];
1877   }
1878   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1879   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1880   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
1881   PetscFunctionReturn(0);
1882 }
1883 
1884 extern PetscErrorCode MatSeqSBAIJSetNumericFactorization(Mat,PetscTruth);
1885 
1886 /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */
1887 #undef __FUNCT__
1888 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ_MSR"
1889 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ_MSR(Mat A,IS perm,MatFactorInfo *info,Mat *B)
1890 {
1891   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b;
1892   PetscErrorCode ierr;
1893   PetscInt       *rip,i,mbs = a->mbs,*ai = a->i,*aj = a->j;
1894   PetscInt       *jutmp,bs = A->rmap->bs,bs2=a->bs2;
1895   PetscInt       m,reallocs = 0,*levtmp;
1896   PetscInt       *prowl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd;
1897   PetscInt       incrlev,*lev,shift,prow,nz;
1898   PetscReal      f = info->fill,levels = info->levels;
1899   PetscTruth     perm_identity;
1900 
1901   PetscFunctionBegin;
1902   /* check whether perm is the identity mapping */
1903   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1904 
1905   if (perm_identity){
1906     a->permute = PETSC_FALSE;
1907     ai = a->i; aj = a->j;
1908   } else { /*  non-trivial permutation */
1909     a->permute = PETSC_TRUE;
1910     ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr);
1911     ai = a->inew; aj = a->jnew;
1912   }
1913 
1914   /* initialization */
1915   ierr  = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1916   umax  = (PetscInt)(f*ai[mbs] + 1);
1917   ierr  = PetscMalloc(umax*sizeof(PetscInt),&lev);CHKERRQ(ierr);
1918   umax += mbs + 1;
1919   shift = mbs + 1;
1920   ierr  = PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);CHKERRQ(ierr);
1921   ierr  = PetscMalloc(umax*sizeof(PetscInt),&ju);CHKERRQ(ierr);
1922   iu[0] = mbs + 1;
1923   juidx = mbs + 1;
1924   /* prowl: linked list for pivot row */
1925   ierr    = PetscMalloc((3*mbs+1)*sizeof(PetscInt),&prowl);CHKERRQ(ierr);
1926   /* q: linked list for col index */
1927   q       = prowl + mbs;
1928   levtmp  = q     + mbs;
1929 
1930   for (i=0; i<mbs; i++){
1931     prowl[i] = mbs;
1932     q[i] = 0;
1933   }
1934 
1935   /* for each row k */
1936   for (k=0; k<mbs; k++){
1937     nzk  = 0;
1938     q[k] = mbs;
1939     /* copy current row into linked list */
1940     nz = ai[rip[k]+1] - ai[rip[k]];
1941     j = ai[rip[k]];
1942     while (nz--){
1943       vj = rip[aj[j++]];
1944       if (vj > k){
1945         qm = k;
1946         do {
1947           m  = qm; qm = q[m];
1948         } while(qm < vj);
1949         if (qm == vj) {
1950           SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n");
1951         }
1952         nzk++;
1953         q[m]       = vj;
1954         q[vj]      = qm;
1955         levtmp[vj] = 0;   /* initialize lev for nonzero element */
1956       }
1957     }
1958 
1959     /* modify nonzero structure of k-th row by computing fill-in
1960        for each row prow to be merged in */
1961     prow = k;
1962     prow = prowl[prow]; /* next pivot row (== 0 for symbolic factorization) */
1963 
1964     while (prow < k){
1965       /* merge row prow into k-th row */
1966       jmin = iu[prow] + 1;
1967       jmax = iu[prow+1];
1968       qm = k;
1969       for (j=jmin; j<jmax; j++){
1970         incrlev = lev[j-shift] + 1;
1971 	if (incrlev > levels) continue;
1972         vj      = ju[j];
1973         do {
1974           m = qm; qm = q[m];
1975         } while (qm < vj);
1976         if (qm != vj){      /* a fill */
1977           nzk++; q[m] = vj; q[vj] = qm; qm = vj;
1978           levtmp[vj] = incrlev;
1979         } else {
1980           if (levtmp[vj] > incrlev) levtmp[vj] = incrlev;
1981         }
1982       }
1983       prow = prowl[prow]; /* next pivot row */
1984     }
1985 
1986     /* add k to row list for first nonzero element in k-th row */
1987     if (nzk > 1){
1988       i = q[k]; /* col value of first nonzero element in k_th row of U */
1989       prowl[k] = prowl[i]; prowl[i] = k;
1990     }
1991     iu[k+1] = iu[k] + nzk;
1992 
1993     /* allocate more space to ju and lev if needed */
1994     if (iu[k+1] > umax) {
1995       /* estimate how much additional space we will need */
1996       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1997       /* just double the memory each time */
1998       maxadd = umax;
1999       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
2000       umax += maxadd;
2001 
2002       /* allocate a longer ju */
2003       ierr     = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr);
2004       ierr     = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr);
2005       ierr     = PetscFree(ju);CHKERRQ(ierr);
2006       ju       = jutmp;
2007 
2008       ierr     = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr);
2009       ierr     = PetscMemcpy(jutmp,lev,(iu[k]-shift)*sizeof(PetscInt));CHKERRQ(ierr);
2010       ierr     = PetscFree(lev);CHKERRQ(ierr);
2011       lev      = jutmp;
2012       reallocs += 2; /* count how many times we realloc */
2013     }
2014 
2015     /* save nonzero structure of k-th row in ju */
2016     i=k;
2017     while (nzk --) {
2018       i                = q[i];
2019       ju[juidx]        = i;
2020       lev[juidx-shift] = levtmp[i];
2021       juidx++;
2022     }
2023   }
2024 
2025 #if defined(PETSC_USE_INFO)
2026   if (ai[mbs] != 0) {
2027     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
2028     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
2029     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2030     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
2031     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
2032   } else {
2033     ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2034   }
2035 #endif
2036 
2037   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2038   ierr = PetscFree(prowl);CHKERRQ(ierr);
2039   ierr = PetscFree(lev);CHKERRQ(ierr);
2040 
2041   /* put together the new matrix */
2042   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*B,bs,0,PETSC_NULL);CHKERRQ(ierr);
2043 
2044   /* ierr = PetscLogObjectParent(*B,iperm);CHKERRQ(ierr); */
2045   b    = (Mat_SeqSBAIJ*)(*B)->data;
2046   ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr);
2047   b->singlemalloc = PETSC_FALSE;
2048   b->free_a       = PETSC_TRUE;
2049   b->free_ij      = PETSC_TRUE;
2050   /* the next line frees the default space generated by the Create() */
2051   ierr    = PetscFree3(b->a,b->j,b->i);CHKERRQ(ierr);
2052   ierr    = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
2053   b->j    = ju;
2054   b->i    = iu;
2055   b->diag = 0;
2056   b->ilen = 0;
2057   b->imax = 0;
2058 
2059   if (b->row) {
2060     ierr = ISDestroy(b->row);CHKERRQ(ierr);
2061   }
2062   if (b->icol) {
2063     ierr = ISDestroy(b->icol);CHKERRQ(ierr);
2064   }
2065   b->row  = perm;
2066   b->icol = perm;
2067   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2068   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2069   ierr    = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2070   /* In b structure:  Free imax, ilen, old a, old j.
2071      Allocate idnew, solve_work, new a, new j */
2072   ierr    = PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2073   b->maxnz = b->nz = iu[mbs];
2074 
2075   (*B)->factor                 = MAT_FACTOR_CHOLESKY;
2076   (*B)->info.factor_mallocs    = reallocs;
2077   (*B)->info.fill_ratio_given  = f;
2078   if (ai[mbs] != 0) {
2079     (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
2080   } else {
2081     (*B)->info.fill_ratio_needed = 0.0;
2082   }
2083   ierr = MatSeqSBAIJSetNumericFactorization(*B,perm_identity);CHKERRQ(ierr);
2084   PetscFunctionReturn(0);
2085 }
2086 
2087 #include "petscbt.h"
2088 #include "src/mat/utils/freespace.h"
2089 #undef __FUNCT__
2090 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ"
2091 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
2092 {
2093   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data;
2094   Mat_SeqSBAIJ       *b;
2095   PetscErrorCode     ierr;
2096   PetscTruth         perm_identity,free_ij = PETSC_TRUE,missing;
2097   PetscInt           bs=A->rmap->bs,am=a->mbs,d;
2098   PetscInt           reallocs=0,*rip,i,*ai,*aj,*ui;
2099   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2100   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,ncols,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr;
2101   PetscReal          fill=info->fill,levels=info->levels,ratio_needed;
2102   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2103   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2104   PetscBT            lnkbt;
2105 
2106   PetscFunctionBegin;
2107   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2108   if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2109 
2110   /*
2111    This code originally uses Modified Sparse Row (MSR) storage
2112    (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice!
2113    Then it is rewritten so the factor B takes seqsbaij format. However the associated
2114    MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1,
2115    thus the original code in MSR format is still used for these cases.
2116    The code below should replace MatICCFactorSymbolic_SeqSBAIJ_MSR() whenever
2117    MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor.
2118   */
2119   if (bs > 1){
2120     ierr = MatICCFactorSymbolic_SeqSBAIJ_MSR(A,perm,info,fact);CHKERRQ(ierr);
2121     PetscFunctionReturn(0);
2122   }
2123 
2124   /* check whether perm is the identity mapping */
2125   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2126 
2127   /* special case that simply copies fill pattern */
2128   if (!levels && perm_identity) {
2129     a->permute = PETSC_FALSE;
2130     /* reuse the column pointers and row offsets for memory savings */
2131     ui           = a->i;
2132     uj           = a->j;
2133     free_ij      = PETSC_FALSE;
2134     ratio_needed = 1.0;
2135   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2136     if (perm_identity){
2137       a->permute = PETSC_FALSE;
2138       ai = a->i; aj = a->j;
2139     } else { /*  non-trivial permutation */
2140       a->permute = PETSC_TRUE;
2141       ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr);
2142       ai   = a->inew; aj = a->jnew;
2143     }
2144     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2145 
2146     /* initialization */
2147     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2148     ui[0] = 0;
2149 
2150     /* jl: linked list for storing indices of the pivot rows
2151        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2152     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
2153     il         = jl + am;
2154     uj_ptr     = (PetscInt**)(il + am);
2155     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
2156     for (i=0; i<am; i++){
2157       jl[i] = am; il[i] = 0;
2158     }
2159 
2160     /* create and initialize a linked list for storing column indices of the active row k */
2161     nlnk = am + 1;
2162     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2163 
2164     /* initial FreeSpace size is fill*(ai[am]+1) */
2165     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
2166     current_space = free_space;
2167     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
2168     current_space_lvl = free_space_lvl;
2169 
2170     for (k=0; k<am; k++){  /* for each active row k */
2171       /* initialize lnk by the column indices of row rip[k] */
2172       nzk   = 0;
2173       ncols = ai[rip[k]+1] - ai[rip[k]];
2174       cols  = aj+ai[rip[k]];
2175       ierr = PetscIncompleteLLInit(ncols,cols,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2176       nzk += nlnk;
2177 
2178       /* update lnk by computing fill-in for each pivot row to be merged in */
2179       prow = jl[k]; /* 1st pivot row */
2180 
2181       while (prow < k){
2182         nextprow = jl[prow];
2183 
2184         /* merge prow into k-th row */
2185         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2186         jmax = ui[prow+1];
2187         ncols = jmax-jmin;
2188         i     = jmin - ui[prow];
2189         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2190         j     = *(uj_lvl_ptr[prow] + i - 1);
2191         cols_lvl = uj_lvl_ptr[prow]+i;
2192         ierr = PetscICCLLAddSorted(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2193         nzk += nlnk;
2194 
2195         /* update il and jl for prow */
2196         if (jmin < jmax){
2197           il[prow] = jmin;
2198           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2199         }
2200         prow = nextprow;
2201       }
2202 
2203       /* if free space is not available, make more free space */
2204       if (current_space->local_remaining<nzk) {
2205         i = am - k + 1; /* num of unfactored rows */
2206         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2207         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2208         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2209         reallocs++;
2210       }
2211 
2212       /* copy data into free_space and free_space_lvl, then initialize lnk */
2213       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2214 
2215       /* add the k-th row into il and jl */
2216       if (nzk-1 > 0){
2217         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2218         jl[k] = jl[i]; jl[i] = k;
2219         il[k] = ui[k] + 1;
2220       }
2221       uj_ptr[k]     = current_space->array;
2222       uj_lvl_ptr[k] = current_space_lvl->array;
2223 
2224       current_space->array           += nzk;
2225       current_space->local_used      += nzk;
2226       current_space->local_remaining -= nzk;
2227       current_space_lvl->array           += nzk;
2228       current_space_lvl->local_used      += nzk;
2229       current_space_lvl->local_remaining -= nzk;
2230 
2231       ui[k+1] = ui[k] + nzk;
2232     }
2233 
2234 #if defined(PETSC_USE_INFO)
2235     if (ai[am] != 0) {
2236       PetscReal af = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2237       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2238       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2239       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2240     } else {
2241       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2242     }
2243 #endif
2244 
2245     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2246     ierr = PetscFree(jl);CHKERRQ(ierr);
2247 
2248     /* destroy list of free space and other temporary array(s) */
2249     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2250     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
2251     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2252     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2253     if (ai[am] != 0) {
2254       ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2255     } else {
2256       ratio_needed = 0.0;
2257     }
2258   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2259 
2260   /* put together the new matrix in MATSEQSBAIJ format */
2261   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*fact,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
2262 
2263   b = (Mat_SeqSBAIJ*)(*fact)->data;
2264   ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr);
2265   b->singlemalloc = PETSC_FALSE;
2266   b->free_a  = PETSC_TRUE;
2267   b->free_ij = free_ij;
2268   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2269   b->j    = uj;
2270   b->i    = ui;
2271   b->diag    = 0;
2272   b->ilen    = 0;
2273   b->imax    = 0;
2274   b->row     = perm;
2275   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2276   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2277   b->icol = perm;
2278   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2279   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2280   b->maxnz = b->nz = ui[am];
2281 
2282   (*fact)->factor                 = MAT_FACTOR_CHOLESKY;
2283   (*fact)->info.factor_mallocs    = reallocs;
2284   (*fact)->info.fill_ratio_given  = fill;
2285   (*fact)->info.fill_ratio_needed = ratio_needed;
2286   ierr = MatSeqSBAIJSetNumericFactorization(*fact,perm_identity);CHKERRQ(ierr);
2287   PetscFunctionReturn(0);
2288 }
2289 
2290