xref: /petsc/src/mat/impls/sbaij/seq/sbaijfact2.c (revision 7d6bfa3b9d7db0ccd4cc481237114ca8dbb0dbff)
1 #define PETSCMAT_DLL
2 
3 /*
4     Factorization code for SBAIJ format.
5 */
6 
7 #include "../src/mat/impls/sbaij/seq/sbaij.h"
8 #include "../src/mat/impls/baij/seq/baij.h"
9 #include "../src/inline/ilu.h"
10 #include "../src/inline/dot.h"
11 
12 #undef __FUNCT__
13 #define __FUNCT__ "MatSolve_SeqSBAIJ_N"
14 PetscErrorCode MatSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx)
15 {
16   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
17   IS              isrow=a->row;
18   PetscInt        mbs=a->mbs,*ai=a->i,*aj=a->j;
19   PetscErrorCode  ierr;
20   const PetscInt  *r;
21   PetscInt        nz,*vj,k,idx,k1;
22   PetscInt        bs=A->rmap->bs,bs2 = a->bs2;
23   MatScalar       *aa=a->a,*v,*diag;
24   PetscScalar     *x,*xk,*xj,*b,*xk_tmp,*t;
25 
26   PetscFunctionBegin;
27   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
28   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
29   t  = a->solve_work;
30   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
31   ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr);
32 
33   /* solve U^T * D * y = b by forward substitution */
34   xk = t;
35   for (k=0; k<mbs; k++) { /* t <- perm(b) */
36     idx   = bs*r[k];
37     for (k1=0; k1<bs; k1++) *xk++ = b[idx+k1];
38   }
39   for (k=0; k<mbs; k++){
40     v  = aa + bs2*ai[k];
41     xk = t + k*bs;      /* Dk*xk = k-th block of x */
42     ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */
43     nz = ai[k+1] - ai[k];
44     vj = aj + ai[k];
45     xj = t + (*vj)*bs;  /* *vj-th block of x, *vj>k */
46     while (nz--) {
47       /* x(:) += U(k,:)^T*(Dk*xk) */
48       Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */
49       vj++; xj = t + (*vj)*bs;
50       v += bs2;
51     }
52     /* xk = inv(Dk)*(Dk*xk) */
53     diag = aa+k*bs2;                            /* ptr to inv(Dk) */
54     Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */
55   }
56 
57   /* solve U*x = y by back substitution */
58   for (k=mbs-1; k>=0; k--){
59     v  = aa + bs2*ai[k];
60     xk = t + k*bs;        /* xk */
61     nz = ai[k+1] - ai[k];
62     vj = aj + ai[k];
63     xj = t + (*vj)*bs;
64     while (nz--) {
65       /* xk += U(k,:)*x(:) */
66       Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */
67       vj++;
68       v += bs2; xj = t + (*vj)*bs;
69     }
70     idx   = bs*r[k];
71     for (k1=0; k1<bs; k1++) x[idx+k1] = *xk++;
72   }
73 
74   ierr = PetscFree(xk_tmp);CHKERRQ(ierr);
75   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
76   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
77   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
78   ierr = PetscLogFlops(bs2*(2*a->nz + mbs));CHKERRQ(ierr);
79   PetscFunctionReturn(0);
80 }
81 
82 #undef __FUNCT__
83 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_N"
84 PetscErrorCode MatForwardSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx)
85 {
86   PetscFunctionBegin;
87   SETERRQ(1,"not implemented yet");
88   PetscFunctionReturn(0);
89 }
90 
91 #undef __FUNCT__
92 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_N"
93 PetscErrorCode MatBackwardSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx)
94 {
95   PetscFunctionBegin;
96   SETERRQ(1,"not implemented yet");
97   PetscFunctionReturn(0);
98 }
99 
100 #undef __FUNCT__
101 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private"
102 PetscErrorCode ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x)
103 {
104   PetscErrorCode ierr;
105   PetscInt       nz,*vj,k;
106   PetscInt       bs2 = bs*bs;
107   MatScalar      *v,*diag;
108   PetscScalar    *xk,*xj,*xk_tmp;
109 
110   PetscFunctionBegin;
111   ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr);
112   for (k=0; k<mbs; k++){
113     v  = aa + bs2*ai[k];
114     xk = x + k*bs;      /* Dk*xk = k-th block of x */
115     ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */
116     nz = ai[k+1] - ai[k];
117     vj = aj + ai[k];
118     xj = x + (*vj)*bs;  /* *vj-th block of x, *vj>k */
119     while (nz--) {
120       /* x(:) += U(k,:)^T*(Dk*xk) */
121       Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */
122       vj++; xj = x + (*vj)*bs;
123       v += bs2;
124     }
125     /* xk = inv(Dk)*(Dk*xk) */
126     diag = aa+k*bs2;                            /* ptr to inv(Dk) */
127     Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */
128   }
129   ierr = PetscFree(xk_tmp);CHKERRQ(ierr);
130   PetscFunctionReturn(0);
131 }
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private"
135 PetscErrorCode BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x)
136 {
137   PetscInt       nz,*vj,k;
138   PetscInt       bs2 = bs*bs;
139   MatScalar      *v;
140   PetscScalar    *xk,*xj;
141 
142   PetscFunctionBegin;
143   for (k=mbs-1; k>=0; k--){
144     v  = aa + bs2*ai[k];
145     xk = x + k*bs;        /* xk */
146     nz = ai[k+1] - ai[k];
147     vj = aj + ai[k];
148     xj = x + (*vj)*bs;
149     while (nz--) {
150       /* xk += U(k,:)*x(:) */
151       Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */
152       vj++;
153       v += bs2; xj = x + (*vj)*bs;
154     }
155   }
156   PetscFunctionReturn(0);
157 }
158 
159 #undef __FUNCT__
160 #define __FUNCT__ "MatSolve_SeqSBAIJ_N_NaturalOrdering"
161 PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx)
162 {
163   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
164   PetscErrorCode ierr;
165   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
166   PetscInt       bs=A->rmap->bs;
167   MatScalar      *aa=a->a;
168   PetscScalar    *x,*b;
169 #if defined(PETSC_USE_LOG)
170   PetscInt       bs2 = a->bs2;
171 #endif
172 
173   PetscFunctionBegin;
174   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
175   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
176 
177   /* solve U^T * D * y = b by forward substitution */
178   ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
179   ierr = ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr);
180 
181   /* solve U*x = y by back substitution */
182   ierr = BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr);
183 
184   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
185   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
186   ierr = PetscLogFlops(bs2*(2*a->nz + mbs));CHKERRQ(ierr);
187   PetscFunctionReturn(0);
188 }
189 
190 #undef __FUNCT__
191 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_N_NaturalOrdering"
192 PetscErrorCode MatForwardSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx)
193 {
194   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
195   PetscErrorCode ierr;
196   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
197   PetscInt       bs=A->rmap->bs;
198   MatScalar      *aa=a->a;
199   PetscScalar    *x,*b;
200 #if defined(PETSC_USE_LOG)
201   PetscInt       bs2 = a->bs2;
202 #endif
203 
204   PetscFunctionBegin;
205   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
206   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
207   ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
208   ierr = ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr);
209   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
210   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
211   ierr = PetscLogFlops(bs2*a->nz + A->rmap->N);CHKERRQ(ierr);
212   PetscFunctionReturn(0);
213 }
214 
215 #undef __FUNCT__
216 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering"
217 PetscErrorCode MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx)
218 {
219   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
220   PetscErrorCode ierr;
221   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
222   PetscInt       bs=A->rmap->bs;
223   MatScalar      *aa=a->a;
224   PetscScalar    *x,*b;
225 #if defined(PETSC_USE_LOG)
226   PetscInt       bs2 = a->bs2;
227 #endif
228 
229   PetscFunctionBegin;
230   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
231   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
232   ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
233   ierr = BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr);
234   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
235   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
236   ierr = PetscLogFlops(bs2*a->nz);CHKERRQ(ierr);
237   PetscFunctionReturn(0);
238 }
239 
240 #undef __FUNCT__
241 #define __FUNCT__ "MatSolve_SeqSBAIJ_7"
242 PetscErrorCode MatSolve_SeqSBAIJ_7(Mat A,Vec bb,Vec xx)
243 {
244   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
245   IS             isrow=a->row;
246   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
247   PetscErrorCode ierr;
248   const PetscInt *r;
249   PetscInt       nz,*vj,k,idx;
250   MatScalar      *aa=a->a,*v,*d;
251   PetscScalar    *x,*b,x0,x1,x2,x3,x4,x5,x6,*t,*tp;
252 
253   PetscFunctionBegin;
254   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
255   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
256   t  = a->solve_work;
257   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
258 
259   /* solve U^T * D * y = b by forward substitution */
260   tp = t;
261   for (k=0; k<mbs; k++) { /* t <- perm(b) */
262     idx   = 7*r[k];
263     tp[0] = b[idx];
264     tp[1] = b[idx+1];
265     tp[2] = b[idx+2];
266     tp[3] = b[idx+3];
267     tp[4] = b[idx+4];
268     tp[5] = b[idx+5];
269     tp[6] = b[idx+6];
270     tp += 7;
271   }
272 
273   for (k=0; k<mbs; k++){
274     v  = aa + 49*ai[k];
275     vj = aj + ai[k];
276     tp = t + k*7;
277     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6];
278     nz = ai[k+1] - ai[k];
279     tp = t + (*vj)*7;
280     while (nz--) {
281       tp[0]+=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6;
282       tp[1]+=  v[7]*x0 +  v[8]*x1 +  v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6;
283       tp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6;
284       tp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6;
285       tp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6;
286       tp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6;
287       tp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6;
288       vj++; tp = t + (*vj)*7;
289       v += 49;
290     }
291 
292     /* xk = inv(Dk)*(Dk*xk) */
293     d  = aa+k*49;          /* ptr to inv(Dk) */
294     tp    = t + k*7;
295     tp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6;
296     tp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6;
297     tp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6;
298     tp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6;
299     tp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6;
300     tp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6;
301     tp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6;
302   }
303 
304   /* solve U*x = y by back substitution */
305   for (k=mbs-1; k>=0; k--){
306     v  = aa + 49*ai[k];
307     vj = aj + ai[k];
308     tp    = t + k*7;
309     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];  x6=tp[6]; /* xk */
310     nz = ai[k+1] - ai[k];
311 
312     tp = t + (*vj)*7;
313     while (nz--) {
314       /* xk += U(k,:)*x(:) */
315       x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6];
316       x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6];
317       x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6];
318       x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6];
319       x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6];
320       x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6];
321       x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6];
322       vj++; tp = t + (*vj)*7;
323       v += 49;
324     }
325     tp    = t + k*7;
326     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6;
327     idx   = 7*r[k];
328     x[idx]     = x0;
329     x[idx+1]   = x1;
330     x[idx+2]   = x2;
331     x[idx+3]   = x3;
332     x[idx+4]   = x4;
333     x[idx+5]   = x5;
334     x[idx+6]   = x6;
335   }
336 
337   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
338   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
339   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
340   ierr = PetscLogFlops(49*(2*a->nz + mbs));CHKERRQ(ierr);
341   PetscFunctionReturn(0);
342 }
343 
344 #undef __FUNCT__
345 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private"
346 PetscErrorCode ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
347 {
348   MatScalar      *v,*d;
349   PetscScalar    *xp,x0,x1,x2,x3,x4,x5,x6;
350   PetscInt       nz,*vj,k;
351 
352   PetscFunctionBegin;
353   for (k=0; k<mbs; k++){
354     v  = aa + 49*ai[k];
355     xp = x + k*7;
356     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* Dk*xk = k-th block of x */
357     nz = ai[k+1] - ai[k];
358     vj = aj + ai[k];
359     xp = x + (*vj)*7;
360     while (nz--) {
361       /* x(:) += U(k,:)^T*(Dk*xk) */
362       xp[0]+=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6;
363       xp[1]+=  v[7]*x0 +  v[8]*x1 +  v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6;
364       xp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6;
365       xp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6;
366       xp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6;
367       xp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6;
368       xp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6;
369       vj++; xp = x + (*vj)*7;
370       v += 49;
371     }
372     /* xk = inv(Dk)*(Dk*xk) */
373     d  = aa+k*49;          /* ptr to inv(Dk) */
374     xp = x + k*7;
375     xp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6;
376     xp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6;
377     xp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6;
378     xp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6;
379     xp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6;
380     xp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6;
381     xp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6;
382   }
383   PetscFunctionReturn(0);
384 }
385 
386 #undef __FUNCT__
387 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private"
388 PetscErrorCode BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
389 {
390   MatScalar      *v;
391   PetscScalar    *xp,x0,x1,x2,x3,x4,x5,x6;
392   PetscInt       nz,*vj,k;
393 
394   PetscFunctionBegin;
395   for (k=mbs-1; k>=0; k--){
396     v  = aa + 49*ai[k];
397     xp = x + k*7;
398     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */
399     nz = ai[k+1] - ai[k];
400     vj = aj + ai[k];
401     xp = x + (*vj)*7;
402     while (nz--) {
403       /* xk += U(k,:)*x(:) */
404       x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6];
405       x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6];
406       x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6];
407       x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6];
408       x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6];
409       x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6];
410       x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6];
411       vj++;
412       v += 49; xp = x + (*vj)*7;
413     }
414     xp = x + k*7;
415     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6;
416   }
417   PetscFunctionReturn(0);
418 }
419 
420 #undef __FUNCT__
421 #define __FUNCT__ "MatSolve_SeqSBAIJ_7_NaturalOrdering"
422 PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
423 {
424   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
425   PetscErrorCode ierr;
426   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
427   MatScalar      *aa=a->a;
428   PetscScalar    *x,*b;
429 
430   PetscFunctionBegin;
431   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
432   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
433 
434   /* solve U^T * D * y = b by forward substitution */
435   ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
436   ierr = ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
437 
438   /* solve U*x = y by back substitution */
439   ierr = BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
440 
441   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
442   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
443   ierr = PetscLogFlops(49*(2*a->nz + mbs));CHKERRQ(ierr);
444   PetscFunctionReturn(0);
445 }
446 
447 #undef __FUNCT__
448 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_7_NaturalOrdering"
449 PetscErrorCode MatForwardSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
450 {
451   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
452   PetscErrorCode ierr;
453   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
454   MatScalar      *aa=a->a;
455   PetscScalar    *x,*b;
456 
457   PetscFunctionBegin;
458   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
459   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
460   ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
461   ierr = ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
462   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
463   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
464   ierr = PetscLogFlops(49*a->nz + mbs);CHKERRQ(ierr);
465   PetscFunctionReturn(0);
466 }
467 
468 #undef __FUNCT__
469 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering"
470 PetscErrorCode MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
471 {
472   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
473   PetscErrorCode ierr;
474   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
475   MatScalar      *aa=a->a;
476   PetscScalar    *x,*b;
477 
478   PetscFunctionBegin;
479   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
480   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
481   ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
482   ierr = BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
483   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
484   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
485   ierr = PetscLogFlops(49*a->nz);CHKERRQ(ierr);
486   PetscFunctionReturn(0);
487 }
488 
489 #undef __FUNCT__
490 #define __FUNCT__ "MatSolve_SeqSBAIJ_6"
491 PetscErrorCode MatSolve_SeqSBAIJ_6(Mat A,Vec bb,Vec xx)
492 {
493   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
494   IS             isrow=a->row;
495   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
496   PetscErrorCode ierr;
497   const PetscInt *r;
498   PetscInt       nz,*vj,k,idx;
499   MatScalar      *aa=a->a,*v,*d;
500   PetscScalar    *x,*b,x0,x1,x2,x3,x4,x5,*t,*tp;
501 
502   PetscFunctionBegin;
503   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
504   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
505   t  = a->solve_work;
506   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
507 
508   /* solve U^T * D * y = b by forward substitution */
509   tp = t;
510   for (k=0; k<mbs; k++) { /* t <- perm(b) */
511     idx   = 6*r[k];
512     tp[0] = b[idx];
513     tp[1] = b[idx+1];
514     tp[2] = b[idx+2];
515     tp[3] = b[idx+3];
516     tp[4] = b[idx+4];
517     tp[5] = b[idx+5];
518     tp += 6;
519   }
520 
521   for (k=0; k<mbs; k++){
522     v  = aa + 36*ai[k];
523     vj = aj + ai[k];
524     tp = t + k*6;
525     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];
526     nz = ai[k+1] - ai[k];
527     tp = t + (*vj)*6;
528     while (nz--) {
529       tp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5;
530       tp[1] +=  v[6]*x0 +  v[7]*x1 +  v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5;
531       tp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5;
532       tp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5;
533       tp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5;
534       tp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5;
535       vj++; tp = t + (*vj)*6;
536       v += 36;
537     }
538 
539     /* xk = inv(Dk)*(Dk*xk) */
540     d  = aa+k*36;          /* ptr to inv(Dk) */
541     tp    = t + k*6;
542     tp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5;
543     tp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5;
544     tp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5;
545     tp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5;
546     tp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5;
547     tp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5;
548   }
549 
550   /* solve U*x = y by back substitution */
551   for (k=mbs-1; k>=0; k--){
552     v  = aa + 36*ai[k];
553     vj = aj + ai[k];
554     tp    = t + k*6;
555     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];  /* xk */
556     nz = ai[k+1] - ai[k];
557 
558     tp = t + (*vj)*6;
559     while (nz--) {
560       /* xk += U(k,:)*x(:) */
561       x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5];
562       x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5];
563       x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5];
564       x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5];
565       x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5];
566       x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5];
567       vj++; tp = t + (*vj)*6;
568       v += 36;
569     }
570     tp    = t + k*6;
571     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5;
572     idx   = 6*r[k];
573     x[idx]     = x0;
574     x[idx+1]   = x1;
575     x[idx+2]   = x2;
576     x[idx+3]   = x3;
577     x[idx+4]   = x4;
578     x[idx+5]   = x5;
579   }
580 
581   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
582   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
583   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
584   ierr = PetscLogFlops(36*(2*a->nz + mbs));CHKERRQ(ierr);
585   PetscFunctionReturn(0);
586 }
587 
588 #undef __FUNCT__
589 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private"
590 PetscErrorCode ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
591 {
592   MatScalar      *v,*d;
593   PetscScalar    *xp,x0,x1,x2,x3,x4,x5;
594   PetscInt       nz,*vj,k;
595 
596   PetscFunctionBegin;
597   for (k=0; k<mbs; k++){
598     v  = aa + 36*ai[k];
599     xp = x + k*6;
600     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* Dk*xk = k-th block of x */
601     nz = ai[k+1] - ai[k];
602     vj = aj + ai[k];
603     xp = x + (*vj)*6;
604     while (nz--) {
605       /* x(:) += U(k,:)^T*(Dk*xk) */
606       xp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5;
607       xp[1] +=  v[6]*x0 +  v[7]*x1 +  v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5;
608       xp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5;
609       xp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5;
610       xp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5;
611       xp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5;
612       vj++; xp = x + (*vj)*6;
613       v += 36;
614     }
615     /* xk = inv(Dk)*(Dk*xk) */
616     d  = aa+k*36;          /* ptr to inv(Dk) */
617     xp = x + k*6;
618     xp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5;
619     xp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5;
620     xp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5;
621     xp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5;
622     xp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5;
623     xp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5;
624   }
625   PetscFunctionReturn(0);
626 }
627 #undef __FUNCT__
628 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private"
629 PetscErrorCode BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
630 {
631   MatScalar      *v;
632   PetscScalar    *xp,x0,x1,x2,x3,x4,x5;
633   PetscInt       nz,*vj,k;
634 
635   PetscFunctionBegin;
636   for (k=mbs-1; k>=0; k--){
637     v  = aa + 36*ai[k];
638     xp = x + k*6;
639     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */
640     nz = ai[k+1] - ai[k];
641     vj = aj + ai[k];
642     xp = x + (*vj)*6;
643     while (nz--) {
644       /* xk += U(k,:)*x(:) */
645       x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5];
646       x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5];
647       x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5];
648       x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5];
649       x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5];
650       x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5];
651       vj++;
652       v += 36; xp = x + (*vj)*6;
653     }
654     xp = x + k*6;
655     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5;
656   }
657   PetscFunctionReturn(0);
658 }
659 
660 
661 #undef __FUNCT__
662 #define __FUNCT__ "MatSolve_SeqSBAIJ_6_NaturalOrdering"
663 PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
664 {
665   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
666   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
667   MatScalar      *aa=a->a;
668   PetscScalar    *x,*b;
669   PetscErrorCode ierr;
670 
671   PetscFunctionBegin;
672   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
673   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
674 
675   /* solve U^T * D * y = b by forward substitution */
676   ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
677   ierr = ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
678 
679   /* solve U*x = y by back substitution */
680   ierr = BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
681 
682   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
683   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
684   ierr = PetscLogFlops(36*(2*a->nz + mbs));CHKERRQ(ierr);
685   PetscFunctionReturn(0);
686 }
687 
688 #undef __FUNCT__
689 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_6_NaturalOrdering"
690 PetscErrorCode MatForwardSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
691 {
692   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
693   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
694   MatScalar      *aa=a->a;
695   PetscScalar    *x,*b;
696   PetscErrorCode ierr;
697 
698   PetscFunctionBegin;
699   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
700   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
701   ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
702   ierr = ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
703   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
704   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
705   ierr = PetscLogFlops(36*a->nz + mbs);CHKERRQ(ierr);
706   PetscFunctionReturn(0);
707 }
708 
709 #undef __FUNCT__
710 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering"
711 PetscErrorCode MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
712 {
713   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
714   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
715   MatScalar      *aa=a->a;
716   PetscScalar    *x,*b;
717   PetscErrorCode ierr;
718 
719   PetscFunctionBegin;
720   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
721   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
722   ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
723   ierr = BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
724   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
725   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
726   ierr = PetscLogFlops(36*a->nz);CHKERRQ(ierr);
727   PetscFunctionReturn(0);
728 }
729 
730 #undef __FUNCT__
731 #define __FUNCT__ "MatSolve_SeqSBAIJ_5"
732 PetscErrorCode MatSolve_SeqSBAIJ_5(Mat A,Vec bb,Vec xx)
733 {
734   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
735   IS             isrow=a->row;
736   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
737   PetscErrorCode ierr;
738   const PetscInt *r;
739   PetscInt       nz,*vj,k,idx;
740   MatScalar      *aa=a->a,*v,*diag;
741   PetscScalar    *x,*b,x0,x1,x2,x3,x4,*t,*tp;
742 
743   PetscFunctionBegin;
744   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
745   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
746   t  = a->solve_work;
747   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
748 
749   /* solve U^T * D * y = b by forward substitution */
750   tp = t;
751   for (k=0; k<mbs; k++) { /* t <- perm(b) */
752     idx   = 5*r[k];
753     tp[0] = b[idx];
754     tp[1] = b[idx+1];
755     tp[2] = b[idx+2];
756     tp[3] = b[idx+3];
757     tp[4] = b[idx+4];
758     tp += 5;
759   }
760 
761   for (k=0; k<mbs; k++){
762     v  = aa + 25*ai[k];
763     vj = aj + ai[k];
764     tp = t + k*5;
765     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];
766     nz = ai[k+1] - ai[k];
767 
768     tp = t + (*vj)*5;
769     while (nz--) {
770       tp[0] +=  v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4;
771       tp[1] +=  v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4;
772       tp[2] += v[10]*x0+ v[11]*x1+ v[12]*x2+ v[13]*x3+ v[14]*x4;
773       tp[3] += v[15]*x0+ v[16]*x1+ v[17]*x2+ v[18]*x3+ v[19]*x4;
774       tp[4] += v[20]*x0+ v[21]*x1+ v[22]*x2+ v[23]*x3+ v[24]*x4;
775       vj++; tp = t + (*vj)*5;
776       v += 25;
777     }
778 
779     /* xk = inv(Dk)*(Dk*xk) */
780     diag  = aa+k*25;          /* ptr to inv(Dk) */
781     tp    = t + k*5;
782       tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
783       tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
784       tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
785       tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
786       tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
787   }
788 
789   /* solve U*x = y by back substitution */
790   for (k=mbs-1; k>=0; k--){
791     v  = aa + 25*ai[k];
792     vj = aj + ai[k];
793     tp    = t + k*5;
794     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];/* xk */
795     nz = ai[k+1] - ai[k];
796 
797     tp = t + (*vj)*5;
798     while (nz--) {
799       /* xk += U(k,:)*x(:) */
800       x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4];
801       x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4];
802       x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4];
803       x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4];
804       x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4];
805       vj++; tp = t + (*vj)*5;
806       v += 25;
807     }
808     tp    = t + k*5;
809     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4;
810     idx   = 5*r[k];
811     x[idx]     = x0;
812     x[idx+1]   = x1;
813     x[idx+2]   = x2;
814     x[idx+3]   = x3;
815     x[idx+4]   = x4;
816   }
817 
818   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
819   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
820   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
821   ierr = PetscLogFlops(25*(2*a->nz + mbs));CHKERRQ(ierr);
822   PetscFunctionReturn(0);
823 }
824 
825 #undef __FUNCT__
826 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private"
827 PetscErrorCode ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
828 {
829   MatScalar      *v,*diag;
830   PetscScalar    *xp,x0,x1,x2,x3,x4;
831   PetscInt       nz,*vj,k;
832 
833   PetscFunctionBegin;
834   for (k=0; k<mbs; k++){
835     v  = aa + 25*ai[k];
836     xp = x + k*5;
837     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */
838     nz = ai[k+1] - ai[k];
839     vj = aj + ai[k];
840     xp = x + (*vj)*5;
841     while (nz--) {
842       /* x(:) += U(k,:)^T*(Dk*xk) */
843       xp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4;
844       xp[1] +=  v[5]*x0 +  v[6]*x1 +  v[7]*x2 + v[8]*x3 + v[9]*x4;
845       xp[2] += v[10]*x0 + v[11]*x1 + v[12]*x2+ v[13]*x3+ v[14]*x4;
846       xp[3] += v[15]*x0 + v[16]*x1 + v[17]*x2+ v[18]*x3+ v[19]*x4;
847       xp[4] += v[20]*x0 + v[21]*x1 + v[22]*x2+ v[23]*x3+ v[24]*x4;
848       vj++; xp = x + (*vj)*5;
849       v += 25;
850     }
851     /* xk = inv(Dk)*(Dk*xk) */
852     diag = aa+k*25;          /* ptr to inv(Dk) */
853     xp   = x + k*5;
854     xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
855     xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
856     xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
857     xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
858     xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
859   }
860   PetscFunctionReturn(0);
861 }
862 
863 #undef __FUNCT__
864 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private"
865 PetscErrorCode BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
866 {
867   MatScalar      *v;
868   PetscScalar    *xp,x0,x1,x2,x3,x4;
869   PetscInt       nz,*vj,k;
870 
871   PetscFunctionBegin;
872   for (k=mbs-1; k>=0; k--){
873     v  = aa + 25*ai[k];
874     xp = x + k*5;
875     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* xk */
876     nz = ai[k+1] - ai[k];
877     vj = aj + ai[k];
878     xp = x + (*vj)*5;
879     while (nz--) {
880       /* xk += U(k,:)*x(:) */
881       x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4];
882       x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4];
883       x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4];
884       x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4];
885       x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4];
886       vj++;
887       v += 25; xp = x + (*vj)*5;
888     }
889     xp = x + k*5;
890     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4;
891   }
892   PetscFunctionReturn(0);
893 }
894 
895 #undef __FUNCT__
896 #define __FUNCT__ "MatSolve_SeqSBAIJ_5_NaturalOrdering"
897 PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
898 {
899   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
900   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
901   MatScalar      *aa=a->a;
902   PetscScalar    *x,*b;
903   PetscErrorCode ierr;
904 
905   PetscFunctionBegin;
906   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
907   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
908 
909   /* solve U^T * D * y = b by forward substitution */
910   ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
911   ierr = ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
912 
913   /* solve U*x = y by back substitution */
914   ierr = BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
915 
916   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
917   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
918   ierr = PetscLogFlops(25*(2*a->nz + mbs));CHKERRQ(ierr);
919   PetscFunctionReturn(0);
920 }
921 
922 #undef __FUNCT__
923 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_5_NaturalOrdering"
924 PetscErrorCode MatForwardSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
925 {
926   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
927   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
928   MatScalar      *aa=a->a;
929   PetscScalar    *x,*b;
930   PetscErrorCode ierr;
931 
932   PetscFunctionBegin;
933   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
934   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
935   ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
936   ierr = ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
937   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
938   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
939   ierr = PetscLogFlops(25*(a->nz + mbs));CHKERRQ(ierr);
940   PetscFunctionReturn(0);
941 }
942 
943 #undef __FUNCT__
944 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering"
945 PetscErrorCode MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
946 {
947   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
948   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
949   MatScalar      *aa=a->a;
950   PetscScalar    *x,*b;
951   PetscErrorCode ierr;
952 
953   PetscFunctionBegin;
954   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
955   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
956   ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
957   ierr = BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
958   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
959   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
960   ierr = PetscLogFlops(25*a->nz);CHKERRQ(ierr);
961   PetscFunctionReturn(0);
962 }
963 
964 #undef __FUNCT__
965 #define __FUNCT__ "MatSolve_SeqSBAIJ_4"
966 PetscErrorCode MatSolve_SeqSBAIJ_4(Mat A,Vec bb,Vec xx)
967 {
968   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
969   IS             isrow=a->row;
970   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
971   PetscErrorCode ierr;
972   const PetscInt *r;
973   PetscInt       nz,*vj,k,idx;
974   MatScalar      *aa=a->a,*v,*diag;
975   PetscScalar    *x,*b,x0,x1,x2,x3,*t,*tp;
976 
977   PetscFunctionBegin;
978   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
979   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
980   t  = a->solve_work;
981   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
982 
983   /* solve U^T * D * y = b by forward substitution */
984   tp = t;
985   for (k=0; k<mbs; k++) { /* t <- perm(b) */
986     idx   = 4*r[k];
987     tp[0] = b[idx];
988     tp[1] = b[idx+1];
989     tp[2] = b[idx+2];
990     tp[3] = b[idx+3];
991     tp += 4;
992   }
993 
994   for (k=0; k<mbs; k++){
995     v  = aa + 16*ai[k];
996     vj = aj + ai[k];
997     tp = t + k*4;
998     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3];
999     nz = ai[k+1] - ai[k];
1000 
1001     tp = t + (*vj)*4;
1002     while (nz--) {
1003       tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3;
1004       tp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3;
1005       tp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3;
1006       tp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3;
1007       vj++; tp = t + (*vj)*4;
1008       v += 16;
1009     }
1010 
1011     /* xk = inv(Dk)*(Dk*xk) */
1012     diag  = aa+k*16;          /* ptr to inv(Dk) */
1013     tp    = t + k*4;
1014     tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
1015     tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
1016     tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
1017     tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
1018   }
1019 
1020   /* solve U*x = y by back substitution */
1021   for (k=mbs-1; k>=0; k--){
1022     v  = aa + 16*ai[k];
1023     vj = aj + ai[k];
1024     tp    = t + k*4;
1025     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */
1026     nz = ai[k+1] - ai[k];
1027 
1028     tp = t + (*vj)*4;
1029     while (nz--) {
1030       /* xk += U(k,:)*x(:) */
1031       x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3];
1032       x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3];
1033       x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3];
1034       x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3];
1035       vj++; tp = t + (*vj)*4;
1036       v += 16;
1037     }
1038     tp    = t + k*4;
1039     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3;
1040     idx        = 4*r[k];
1041     x[idx]     = x0;
1042     x[idx+1]   = x1;
1043     x[idx+2]   = x2;
1044     x[idx+3]   = x3;
1045   }
1046 
1047   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1048   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1049   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1050   ierr = PetscLogFlops(16*(2*a->nz + mbs));CHKERRQ(ierr);
1051   PetscFunctionReturn(0);
1052 }
1053 
1054 #undef __FUNCT__
1055 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private"
1056 PetscErrorCode ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1057 {
1058   MatScalar      *v,*diag;
1059   PetscScalar    *xp,x0,x1,x2,x3;
1060   PetscInt       nz,*vj,k;
1061 
1062   PetscFunctionBegin;
1063   for (k=0; k<mbs; k++){
1064     v  = aa + 16*ai[k];
1065     xp = x + k*4;
1066     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */
1067     nz = ai[k+1] - ai[k];
1068     vj = aj + ai[k];
1069     xp = x + (*vj)*4;
1070     while (nz--) {
1071       /* x(:) += U(k,:)^T*(Dk*xk) */
1072       xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3;
1073       xp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3;
1074       xp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3;
1075       xp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3;
1076       vj++; xp = x + (*vj)*4;
1077       v += 16;
1078     }
1079     /* xk = inv(Dk)*(Dk*xk) */
1080     diag = aa+k*16;          /* ptr to inv(Dk) */
1081     xp   = x + k*4;
1082     xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
1083     xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
1084     xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
1085     xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
1086   }
1087   PetscFunctionReturn(0);
1088 }
1089 
1090 #undef __FUNCT__
1091 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private"
1092 PetscErrorCode BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1093 {
1094   MatScalar      *v;
1095   PetscScalar    *xp,x0,x1,x2,x3;
1096   PetscInt       nz,*vj,k;
1097 
1098   PetscFunctionBegin;
1099   for (k=mbs-1; k>=0; k--){
1100     v  = aa + 16*ai[k];
1101     xp = x + k*4;
1102     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */
1103     nz = ai[k+1] - ai[k];
1104     vj = aj + ai[k];
1105     xp = x + (*vj)*4;
1106     while (nz--) {
1107       /* xk += U(k,:)*x(:) */
1108       x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3];
1109       x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3];
1110       x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3];
1111       x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3];
1112       vj++;
1113       v += 16; xp = x + (*vj)*4;
1114     }
1115     xp = x + k*4;
1116     xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3;
1117   }
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 #undef __FUNCT__
1122 #define __FUNCT__ "MatSolve_SeqSBAIJ_4_NaturalOrdering"
1123 PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
1124 {
1125   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1126   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1127   MatScalar      *aa=a->a;
1128   PetscScalar    *x,*b;
1129   PetscErrorCode ierr;
1130 
1131   PetscFunctionBegin;
1132   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1133   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1134 
1135   /* solve U^T * D * y = b by forward substitution */
1136   ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
1137   ierr = ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1138 
1139   /* solve U*x = y by back substitution */
1140   ierr = BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1141   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1142   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1143   ierr = PetscLogFlops(16*(2*a->nz + mbs));CHKERRQ(ierr);
1144   PetscFunctionReturn(0);
1145 }
1146 
1147 #undef __FUNCT__
1148 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_4_NaturalOrdering"
1149 PetscErrorCode MatForwardSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
1150 {
1151   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1152   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1153   MatScalar      *aa=a->a;
1154   PetscScalar    *x,*b;
1155   PetscErrorCode ierr;
1156 
1157   PetscFunctionBegin;
1158   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1159   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1160   ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
1161   ierr = ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1162   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1163   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1164   ierr = PetscLogFlops(16*a->nz + mbs);CHKERRQ(ierr);
1165   PetscFunctionReturn(0);
1166 }
1167 
1168 #undef __FUNCT__
1169 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering"
1170 PetscErrorCode MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
1171 {
1172   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1173   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1174   MatScalar      *aa=a->a;
1175   PetscScalar    *x,*b;
1176   PetscErrorCode ierr;
1177 
1178   PetscFunctionBegin;
1179   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1180   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1181   ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1182   ierr = BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1183   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1184   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1185   ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr);
1186   PetscFunctionReturn(0);
1187 }
1188 
1189 #undef __FUNCT__
1190 #define __FUNCT__ "MatSolve_SeqSBAIJ_3"
1191 PetscErrorCode MatSolve_SeqSBAIJ_3(Mat A,Vec bb,Vec xx)
1192 {
1193   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1194   IS             isrow=a->row;
1195   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1196   PetscErrorCode ierr;
1197   const PetscInt *r;
1198   PetscInt       nz,*vj,k,idx;
1199   MatScalar      *aa=a->a,*v,*diag;
1200   PetscScalar    *x,*b,x0,x1,x2,*t,*tp;
1201 
1202   PetscFunctionBegin;
1203   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1204   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1205   t  = a->solve_work;
1206   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1207 
1208   /* solve U^T * D * y = b by forward substitution */
1209   tp = t;
1210   for (k=0; k<mbs; k++) { /* t <- perm(b) */
1211     idx   = 3*r[k];
1212     tp[0] = b[idx];
1213     tp[1] = b[idx+1];
1214     tp[2] = b[idx+2];
1215     tp += 3;
1216   }
1217 
1218   for (k=0; k<mbs; k++){
1219     v  = aa + 9*ai[k];
1220     vj = aj + ai[k];
1221     tp = t + k*3;
1222     x0 = tp[0]; x1 = tp[1]; x2 = tp[2];
1223     nz = ai[k+1] - ai[k];
1224 
1225     tp = t + (*vj)*3;
1226     while (nz--) {
1227       tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2;
1228       tp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2;
1229       tp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2;
1230       vj++; tp = t + (*vj)*3;
1231       v += 9;
1232     }
1233 
1234     /* xk = inv(Dk)*(Dk*xk) */
1235     diag  = aa+k*9;          /* ptr to inv(Dk) */
1236     tp    = t + k*3;
1237     tp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
1238     tp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
1239     tp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
1240   }
1241 
1242   /* solve U*x = y by back substitution */
1243   for (k=mbs-1; k>=0; k--){
1244     v  = aa + 9*ai[k];
1245     vj = aj + ai[k];
1246     tp    = t + k*3;
1247     x0 = tp[0]; x1 = tp[1]; x2 = tp[2];  /* xk */
1248     nz = ai[k+1] - ai[k];
1249 
1250     tp = t + (*vj)*3;
1251     while (nz--) {
1252       /* xk += U(k,:)*x(:) */
1253       x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2];
1254       x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2];
1255       x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2];
1256       vj++; tp = t + (*vj)*3;
1257       v += 9;
1258     }
1259     tp    = t + k*3;
1260     tp[0] = x0; tp[1] = x1; tp[2] = x2;
1261     idx      = 3*r[k];
1262     x[idx]   = x0;
1263     x[idx+1] = x1;
1264     x[idx+2] = x2;
1265   }
1266 
1267   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1268   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1269   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1270   ierr = PetscLogFlops(9*(2*a->nz + mbs));CHKERRQ(ierr);
1271   PetscFunctionReturn(0);
1272 }
1273 
1274 #undef __FUNCT__
1275 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private"
1276 PetscErrorCode ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1277 {
1278   MatScalar      *v,*diag;
1279   PetscScalar    *xp,x0,x1,x2;
1280   PetscInt       nz,*vj,k;
1281 
1282   PetscFunctionBegin;
1283   for (k=0; k<mbs; k++){
1284     v  = aa + 9*ai[k];
1285     xp = x + k*3;
1286     x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* Dk*xk = k-th block of x */
1287     nz = ai[k+1] - ai[k];
1288     vj = aj + ai[k];
1289     xp = x + (*vj)*3;
1290     while (nz--) {
1291       /* x(:) += U(k,:)^T*(Dk*xk) */
1292       xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2;
1293       xp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2;
1294       xp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2;
1295       vj++; xp = x + (*vj)*3;
1296       v += 9;
1297     }
1298     /* xk = inv(Dk)*(Dk*xk) */
1299     diag = aa+k*9;          /* ptr to inv(Dk) */
1300     xp   = x + k*3;
1301     xp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
1302     xp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
1303     xp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
1304   }
1305   PetscFunctionReturn(0);
1306 }
1307 
1308 #undef __FUNCT__
1309 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private"
1310 PetscErrorCode BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1311 {
1312   MatScalar      *v;
1313   PetscScalar    *xp,x0,x1,x2;
1314   PetscInt       nz,*vj,k;
1315 
1316   PetscFunctionBegin;
1317   for (k=mbs-1; k>=0; k--){
1318     v  = aa + 9*ai[k];
1319     xp = x + k*3;
1320     x0 = xp[0]; x1 = xp[1]; x2 = xp[2];  /* xk */
1321     nz = ai[k+1] - ai[k];
1322     vj = aj + ai[k];
1323     xp = x + (*vj)*3;
1324     while (nz--) {
1325       /* xk += U(k,:)*x(:) */
1326       x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2];
1327       x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2];
1328       x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2];
1329       vj++;
1330       v += 9; xp = x + (*vj)*3;
1331     }
1332     xp = x + k*3;
1333     xp[0] = x0; xp[1] = x1; xp[2] = x2;
1334   }
1335   PetscFunctionReturn(0);
1336 }
1337 
1338 #undef __FUNCT__
1339 #define __FUNCT__ "MatSolve_SeqSBAIJ_3_NaturalOrdering"
1340 PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
1341 {
1342   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1343   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1344   MatScalar      *aa=a->a;
1345   PetscScalar    *x,*b;
1346   PetscErrorCode ierr;
1347 
1348   PetscFunctionBegin;
1349   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1350   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1351 
1352   /* solve U^T * D * y = b by forward substitution */
1353   ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1354   ierr = ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1355 
1356   /* solve U*x = y by back substitution */
1357   ierr = BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1358 
1359   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1360   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1361   ierr = PetscLogFlops(9*(2*a->nz + mbs));CHKERRQ(ierr);
1362   PetscFunctionReturn(0);
1363 }
1364 
1365 #undef __FUNCT__
1366 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_3_NaturalOrdering"
1367 PetscErrorCode MatForwardSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
1368 {
1369   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1370   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1371   MatScalar      *aa=a->a;
1372   PetscScalar    *x,*b;
1373   PetscErrorCode ierr;
1374 
1375   PetscFunctionBegin;
1376   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1377   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1378   ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1379   ierr = ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1380   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1381   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1382   ierr = PetscLogFlops(9*(a->nz + mbs));CHKERRQ(ierr);
1383   PetscFunctionReturn(0);
1384 }
1385 
1386 #undef __FUNCT__
1387 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering"
1388 PetscErrorCode MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
1389 {
1390   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1391   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1392   MatScalar      *aa=a->a;
1393   PetscScalar    *x,*b;
1394   PetscErrorCode ierr;
1395 
1396   PetscFunctionBegin;
1397   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1398   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1399   ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1400   ierr = BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1401   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1402   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1403   ierr = PetscLogFlops(9*a->nz);CHKERRQ(ierr);
1404   PetscFunctionReturn(0);
1405 }
1406 
1407 #undef __FUNCT__
1408 #define __FUNCT__ "MatSolve_SeqSBAIJ_2"
1409 PetscErrorCode MatSolve_SeqSBAIJ_2(Mat A,Vec bb,Vec xx)
1410 {
1411   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ *)A->data;
1412   IS             isrow=a->row;
1413   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1414   PetscErrorCode ierr;
1415   const PetscInt *r;
1416   PetscInt       nz,*vj,k,k2,idx;
1417   MatScalar      *aa=a->a,*v,*diag;
1418   PetscScalar    *x,*b,x0,x1,*t;
1419 
1420   PetscFunctionBegin;
1421   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1422   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1423   t  = a->solve_work;
1424   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1425 
1426   /* solve U^T * D * y = perm(b) by forward substitution */
1427   for (k=0; k<mbs; k++) {  /* t <- perm(b) */
1428     idx = 2*r[k];
1429     t[k*2]   = b[idx];
1430     t[k*2+1] = b[idx+1];
1431   }
1432   for (k=0; k<mbs; k++){
1433     v  = aa + 4*ai[k];
1434     vj = aj + ai[k];
1435     k2 = k*2;
1436     x0 = t[k2]; x1 = t[k2+1];
1437     nz = ai[k+1] - ai[k];
1438     while (nz--) {
1439       t[(*vj)*2]   += v[0]*x0 + v[1]*x1;
1440       t[(*vj)*2+1] += v[2]*x0 + v[3]*x1;
1441       vj++; v += 4;
1442     }
1443     diag = aa+k*4;  /* ptr to inv(Dk) */
1444     t[k2]   = diag[0]*x0 + diag[2]*x1;
1445     t[k2+1] = diag[1]*x0 + diag[3]*x1;
1446   }
1447 
1448   /* solve U*x = y by back substitution */
1449   for (k=mbs-1; k>=0; k--){
1450     v  = aa + 4*ai[k];
1451     vj = aj + ai[k];
1452     k2 = k*2;
1453     x0 = t[k2]; x1 = t[k2+1];
1454     nz = ai[k+1] - ai[k];
1455     while (nz--) {
1456       x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1];
1457       x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1];
1458       vj++; v += 4;
1459     }
1460     t[k2]    = x0;
1461     t[k2+1]  = x1;
1462     idx      = 2*r[k];
1463     x[idx]   = x0;
1464     x[idx+1] = x1;
1465   }
1466 
1467   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1468   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1469   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1470   ierr = PetscLogFlops(4*(2*a->nz + mbs));CHKERRQ(ierr);
1471   PetscFunctionReturn(0);
1472 }
1473 
1474 #undef __FUNCT__
1475 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private"
1476 PetscErrorCode ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1477 {
1478   MatScalar      *v,*diag;
1479   PetscScalar    x0,x1;
1480   PetscInt       nz,*vj,k,k2;
1481 
1482   PetscFunctionBegin;
1483   for (k=0; k<mbs; k++){
1484     v  = aa + 4*ai[k];
1485     vj = aj + ai[k];
1486     k2 = k*2;
1487     x0 = x[k2]; x1 = x[k2+1];  /* Dk*xk = k-th block of x */
1488     nz = ai[k+1] - ai[k];
1489 
1490     while (nz--) {
1491       /* x(:) += U(k,:)^T*(Dk*xk) */
1492       x[(*vj)*2]   += v[0]*x0 + v[1]*x1;
1493       x[(*vj)*2+1] += v[2]*x0 + v[3]*x1;
1494       vj++; v += 4;
1495     }
1496     /* xk = inv(Dk)*(Dk*xk) */
1497     diag = aa+k*4;          /* ptr to inv(Dk) */
1498     x[k2]   = diag[0]*x0 + diag[2]*x1;
1499     x[k2+1] = diag[1]*x0 + diag[3]*x1;
1500   }
1501   PetscFunctionReturn(0);
1502 }
1503 
1504 #undef __FUNCT__
1505 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private"
1506 PetscErrorCode BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1507 {
1508   MatScalar      *v;
1509   PetscScalar    x0,x1;
1510   PetscInt       nz,*vj,k,k2;
1511 
1512   PetscFunctionBegin;
1513   for (k=mbs-1; k>=0; k--){
1514     v  = aa + 4*ai[k];
1515     vj = aj + ai[k];
1516     k2 = k*2;
1517     x0 = x[k2]; x1 = x[k2+1];  /* xk */
1518     nz = ai[k+1] - ai[k];
1519     while (nz--) {
1520       /* xk += U(k,:)*x(:) */
1521       x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1];
1522       x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1];
1523       vj++; v += 4;
1524     }
1525     x[k2]     = x0;
1526     x[k2+1]   = x1;
1527   }
1528   PetscFunctionReturn(0);
1529 }
1530 
1531 #undef __FUNCT__
1532 #define __FUNCT__ "MatSolve_SeqSBAIJ_2_NaturalOrdering"
1533 PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
1534 {
1535   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1536   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1537   MatScalar      *aa=a->a;
1538   PetscScalar    *x,*b;
1539   PetscErrorCode ierr;
1540 
1541   PetscFunctionBegin;
1542   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1543   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1544 
1545   /* solve U^T * D * y = b by forward substitution */
1546   ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1547   ierr = ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1548 
1549   /* solve U*x = y by back substitution */
1550   ierr = BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1551 
1552   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1553   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1554   ierr = PetscLogFlops(4*(2*a->nz + mbs));CHKERRQ(ierr); /* bs2*(2*a->nz + mbs) */
1555   PetscFunctionReturn(0);
1556 }
1557 
1558 #undef __FUNCT__
1559 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_2_NaturalOrdering"
1560 PetscErrorCode MatForwardSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
1561 {
1562   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1563   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1564   MatScalar      *aa=a->a;
1565   PetscScalar    *x,*b;
1566   PetscErrorCode ierr;
1567 
1568   PetscFunctionBegin;
1569   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1570   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1571   ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1572   ierr = ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1573   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1574   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1575   ierr = PetscLogFlops(4*(a->nz + mbs));CHKERRQ(ierr);
1576   PetscFunctionReturn(0);
1577 }
1578 
1579 #undef __FUNCT__
1580 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering"
1581 PetscErrorCode MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
1582 {
1583   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1584   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1585   MatScalar      *aa=a->a;
1586   PetscScalar    *x,*b;
1587   PetscErrorCode ierr;
1588 
1589   PetscFunctionBegin;
1590   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1591   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1592   ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1593   ierr = BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1594   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1595   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1596   ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr);
1597   PetscFunctionReturn(0);
1598 }
1599 
1600 #undef __FUNCT__
1601 #define __FUNCT__ "MatSolve_SeqSBAIJ_1"
1602 PetscErrorCode MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx)
1603 {
1604   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ *)A->data;
1605   IS              isrow=a->row;
1606   PetscErrorCode  ierr;
1607   const PetscInt  mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj;
1608   const MatScalar *aa=a->a,*v;
1609   PetscScalar     *x,*b,xk,*t;
1610   PetscInt        nz,k;
1611 
1612   PetscFunctionBegin;
1613   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1614   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1615   t    = a->solve_work;
1616   ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr);
1617 
1618   /* solve U^T*D^(1/2)*y = perm(b) by forward substitution */
1619   for (k=0; k<mbs; k++) t[k] = b[rp[k]];
1620   for (k=0; k<mbs; k++){
1621     v  = aa + ai[k] + 1;
1622     vj = aj + ai[k] + 1;
1623     xk = t[k];
1624     nz = ai[k+1] - ai[k] - 1;
1625     while (nz--) t[*vj++] += (*v++) * xk;
1626     t[k] = xk*aa[ai[k]];  /* aa[k] = 1/D(k) */
1627   }
1628 
1629   /* solve U*perm(x) = y by back substitution */
1630   for (k=mbs-1; k>=0; k--){
1631     v  = aa + ai[k] + 1;
1632     vj = aj + ai[k] + 1;
1633     nz = ai[k+1] - ai[k] - 1;
1634     while (nz--) t[k] += (*v++) * t[*vj++];
1635     x[rp[k]] = t[k];
1636   }
1637 
1638   ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr);
1639   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1640   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1641   ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr);
1642   PetscFunctionReturn(0);
1643 }
1644 
1645 #undef __FUNCT__
1646 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1"
1647 PetscErrorCode MatForwardSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx)
1648 {
1649   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ *)A->data;
1650   IS              isrow=a->row;
1651   PetscErrorCode  ierr;
1652   const PetscInt  mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj;
1653   const MatScalar *aa=a->a,*v;
1654   PetscReal       diagk;
1655   PetscScalar     *x,*b,xk;
1656   PetscInt        nz,k;
1657 
1658   PetscFunctionBegin;
1659   /* solve U^T*D^(1/2)*x = perm(b) by forward substitution */
1660   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1661   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1662   ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr);
1663 
1664   for (k=0; k<mbs; k++) x[k] = b[rp[k]];
1665   for (k=0; k<mbs; k++){
1666     v  = aa + ai[k] + 1;
1667     vj = aj + ai[k] + 1;
1668     xk = x[k];
1669     nz = ai[k+1] - ai[k] - 1;
1670     while (nz--) x[*vj++] += (*v++) * xk;
1671 
1672     diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */
1673     if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative");
1674     x[k] = xk*sqrt(diagk);
1675   }
1676   ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr);
1677   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1678   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1679   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
1680   PetscFunctionReturn(0);
1681 }
1682 
1683 #undef __FUNCT__
1684 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1"
1685 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx)
1686 {
1687   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ *)A->data;
1688   IS              isrow=a->row;
1689   PetscErrorCode  ierr;
1690   const PetscInt  mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj;
1691   const MatScalar *aa=a->a,*v;
1692   PetscReal       diagk;
1693   PetscScalar     *x,*b,*t;
1694   PetscInt        nz,k;
1695 
1696   PetscFunctionBegin;
1697   /* solve D^(1/2)*U*perm(x) = b by back substitution */
1698   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1699   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1700   t    = a->solve_work;
1701   ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr);
1702 
1703   for (k=mbs-1; k>=0; k--){
1704     v  = aa + ai[k] + 1;
1705     vj = aj + ai[k] + 1;
1706     diagk = PetscRealPart(aa[ai[k]]);
1707     if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative");
1708     t[k] = b[k] * sqrt(diagk);
1709     nz = ai[k+1] - ai[k] - 1;
1710     while (nz--) t[k] += (*v++) * t[*vj++];
1711     x[rp[k]] = t[k];
1712   }
1713   ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr);
1714   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1715   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1716   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
1717   PetscFunctionReturn(0);
1718 }
1719 
1720 #undef __FUNCT__
1721 #define __FUNCT__ "MatSolves_SeqSBAIJ_1"
1722 PetscErrorCode MatSolves_SeqSBAIJ_1(Mat A,Vecs bb,Vecs xx)
1723 {
1724   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1725   PetscErrorCode ierr;
1726 
1727   PetscFunctionBegin;
1728   if (A->rmap->bs == 1) {
1729     ierr = MatSolve_SeqSBAIJ_1(A,bb->v,xx->v);CHKERRQ(ierr);
1730   } else {
1731     IS              isrow=a->row;
1732     const PetscInt  *vj,mbs=a->mbs,*ai=a->i,*aj=a->j,*rp;
1733     const MatScalar *aa=a->a,*v;
1734     PetscScalar     *x,*b,*t;
1735     PetscInt        nz,k,n,i;
1736     if (bb->n > a->solves_work_n) {
1737       ierr = PetscFree(a->solves_work);CHKERRQ(ierr);
1738       ierr = PetscMalloc(bb->n*A->rmap->N*sizeof(PetscScalar),&a->solves_work);CHKERRQ(ierr);
1739       a->solves_work_n = bb->n;
1740     }
1741     n    = bb->n;
1742     ierr = VecGetArray(bb->v,&b);CHKERRQ(ierr);
1743     ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr);
1744     t    = a->solves_work;
1745 
1746     ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr);
1747 
1748     /* solve U^T*D*y = perm(b) by forward substitution */
1749     for (k=0; k<mbs; k++) {for (i=0; i<n; i++) t[n*k+i] = b[rp[k]+i*mbs];} /* values are stored interlaced in t */
1750     for (k=0; k<mbs; k++){
1751       v  = aa + ai[k];
1752       vj = aj + ai[k];
1753       nz = ai[k+1] - ai[k];
1754       while (nz--) {
1755         for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i];
1756         v++;vj++;
1757       }
1758       for (i=0; i<n; i++) t[n*k+i] *= aa[k];  /* note: aa[k] = 1/D(k) */
1759     }
1760 
1761     /* solve U*perm(x) = y by back substitution */
1762     for (k=mbs-1; k>=0; k--){
1763       v  = aa + ai[k];
1764       vj = aj + ai[k];
1765       nz = ai[k+1] - ai[k];
1766       while (nz--) {
1767         for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i];
1768         v++;vj++;
1769       }
1770       for (i=0; i<n; i++) x[rp[k]+i*mbs] = t[n*k+i];
1771     }
1772 
1773     ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr);
1774     ierr = VecRestoreArray(bb->v,&b);CHKERRQ(ierr);
1775     ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr);
1776     ierr = PetscLogFlops(bb->n*(4*a->nz + A->rmap->N));CHKERRQ(ierr);
1777   }
1778   PetscFunctionReturn(0);
1779 }
1780 
1781 /*
1782       Special case where the matrix was ILU(0) factored in the natural
1783    ordering. This eliminates the need for the column and row permutation.
1784 */
1785 #undef __FUNCT__
1786 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering"
1787 PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
1788 {
1789   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1790   PetscErrorCode ierr;
1791   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1792   MatScalar      *aa=a->a,*v;
1793   PetscScalar    *x,*b,xk;
1794   PetscInt       nz,*vj,k;
1795 
1796   PetscFunctionBegin;
1797   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1798   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1799 
1800   /* solve U^T*D*y = b by forward substitution */
1801   ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1802   for (k=0; k<mbs; k++){
1803     v  = aa + ai[k] + 1;
1804     vj = aj + ai[k] + 1;
1805     xk = x[k];
1806     nz = ai[k+1] - ai[k] - 1;     /* exclude diag[k] */
1807     while (nz--) x[*vj++] += (*v++) * xk;
1808     x[k] = xk*aa[ai[k]];  /* note: aa[diag[k]] = 1/D(k) */
1809   }
1810 
1811   /* solve U*x = y by back substitution */
1812   for (k=mbs-2; k>=0; k--){
1813     v  = aa + ai[k] + 1;
1814     vj = aj + ai[k] + 1;
1815     xk = x[k];
1816     nz = ai[k+1] - ai[k] - 1;
1817     while (nz--) xk += (*v++) * x[*vj++];
1818     x[k] = xk;
1819   }
1820 
1821   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1822   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1823   ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr);
1824   PetscFunctionReturn(0);
1825 }
1826 
1827 #undef __FUNCT__
1828 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1_NaturalOrdering"
1829 PetscErrorCode MatForwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
1830 {
1831   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1832   PetscErrorCode ierr;
1833   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1834   MatScalar      *aa=a->a,*v;
1835   PetscReal      diagk;
1836   PetscScalar    *x,*b;
1837   PetscInt       nz,*vj,k;
1838 
1839   PetscFunctionBegin;
1840   /* solve U^T*D^(1/2)*x = b by forward substitution */
1841   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1842   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1843   ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1844   for (k=0; k<mbs; k++){
1845     v  = aa + ai[k] + 1;
1846     vj = aj + ai[k] + 1;
1847     nz = ai[k+1] - ai[k] - 1;     /* exclude diag[k] */
1848     while (nz--) x[*vj++] += (*v++) * x[k];
1849     diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */
1850     if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ2(PETSC_ERR_SUP,"Diagonal (%g,%g) must be real and nonnegative",PetscRealPart(aa[ai[k]]),PetscImaginaryPart(aa[ai[k]]));
1851     x[k] *= sqrt(diagk);
1852   }
1853   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1854   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1855   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
1856   PetscFunctionReturn(0);
1857 }
1858 
1859 #undef __FUNCT__
1860 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering"
1861 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
1862 {
1863   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1864   PetscErrorCode ierr;
1865   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1866   MatScalar      *aa=a->a,*v;
1867   PetscReal      diagk;
1868   PetscScalar    *x,*b;
1869   PetscInt       nz,*vj,k;
1870 
1871   PetscFunctionBegin;
1872   /* solve D^(1/2)*U*x = b by back substitution */
1873   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1874   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1875 
1876   for (k=mbs-1; k>=0; k--){
1877     v  = aa + ai[k] + 1;
1878     vj = aj + ai[k] + 1;
1879     diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */
1880     if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative");
1881     x[k] = sqrt(diagk)*b[k];
1882     nz = ai[k+1] - ai[k] - 1;
1883     while (nz--) x[k] += (*v++) * x[*vj++];
1884   }
1885   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1886   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1887   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
1888   PetscFunctionReturn(0);
1889 }
1890 
1891 extern PetscErrorCode MatSeqSBAIJSetNumericFactorization(Mat,PetscTruth);
1892 
1893 /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */
1894 #undef __FUNCT__
1895 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ_MSR"
1896 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ_MSR(Mat B,Mat A,IS perm,const MatFactorInfo *info)
1897 {
1898   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b;
1899   PetscErrorCode ierr;
1900   const PetscInt *rip,mbs = a->mbs,*ai = a->i,*aj = a->j;
1901   PetscInt       *jutmp,bs = A->rmap->bs,bs2=a->bs2,i;
1902   PetscInt       m,reallocs = 0,*levtmp;
1903   PetscInt       *prowl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd;
1904   PetscInt       incrlev,*lev,shift,prow,nz;
1905   PetscReal      f = info->fill,levels = info->levels;
1906   PetscTruth     perm_identity;
1907 
1908   PetscFunctionBegin;
1909   /* check whether perm is the identity mapping */
1910   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1911 
1912   if (perm_identity){
1913     a->permute = PETSC_FALSE;
1914     ai = a->i; aj = a->j;
1915   } else { /*  non-trivial permutation */
1916     a->permute = PETSC_TRUE;
1917     ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr);
1918     ai = a->inew; aj = a->jnew;
1919   }
1920 
1921   /* initialization */
1922   ierr  = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1923   umax  = (PetscInt)(f*ai[mbs] + 1);
1924   ierr  = PetscMalloc(umax*sizeof(PetscInt),&lev);CHKERRQ(ierr);
1925   umax += mbs + 1;
1926   shift = mbs + 1;
1927   ierr  = PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);CHKERRQ(ierr);
1928   ierr  = PetscMalloc(umax*sizeof(PetscInt),&ju);CHKERRQ(ierr);
1929   iu[0] = mbs + 1;
1930   juidx = mbs + 1;
1931   /* prowl: linked list for pivot row */
1932   ierr    = PetscMalloc((3*mbs+1)*sizeof(PetscInt),&prowl);CHKERRQ(ierr);
1933   /* q: linked list for col index */
1934   q       = prowl + mbs;
1935   levtmp  = q     + mbs;
1936 
1937   for (i=0; i<mbs; i++){
1938     prowl[i] = mbs;
1939     q[i] = 0;
1940   }
1941 
1942   /* for each row k */
1943   for (k=0; k<mbs; k++){
1944     nzk  = 0;
1945     q[k] = mbs;
1946     /* copy current row into linked list */
1947     nz = ai[rip[k]+1] - ai[rip[k]];
1948     j = ai[rip[k]];
1949     while (nz--){
1950       vj = rip[aj[j++]];
1951       if (vj > k){
1952         qm = k;
1953         do {
1954           m  = qm; qm = q[m];
1955         } while(qm < vj);
1956         if (qm == vj) {
1957           SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n");
1958         }
1959         nzk++;
1960         q[m]       = vj;
1961         q[vj]      = qm;
1962         levtmp[vj] = 0;   /* initialize lev for nonzero element */
1963       }
1964     }
1965 
1966     /* modify nonzero structure of k-th row by computing fill-in
1967        for each row prow to be merged in */
1968     prow = k;
1969     prow = prowl[prow]; /* next pivot row (== 0 for symbolic factorization) */
1970 
1971     while (prow < k){
1972       /* merge row prow into k-th row */
1973       jmin = iu[prow] + 1;
1974       jmax = iu[prow+1];
1975       qm = k;
1976       for (j=jmin; j<jmax; j++){
1977         incrlev = lev[j-shift] + 1;
1978 	if (incrlev > levels) continue;
1979         vj      = ju[j];
1980         do {
1981           m = qm; qm = q[m];
1982         } while (qm < vj);
1983         if (qm != vj){      /* a fill */
1984           nzk++; q[m] = vj; q[vj] = qm; qm = vj;
1985           levtmp[vj] = incrlev;
1986         } else {
1987           if (levtmp[vj] > incrlev) levtmp[vj] = incrlev;
1988         }
1989       }
1990       prow = prowl[prow]; /* next pivot row */
1991     }
1992 
1993     /* add k to row list for first nonzero element in k-th row */
1994     if (nzk > 1){
1995       i = q[k]; /* col value of first nonzero element in k_th row of U */
1996       prowl[k] = prowl[i]; prowl[i] = k;
1997     }
1998     iu[k+1] = iu[k] + nzk;
1999 
2000     /* allocate more space to ju and lev if needed */
2001     if (iu[k+1] > umax) {
2002       /* estimate how much additional space we will need */
2003       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
2004       /* just double the memory each time */
2005       maxadd = umax;
2006       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
2007       umax += maxadd;
2008 
2009       /* allocate a longer ju */
2010       ierr     = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr);
2011       ierr     = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr);
2012       ierr     = PetscFree(ju);CHKERRQ(ierr);
2013       ju       = jutmp;
2014 
2015       ierr     = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr);
2016       ierr     = PetscMemcpy(jutmp,lev,(iu[k]-shift)*sizeof(PetscInt));CHKERRQ(ierr);
2017       ierr     = PetscFree(lev);CHKERRQ(ierr);
2018       lev      = jutmp;
2019       reallocs += 2; /* count how many times we realloc */
2020     }
2021 
2022     /* save nonzero structure of k-th row in ju */
2023     i=k;
2024     while (nzk --) {
2025       i                = q[i];
2026       ju[juidx]        = i;
2027       lev[juidx-shift] = levtmp[i];
2028       juidx++;
2029     }
2030   }
2031 
2032 #if defined(PETSC_USE_INFO)
2033   if (ai[mbs] != 0) {
2034     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
2035     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
2036     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2037     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
2038     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
2039   } else {
2040     ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2041   }
2042 #endif
2043 
2044   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2045   ierr = PetscFree(prowl);CHKERRQ(ierr);
2046   ierr = PetscFree(lev);CHKERRQ(ierr);
2047 
2048   /* put together the new matrix */
2049   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,PETSC_NULL);CHKERRQ(ierr);
2050 
2051   /* ierr = PetscLogObjectParent(B,iperm);CHKERRQ(ierr); */
2052   b    = (Mat_SeqSBAIJ*)(B)->data;
2053   ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr);
2054   b->singlemalloc = PETSC_FALSE;
2055   b->free_a       = PETSC_TRUE;
2056   b->free_ij      = PETSC_TRUE;
2057   /* the next line frees the default space generated by the Create() */
2058   ierr    = PetscFree3(b->a,b->j,b->i);CHKERRQ(ierr);
2059   ierr    = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
2060   b->j    = ju;
2061   b->i    = iu;
2062   b->diag = 0;
2063   b->ilen = 0;
2064   b->imax = 0;
2065 
2066   if (b->row) {
2067     ierr = ISDestroy(b->row);CHKERRQ(ierr);
2068   }
2069   if (b->icol) {
2070     ierr = ISDestroy(b->icol);CHKERRQ(ierr);
2071   }
2072   b->row  = perm;
2073   b->icol = perm;
2074   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2075   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2076   ierr    = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2077   /* In b structure:  Free imax, ilen, old a, old j.
2078      Allocate idnew, solve_work, new a, new j */
2079   ierr    = PetscLogObjectMemory(B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2080   b->maxnz = b->nz = iu[mbs];
2081 
2082   (B)->info.factor_mallocs    = reallocs;
2083   (B)->info.fill_ratio_given  = f;
2084   if (ai[mbs] != 0) {
2085     (B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
2086   } else {
2087     (B)->info.fill_ratio_needed = 0.0;
2088   }
2089   ierr = MatSeqSBAIJSetNumericFactorization(B,perm_identity);CHKERRQ(ierr);
2090   PetscFunctionReturn(0);
2091 }
2092 
2093 #include "petscbt.h"
2094 #include "../src/mat/utils/freespace.h"
2095 #undef __FUNCT__
2096 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ"
2097 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2098 {
2099   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data;
2100   Mat_SeqSBAIJ       *b;
2101   PetscErrorCode     ierr;
2102   PetscTruth         perm_identity,free_ij = PETSC_TRUE,missing;
2103   PetscInt           bs=A->rmap->bs,am=a->mbs,d;
2104   const PetscInt     *cols,*rip,*ai,*aj;
2105   PetscInt           reallocs=0,i,*ui;
2106   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2107   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,ncols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr;
2108   PetscReal          fill=info->fill,levels=info->levels,ratio_needed;
2109   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2110   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2111   PetscBT            lnkbt;
2112 
2113   PetscFunctionBegin;
2114   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2115   if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2116 
2117   /*
2118    This code originally uses Modified Sparse Row (MSR) storage
2119    (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice!
2120    Then it is rewritten so the factor B takes seqsbaij format. However the associated
2121    MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1,
2122    thus the original code in MSR format is still used for these cases.
2123    The code below should replace MatICCFactorSymbolic_SeqSBAIJ_MSR() whenever
2124    MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor.
2125   */
2126   if (bs > 1){
2127     ierr = MatICCFactorSymbolic_SeqSBAIJ_MSR(fact,A,perm,info);CHKERRQ(ierr);
2128     PetscFunctionReturn(0);
2129   }
2130 
2131   /* check whether perm is the identity mapping */
2132   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2133 
2134   /* special case that simply copies fill pattern */
2135   if (!levels && perm_identity) {
2136     a->permute = PETSC_FALSE;
2137     /* reuse the column pointers and row offsets for memory savings */
2138     ui           = a->i;
2139     uj           = a->j;
2140     free_ij      = PETSC_FALSE;
2141     ratio_needed = 1.0;
2142   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2143     if (perm_identity){
2144       a->permute = PETSC_FALSE;
2145       ai = a->i; aj = a->j;
2146     } else { /*  non-trivial permutation */
2147       a->permute = PETSC_TRUE;
2148       ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr);
2149       ai   = a->inew; aj = a->jnew;
2150     }
2151     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2152 
2153     /* initialization */
2154     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2155     ui[0] = 0;
2156 
2157     /* jl: linked list for storing indices of the pivot rows
2158        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2159     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
2160     il         = jl + am;
2161     uj_ptr     = (PetscInt**)(il + am);
2162     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
2163     for (i=0; i<am; i++){
2164       jl[i] = am; il[i] = 0;
2165     }
2166 
2167     /* create and initialize a linked list for storing column indices of the active row k */
2168     nlnk = am + 1;
2169     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2170 
2171     /* initial FreeSpace size is fill*(ai[am]+1) */
2172     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
2173     current_space = free_space;
2174     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
2175     current_space_lvl = free_space_lvl;
2176 
2177     for (k=0; k<am; k++){  /* for each active row k */
2178       /* initialize lnk by the column indices of row rip[k] */
2179       nzk   = 0;
2180       ncols = ai[rip[k]+1] - ai[rip[k]];
2181       cols  = aj+ai[rip[k]];
2182       ierr = PetscIncompleteLLInit(ncols,cols,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2183       nzk += nlnk;
2184 
2185       /* update lnk by computing fill-in for each pivot row to be merged in */
2186       prow = jl[k]; /* 1st pivot row */
2187 
2188       while (prow < k){
2189         nextprow = jl[prow];
2190 
2191         /* merge prow into k-th row */
2192         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2193         jmax = ui[prow+1];
2194         ncols = jmax-jmin;
2195         i     = jmin - ui[prow];
2196         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2197         j     = *(uj_lvl_ptr[prow] + i - 1);
2198         cols_lvl = uj_lvl_ptr[prow]+i;
2199         ierr = PetscICCLLAddSorted(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2200         nzk += nlnk;
2201 
2202         /* update il and jl for prow */
2203         if (jmin < jmax){
2204           il[prow] = jmin;
2205           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2206         }
2207         prow = nextprow;
2208       }
2209 
2210       /* if free space is not available, make more free space */
2211       if (current_space->local_remaining<nzk) {
2212         i = am - k + 1; /* num of unfactored rows */
2213         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2214         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2215         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2216         reallocs++;
2217       }
2218 
2219       /* copy data into free_space and free_space_lvl, then initialize lnk */
2220       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2221 
2222       /* add the k-th row into il and jl */
2223       if (nzk-1 > 0){
2224         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2225         jl[k] = jl[i]; jl[i] = k;
2226         il[k] = ui[k] + 1;
2227       }
2228       uj_ptr[k]     = current_space->array;
2229       uj_lvl_ptr[k] = current_space_lvl->array;
2230 
2231       current_space->array           += nzk;
2232       current_space->local_used      += nzk;
2233       current_space->local_remaining -= nzk;
2234       current_space_lvl->array           += nzk;
2235       current_space_lvl->local_used      += nzk;
2236       current_space_lvl->local_remaining -= nzk;
2237 
2238       ui[k+1] = ui[k] + nzk;
2239     }
2240 
2241 #if defined(PETSC_USE_INFO)
2242     if (ai[am] != 0) {
2243       PetscReal af = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2244       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2245       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2246       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2247     } else {
2248       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2249     }
2250 #endif
2251 
2252     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2253     ierr = PetscFree(jl);CHKERRQ(ierr);
2254 
2255     /* destroy list of free space and other temporary array(s) */
2256     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2257     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
2258     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2259     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2260     if (ai[am] != 0) {
2261       ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2262     } else {
2263       ratio_needed = 0.0;
2264     }
2265   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2266 
2267   /* put together the new matrix in MATSEQSBAIJ format */
2268   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(fact,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
2269 
2270   b = (Mat_SeqSBAIJ*)(fact)->data;
2271   ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr);
2272   b->singlemalloc = PETSC_FALSE;
2273   b->free_a  = PETSC_TRUE;
2274   b->free_ij = free_ij;
2275   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2276   b->j    = uj;
2277   b->i    = ui;
2278   b->diag    = 0;
2279   b->ilen    = 0;
2280   b->imax    = 0;
2281   b->row     = perm;
2282   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2283   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2284   b->icol = perm;
2285   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2286   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2287   b->maxnz = b->nz = ui[am];
2288 
2289   (fact)->info.factor_mallocs    = reallocs;
2290   (fact)->info.fill_ratio_given  = fill;
2291   (fact)->info.fill_ratio_needed = ratio_needed;
2292   ierr = MatSeqSBAIJSetNumericFactorization(fact,perm_identity);CHKERRQ(ierr);
2293   PetscFunctionReturn(0);
2294 }
2295 
2296