xref: /petsc/src/mat/impls/sbaij/seq/sbaijfact2.c (revision 09f3b4e5628a00a1eaf17d80982cfbcc515cc9c1)
1 #define PETSCMAT_DLL
2 
3 /*
4     Factorization code for SBAIJ format.
5 */
6 
7 #include "src/mat/impls/sbaij/seq/sbaij.h"
8 #include "src/mat/impls/baij/seq/baij.h"
9 #include "src/inline/ilu.h"
10 #include "src/inline/dot.h"
11 
12 #undef __FUNCT__
13 #define __FUNCT__ "MatSolve_SeqSBAIJ_N"
14 PetscErrorCode MatSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx)
15 {
16   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
17   IS              isrow=a->row;
18   PetscInt        mbs=a->mbs,*ai=a->i,*aj=a->j;
19   PetscErrorCode  ierr;
20   PetscInt        nz,*vj,k,*r,idx,k1;
21   PetscInt        bs=A->bs,bs2 = a->bs2;
22   MatScalar       *aa=a->a,*v,*diag;
23   PetscScalar     *x,*xk,*xj,*b,*xk_tmp,*t;
24 
25   PetscFunctionBegin;
26   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
27   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
28   t  = a->solve_work;
29   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
30   ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr);
31 
32   /* solve U^T * D * y = b by forward substitution */
33   xk = t;
34   for (k=0; k<mbs; k++) { /* t <- perm(b) */
35     idx   = bs*r[k];
36     for (k1=0; k1<bs; k1++) *xk++ = b[idx+k1];
37   }
38   for (k=0; k<mbs; k++){
39     v  = aa + bs2*ai[k];
40     xk = t + k*bs;      /* Dk*xk = k-th block of x */
41     ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */
42     nz = ai[k+1] - ai[k];
43     vj = aj + ai[k];
44     xj = t + (*vj)*bs;  /* *vj-th block of x, *vj>k */
45     while (nz--) {
46       /* x(:) += U(k,:)^T*(Dk*xk) */
47       Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */
48       vj++; xj = t + (*vj)*bs;
49       v += bs2;
50     }
51     /* xk = inv(Dk)*(Dk*xk) */
52     diag = aa+k*bs2;                            /* ptr to inv(Dk) */
53     Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */
54   }
55 
56   /* solve U*x = y by back substitution */
57   for (k=mbs-1; k>=0; k--){
58     v  = aa + bs2*ai[k];
59     xk = t + k*bs;        /* xk */
60     nz = ai[k+1] - ai[k];
61     vj = aj + ai[k];
62     xj = t + (*vj)*bs;
63     while (nz--) {
64       /* xk += U(k,:)*x(:) */
65       Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */
66       vj++;
67       v += bs2; xj = t + (*vj)*bs;
68     }
69     idx   = bs*r[k];
70     for (k1=0; k1<bs; k1++) x[idx+k1] = *xk++;
71   }
72 
73   ierr = PetscFree(xk_tmp);CHKERRQ(ierr);
74   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
75   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
76   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
77   ierr = PetscLogFlops(bs2*(2*a->nz + mbs));CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 
81 #undef __FUNCT__
82 #define __FUNCT__ "MatSolve_SeqSBAIJ_N_NaturalOrdering"
83 PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx)
84 {
85   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
86   PetscErrorCode ierr;
87   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
88   PetscInt       nz,*vj,k;
89   PetscInt       bs=A->bs,bs2 = a->bs2;
90   MatScalar      *aa=a->a,*v,*diag;
91   PetscScalar    *x,*xk,*xj,*b,*xk_tmp;
92 
93   PetscFunctionBegin;
94 
95   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
96   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
97 
98   ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr);
99 
100   /* solve U^T * D * y = b by forward substitution */
101   ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
102   for (k=0; k<mbs; k++){
103     v  = aa + bs2*ai[k];
104     xk = x + k*bs;      /* Dk*xk = k-th block of x */
105     ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */
106     nz = ai[k+1] - ai[k];
107     vj = aj + ai[k];
108     xj = x + (*vj)*bs;  /* *vj-th block of x, *vj>k */
109     while (nz--) {
110       /* x(:) += U(k,:)^T*(Dk*xk) */
111       Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */
112       vj++; xj = x + (*vj)*bs;
113       v += bs2;
114     }
115     /* xk = inv(Dk)*(Dk*xk) */
116     diag = aa+k*bs2;                            /* ptr to inv(Dk) */
117     Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */
118   }
119 
120   /* solve U*x = y by back substitution */
121   for (k=mbs-1; k>=0; k--){
122     v  = aa + bs2*ai[k];
123     xk = x + k*bs;        /* xk */
124     nz = ai[k+1] - ai[k];
125     vj = aj + ai[k];
126     xj = x + (*vj)*bs;
127     while (nz--) {
128       /* xk += U(k,:)*x(:) */
129       Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */
130       vj++;
131       v += bs2; xj = x + (*vj)*bs;
132     }
133   }
134 
135   ierr = PetscFree(xk_tmp);CHKERRQ(ierr);
136   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
137   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
138   ierr = PetscLogFlops(bs2*(2*a->nz + mbs));CHKERRQ(ierr);
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatSolve_SeqSBAIJ_7"
144 PetscErrorCode MatSolve_SeqSBAIJ_7(Mat A,Vec bb,Vec xx)
145 {
146   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
147   IS             isrow=a->row;
148   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
149   PetscErrorCode ierr;
150   PetscInt       nz,*vj,k,*r,idx;
151   MatScalar      *aa=a->a,*v,*d;
152   PetscScalar    *x,*b,x0,x1,x2,x3,x4,x5,x6,*t,*tp;
153 
154   PetscFunctionBegin;
155   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
156   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
157   t  = a->solve_work;
158   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
159 
160   /* solve U^T * D * y = b by forward substitution */
161   tp = t;
162   for (k=0; k<mbs; k++) { /* t <- perm(b) */
163     idx   = 7*r[k];
164     tp[0] = b[idx];
165     tp[1] = b[idx+1];
166     tp[2] = b[idx+2];
167     tp[3] = b[idx+3];
168     tp[4] = b[idx+4];
169     tp[5] = b[idx+5];
170     tp[6] = b[idx+6];
171     tp += 7;
172   }
173 
174   for (k=0; k<mbs; k++){
175     v  = aa + 49*ai[k];
176     vj = aj + ai[k];
177     tp = t + k*7;
178     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6];
179     nz = ai[k+1] - ai[k];
180     tp = t + (*vj)*7;
181     while (nz--) {
182       tp[0]+=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6;
183       tp[1]+=  v[7]*x0 +  v[8]*x1 +  v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6;
184       tp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6;
185       tp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6;
186       tp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6;
187       tp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6;
188       tp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6;
189       vj++; tp = t + (*vj)*7;
190       v += 49;
191     }
192 
193     /* xk = inv(Dk)*(Dk*xk) */
194     d  = aa+k*49;          /* ptr to inv(Dk) */
195     tp    = t + k*7;
196     tp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6;
197     tp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6;
198     tp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6;
199     tp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6;
200     tp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6;
201     tp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6;
202     tp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6;
203   }
204 
205   /* solve U*x = y by back substitution */
206   for (k=mbs-1; k>=0; k--){
207     v  = aa + 49*ai[k];
208     vj = aj + ai[k];
209     tp    = t + k*7;
210     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];  x6=tp[6]; /* xk */
211     nz = ai[k+1] - ai[k];
212 
213     tp = t + (*vj)*7;
214     while (nz--) {
215       /* xk += U(k,:)*x(:) */
216       x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6];
217       x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6];
218       x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6];
219       x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6];
220       x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6];
221       x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6];
222       x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6];
223       vj++; tp = t + (*vj)*7;
224       v += 49;
225     }
226     tp    = t + k*7;
227     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6;
228     idx   = 7*r[k];
229     x[idx]     = x0;
230     x[idx+1]   = x1;
231     x[idx+2]   = x2;
232     x[idx+3]   = x3;
233     x[idx+4]   = x4;
234     x[idx+5]   = x5;
235     x[idx+6]   = x6;
236   }
237 
238   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
239   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
240   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
241   ierr = PetscLogFlops(49*(2*a->nz + mbs));CHKERRQ(ierr);
242   PetscFunctionReturn(0);
243 }
244 
245 #undef __FUNCT__
246 #define __FUNCT__ "MatSolve_SeqSBAIJ_7_NaturalOrdering"
247 PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
248 {
249   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
250   PetscErrorCode ierr;
251   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
252   MatScalar      *aa=a->a,*v,*d;
253   PetscScalar    *x,*xp,*b,x0,x1,x2,x3,x4,x5,x6;
254   PetscInt       nz,*vj,k;
255 
256   PetscFunctionBegin;
257   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
258   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
259 
260   /* solve U^T * D * y = b by forward substitution */
261   ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
262   for (k=0; k<mbs; k++){
263     v  = aa + 49*ai[k];
264     xp = x + k*7;
265     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* Dk*xk = k-th block of x */
266     nz = ai[k+1] - ai[k];
267     vj = aj + ai[k];
268     xp = x + (*vj)*7;
269     while (nz--) {
270       /* x(:) += U(k,:)^T*(Dk*xk) */
271       xp[0]+=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6;
272       xp[1]+=  v[7]*x0 +  v[8]*x1 +  v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6;
273       xp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6;
274       xp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6;
275       xp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6;
276       xp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6;
277       xp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6;
278       vj++; xp = x + (*vj)*7;
279       v += 49;
280     }
281     /* xk = inv(Dk)*(Dk*xk) */
282     d  = aa+k*49;          /* ptr to inv(Dk) */
283     xp = x + k*7;
284     xp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6;
285     xp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6;
286     xp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6;
287     xp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6;
288     xp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6;
289     xp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6;
290     xp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6;
291   }
292 
293   /* solve U*x = y by back substitution */
294   for (k=mbs-1; k>=0; k--){
295     v  = aa + 49*ai[k];
296     xp = x + k*7;
297     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */
298     nz = ai[k+1] - ai[k];
299     vj = aj + ai[k];
300     xp = x + (*vj)*7;
301     while (nz--) {
302       /* xk += U(k,:)*x(:) */
303       x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6];
304       x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6];
305       x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6];
306       x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6];
307       x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6];
308       x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6];
309       x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6];
310       vj++;
311       v += 49; xp = x + (*vj)*7;
312     }
313     xp = x + k*7;
314     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6;
315   }
316 
317   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
318   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
319   ierr = PetscLogFlops(49*(2*a->nz + mbs));CHKERRQ(ierr);
320   PetscFunctionReturn(0);
321 }
322 
323 #undef __FUNCT__
324 #define __FUNCT__ "MatSolve_SeqSBAIJ_6"
325 PetscErrorCode MatSolve_SeqSBAIJ_6(Mat A,Vec bb,Vec xx)
326 {
327   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
328   IS             isrow=a->row;
329   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
330   PetscErrorCode ierr;
331   PetscInt       nz,*vj,k,*r,idx;
332   MatScalar      *aa=a->a,*v,*d;
333   PetscScalar    *x,*b,x0,x1,x2,x3,x4,x5,*t,*tp;
334 
335   PetscFunctionBegin;
336   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
337   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
338   t  = a->solve_work;
339   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
340 
341   /* solve U^T * D * y = b by forward substitution */
342   tp = t;
343   for (k=0; k<mbs; k++) { /* t <- perm(b) */
344     idx   = 6*r[k];
345     tp[0] = b[idx];
346     tp[1] = b[idx+1];
347     tp[2] = b[idx+2];
348     tp[3] = b[idx+3];
349     tp[4] = b[idx+4];
350     tp[5] = b[idx+5];
351     tp += 6;
352   }
353 
354   for (k=0; k<mbs; k++){
355     v  = aa + 36*ai[k];
356     vj = aj + ai[k];
357     tp = t + k*6;
358     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];
359     nz = ai[k+1] - ai[k];
360     tp = t + (*vj)*6;
361     while (nz--) {
362       tp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5;
363       tp[1] +=  v[6]*x0 +  v[7]*x1 +  v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5;
364       tp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5;
365       tp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5;
366       tp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5;
367       tp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5;
368       vj++; tp = t + (*vj)*6;
369       v += 36;
370     }
371 
372     /* xk = inv(Dk)*(Dk*xk) */
373     d  = aa+k*36;          /* ptr to inv(Dk) */
374     tp    = t + k*6;
375     tp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5;
376     tp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5;
377     tp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5;
378     tp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5;
379     tp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5;
380     tp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5;
381   }
382 
383   /* solve U*x = y by back substitution */
384   for (k=mbs-1; k>=0; k--){
385     v  = aa + 36*ai[k];
386     vj = aj + ai[k];
387     tp    = t + k*6;
388     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];  /* xk */
389     nz = ai[k+1] - ai[k];
390 
391     tp = t + (*vj)*6;
392     while (nz--) {
393       /* xk += U(k,:)*x(:) */
394       x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5];
395       x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5];
396       x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5];
397       x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5];
398       x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5];
399       x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5];
400       vj++; tp = t + (*vj)*6;
401       v += 36;
402     }
403     tp    = t + k*6;
404     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5;
405     idx   = 6*r[k];
406     x[idx]     = x0;
407     x[idx+1]   = x1;
408     x[idx+2]   = x2;
409     x[idx+3]   = x3;
410     x[idx+4]   = x4;
411     x[idx+5]   = x5;
412   }
413 
414   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
415   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
416   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
417   ierr = PetscLogFlops(36*(2*a->nz + mbs));CHKERRQ(ierr);
418   PetscFunctionReturn(0);
419 }
420 
421 #undef __FUNCT__
422 #define __FUNCT__ "MatSolve_SeqSBAIJ_6_NaturalOrdering"
423 PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
424 {
425   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
426   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
427   MatScalar      *aa=a->a,*v,*d;
428   PetscScalar    *x,*xp,*b,x0,x1,x2,x3,x4,x5;
429   PetscErrorCode ierr;
430   PetscInt       nz,*vj,k;
431 
432   PetscFunctionBegin;
433 
434   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
435   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
436 
437   /* solve U^T * D * y = b by forward substitution */
438   ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
439   for (k=0; k<mbs; k++){
440     v  = aa + 36*ai[k];
441     xp = x + k*6;
442     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* Dk*xk = k-th block of x */
443     nz = ai[k+1] - ai[k];
444     vj = aj + ai[k];
445     xp = x + (*vj)*6;
446     while (nz--) {
447       /* x(:) += U(k,:)^T*(Dk*xk) */
448       xp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5;
449       xp[1] +=  v[6]*x0 +  v[7]*x1 +  v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5;
450       xp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5;
451       xp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5;
452       xp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5;
453       xp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5;
454       vj++; xp = x + (*vj)*6;
455       v += 36;
456     }
457     /* xk = inv(Dk)*(Dk*xk) */
458     d  = aa+k*36;          /* ptr to inv(Dk) */
459     xp = x + k*6;
460     xp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5;
461     xp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5;
462     xp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5;
463     xp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5;
464     xp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5;
465     xp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5;
466   }
467 
468   /* solve U*x = y by back substitution */
469   for (k=mbs-1; k>=0; k--){
470     v  = aa + 36*ai[k];
471     xp = x + k*6;
472     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */
473     nz = ai[k+1] - ai[k];
474     vj = aj + ai[k];
475     xp = x + (*vj)*6;
476     while (nz--) {
477       /* xk += U(k,:)*x(:) */
478       x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5];
479       x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5];
480       x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5];
481       x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5];
482       x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5];
483       x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5];
484       vj++;
485       v += 36; xp = x + (*vj)*6;
486     }
487     xp = x + k*6;
488     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5;
489   }
490 
491   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
492   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
493   ierr = PetscLogFlops(36*(2*a->nz + mbs));CHKERRQ(ierr);
494   PetscFunctionReturn(0);
495 }
496 
497 #undef __FUNCT__
498 #define __FUNCT__ "MatSolve_SeqSBAIJ_5"
499 PetscErrorCode MatSolve_SeqSBAIJ_5(Mat A,Vec bb,Vec xx)
500 {
501   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
502   IS             isrow=a->row;
503   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
504   PetscErrorCode ierr;
505   PetscInt       nz,*vj,k,*r,idx;
506   MatScalar      *aa=a->a,*v,*diag;
507   PetscScalar    *x,*b,x0,x1,x2,x3,x4,*t,*tp;
508 
509   PetscFunctionBegin;
510   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
511   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
512   t  = a->solve_work;
513   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
514 
515   /* solve U^T * D * y = b by forward substitution */
516   tp = t;
517   for (k=0; k<mbs; k++) { /* t <- perm(b) */
518     idx   = 5*r[k];
519     tp[0] = b[idx];
520     tp[1] = b[idx+1];
521     tp[2] = b[idx+2];
522     tp[3] = b[idx+3];
523     tp[4] = b[idx+4];
524     tp += 5;
525   }
526 
527   for (k=0; k<mbs; k++){
528     v  = aa + 25*ai[k];
529     vj = aj + ai[k];
530     tp = t + k*5;
531     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];
532     nz = ai[k+1] - ai[k];
533 
534     tp = t + (*vj)*5;
535     while (nz--) {
536       tp[0] +=  v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4;
537       tp[1] +=  v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4;
538       tp[2] += v[10]*x0+ v[11]*x1+ v[12]*x2+ v[13]*x3+ v[14]*x4;
539       tp[3] += v[15]*x0+ v[16]*x1+ v[17]*x2+ v[18]*x3+ v[19]*x4;
540       tp[4] += v[20]*x0+ v[21]*x1+ v[22]*x2+ v[23]*x3+ v[24]*x4;
541       vj++; tp = t + (*vj)*5;
542       v += 25;
543     }
544 
545     /* xk = inv(Dk)*(Dk*xk) */
546     diag  = aa+k*25;          /* ptr to inv(Dk) */
547     tp    = t + k*5;
548       tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
549       tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
550       tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
551       tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
552       tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
553   }
554 
555   /* solve U*x = y by back substitution */
556   for (k=mbs-1; k>=0; k--){
557     v  = aa + 25*ai[k];
558     vj = aj + ai[k];
559     tp    = t + k*5;
560     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];/* xk */
561     nz = ai[k+1] - ai[k];
562 
563     tp = t + (*vj)*5;
564     while (nz--) {
565       /* xk += U(k,:)*x(:) */
566       x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4];
567       x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4];
568       x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4];
569       x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4];
570       x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4];
571       vj++; tp = t + (*vj)*5;
572       v += 25;
573     }
574     tp    = t + k*5;
575     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4;
576     idx   = 5*r[k];
577     x[idx]     = x0;
578     x[idx+1]   = x1;
579     x[idx+2]   = x2;
580     x[idx+3]   = x3;
581     x[idx+4]   = x4;
582   }
583 
584   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
585   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
586   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
587   ierr = PetscLogFlops(25*(2*a->nz + mbs));CHKERRQ(ierr);
588   PetscFunctionReturn(0);
589 }
590 
591 #undef __FUNCT__
592 #define __FUNCT__ "MatSolve_SeqSBAIJ_5_NaturalOrdering"
593 PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
594 {
595   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
596   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
597   MatScalar      *aa=a->a,*v,*diag;
598   PetscScalar    *x,*xp,*b,x0,x1,x2,x3,x4;
599   PetscErrorCode ierr;
600   PetscInt       nz,*vj,k;
601 
602   PetscFunctionBegin;
603 
604   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
605   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
606 
607   /* solve U^T * D * y = b by forward substitution */
608   ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
609   for (k=0; k<mbs; k++){
610     v  = aa + 25*ai[k];
611     xp = x + k*5;
612     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */
613     nz = ai[k+1] - ai[k];
614     vj = aj + ai[k];
615     xp = x + (*vj)*5;
616     while (nz--) {
617       /* x(:) += U(k,:)^T*(Dk*xk) */
618       xp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4;
619       xp[1] +=  v[5]*x0 +  v[6]*x1 +  v[7]*x2 + v[8]*x3 + v[9]*x4;
620       xp[2] += v[10]*x0 + v[11]*x1 + v[12]*x2+ v[13]*x3+ v[14]*x4;
621       xp[3] += v[15]*x0 + v[16]*x1 + v[17]*x2+ v[18]*x3+ v[19]*x4;
622       xp[4] += v[20]*x0 + v[21]*x1 + v[22]*x2+ v[23]*x3+ v[24]*x4;
623       vj++; xp = x + (*vj)*5;
624       v += 25;
625     }
626     /* xk = inv(Dk)*(Dk*xk) */
627     diag = aa+k*25;          /* ptr to inv(Dk) */
628     xp   = x + k*5;
629     xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
630     xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
631     xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
632     xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
633     xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
634   }
635 
636   /* solve U*x = y by back substitution */
637   for (k=mbs-1; k>=0; k--){
638     v  = aa + 25*ai[k];
639     xp = x + k*5;
640     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* xk */
641     nz = ai[k+1] - ai[k];
642     vj = aj + ai[k];
643     xp = x + (*vj)*5;
644     while (nz--) {
645       /* xk += U(k,:)*x(:) */
646       x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4];
647       x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4];
648       x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4];
649       x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4];
650       x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4];
651       vj++;
652       v += 25; xp = x + (*vj)*5;
653     }
654     xp = x + k*5;
655     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4;
656   }
657 
658   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
659   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
660   ierr = PetscLogFlops(25*(2*a->nz + mbs));CHKERRQ(ierr);
661   PetscFunctionReturn(0);
662 }
663 
664 #undef __FUNCT__
665 #define __FUNCT__ "MatSolve_SeqSBAIJ_4"
666 PetscErrorCode MatSolve_SeqSBAIJ_4(Mat A,Vec bb,Vec xx)
667 {
668   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
669   IS             isrow=a->row;
670   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
671   PetscErrorCode ierr;
672   PetscInt       nz,*vj,k,*r,idx;
673   MatScalar      *aa=a->a,*v,*diag;
674   PetscScalar    *x,*b,x0,x1,x2,x3,*t,*tp;
675 
676   PetscFunctionBegin;
677   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
678   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
679   t  = a->solve_work;
680   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
681 
682   /* solve U^T * D * y = b by forward substitution */
683   tp = t;
684   for (k=0; k<mbs; k++) { /* t <- perm(b) */
685     idx   = 4*r[k];
686     tp[0] = b[idx];
687     tp[1] = b[idx+1];
688     tp[2] = b[idx+2];
689     tp[3] = b[idx+3];
690     tp += 4;
691   }
692 
693   for (k=0; k<mbs; k++){
694     v  = aa + 16*ai[k];
695     vj = aj + ai[k];
696     tp = t + k*4;
697     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3];
698     nz = ai[k+1] - ai[k];
699 
700     tp = t + (*vj)*4;
701     while (nz--) {
702       tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3;
703       tp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3;
704       tp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3;
705       tp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3;
706       vj++; tp = t + (*vj)*4;
707       v += 16;
708     }
709 
710     /* xk = inv(Dk)*(Dk*xk) */
711     diag  = aa+k*16;          /* ptr to inv(Dk) */
712     tp    = t + k*4;
713     tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
714     tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
715     tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
716     tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
717   }
718 
719   /* solve U*x = y by back substitution */
720   for (k=mbs-1; k>=0; k--){
721     v  = aa + 16*ai[k];
722     vj = aj + ai[k];
723     tp    = t + k*4;
724     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */
725     nz = ai[k+1] - ai[k];
726 
727     tp = t + (*vj)*4;
728     while (nz--) {
729       /* xk += U(k,:)*x(:) */
730       x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3];
731       x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3];
732       x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3];
733       x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3];
734       vj++; tp = t + (*vj)*4;
735       v += 16;
736     }
737     tp    = t + k*4;
738     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3;
739     idx        = 4*r[k];
740     x[idx]     = x0;
741     x[idx+1]   = x1;
742     x[idx+2]   = x2;
743     x[idx+3]   = x3;
744   }
745 
746   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
747   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
748   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
749   ierr = PetscLogFlops(16*(2*a->nz + mbs));CHKERRQ(ierr);
750   PetscFunctionReturn(0);
751 }
752 
753 /*
754    Special case where the matrix was factored in the natural ordering.
755    This eliminates the need for the column and row permutation.
756 */
757 #undef __FUNCT__
758 #define __FUNCT__ "MatSolve_SeqSBAIJ_4_NaturalOrdering"
759 PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
760 {
761   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
762   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
763   MatScalar      *aa=a->a,*v,*diag;
764   PetscScalar    *x,*xp,*b,x0,x1,x2,x3;
765   PetscErrorCode ierr;
766   PetscInt       nz,*vj,k;
767 
768   PetscFunctionBegin;
769 
770   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
771   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
772 
773   /* solve U^T * D * y = b by forward substitution */
774   ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
775   for (k=0; k<mbs; k++){
776     v  = aa + 16*ai[k];
777     xp = x + k*4;
778     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */
779     nz = ai[k+1] - ai[k];
780     vj = aj + ai[k];
781     xp = x + (*vj)*4;
782     while (nz--) {
783       /* x(:) += U(k,:)^T*(Dk*xk) */
784       xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3;
785       xp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3;
786       xp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3;
787       xp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3;
788       vj++; xp = x + (*vj)*4;
789       v += 16;
790     }
791     /* xk = inv(Dk)*(Dk*xk) */
792     diag = aa+k*16;          /* ptr to inv(Dk) */
793     xp   = x + k*4;
794     xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
795     xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
796     xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
797     xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
798   }
799 
800   /* solve U*x = y by back substitution */
801   for (k=mbs-1; k>=0; k--){
802     v  = aa + 16*ai[k];
803     xp = x + k*4;
804     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */
805     nz = ai[k+1] - ai[k];
806     vj = aj + ai[k];
807     xp = x + (*vj)*4;
808     while (nz--) {
809       /* xk += U(k,:)*x(:) */
810       x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3];
811       x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3];
812       x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3];
813       x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3];
814       vj++;
815       v += 16; xp = x + (*vj)*4;
816     }
817     xp = x + k*4;
818     xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3;
819   }
820 
821   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
822   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
823   ierr = PetscLogFlops(16*(2*a->nz + mbs));CHKERRQ(ierr);
824   PetscFunctionReturn(0);
825 }
826 
827 #undef __FUNCT__
828 #define __FUNCT__ "MatSolve_SeqSBAIJ_3"
829 PetscErrorCode MatSolve_SeqSBAIJ_3(Mat A,Vec bb,Vec xx)
830 {
831   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
832   IS             isrow=a->row;
833   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
834   PetscErrorCode ierr;
835   PetscInt       nz,*vj,k,*r,idx;
836   MatScalar      *aa=a->a,*v,*diag;
837   PetscScalar    *x,*b,x0,x1,x2,*t,*tp;
838 
839   PetscFunctionBegin;
840   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
841   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
842   t  = a->solve_work;
843   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
844 
845   /* solve U^T * D * y = b by forward substitution */
846   tp = t;
847   for (k=0; k<mbs; k++) { /* t <- perm(b) */
848     idx   = 3*r[k];
849     tp[0] = b[idx];
850     tp[1] = b[idx+1];
851     tp[2] = b[idx+2];
852     tp += 3;
853   }
854 
855   for (k=0; k<mbs; k++){
856     v  = aa + 9*ai[k];
857     vj = aj + ai[k];
858     tp = t + k*3;
859     x0 = tp[0]; x1 = tp[1]; x2 = tp[2];
860     nz = ai[k+1] - ai[k];
861 
862     tp = t + (*vj)*3;
863     while (nz--) {
864       tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2;
865       tp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2;
866       tp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2;
867       vj++; tp = t + (*vj)*3;
868       v += 9;
869     }
870 
871     /* xk = inv(Dk)*(Dk*xk) */
872     diag  = aa+k*9;          /* ptr to inv(Dk) */
873     tp    = t + k*3;
874     tp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
875     tp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
876     tp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
877   }
878 
879   /* solve U*x = y by back substitution */
880   for (k=mbs-1; k>=0; k--){
881     v  = aa + 9*ai[k];
882     vj = aj + ai[k];
883     tp    = t + k*3;
884     x0 = tp[0]; x1 = tp[1]; x2 = tp[2];  /* xk */
885     nz = ai[k+1] - ai[k];
886 
887     tp = t + (*vj)*3;
888     while (nz--) {
889       /* xk += U(k,:)*x(:) */
890       x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2];
891       x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2];
892       x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2];
893       vj++; tp = t + (*vj)*3;
894       v += 9;
895     }
896     tp    = t + k*3;
897     tp[0] = x0; tp[1] = x1; tp[2] = x2;
898     idx      = 3*r[k];
899     x[idx]   = x0;
900     x[idx+1] = x1;
901     x[idx+2] = x2;
902   }
903 
904   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
905   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
906   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
907   ierr = PetscLogFlops(9*(2*a->nz + mbs));CHKERRQ(ierr);
908   PetscFunctionReturn(0);
909 }
910 
911 /*
912    Special case where the matrix was factored in the natural ordering.
913    This eliminates the need for the column and row permutation.
914 */
915 #undef __FUNCT__
916 #define __FUNCT__ "MatSolve_SeqSBAIJ_3_NaturalOrdering"
917 PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
918 {
919   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
920   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
921   MatScalar      *aa=a->a,*v,*diag;
922   PetscScalar    *x,*xp,*b,x0,x1,x2;
923   PetscErrorCode ierr;
924   PetscInt       nz,*vj,k;
925 
926   PetscFunctionBegin;
927 
928   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
929   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
930 
931   /* solve U^T * D * y = b by forward substitution */
932   ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
933   for (k=0; k<mbs; k++){
934     v  = aa + 9*ai[k];
935     xp = x + k*3;
936     x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* Dk*xk = k-th block of x */
937     nz = ai[k+1] - ai[k];
938     vj = aj + ai[k];
939     xp = x + (*vj)*3;
940     while (nz--) {
941       /* x(:) += U(k,:)^T*(Dk*xk) */
942       xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2;
943       xp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2;
944       xp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2;
945       vj++; xp = x + (*vj)*3;
946       v += 9;
947     }
948     /* xk = inv(Dk)*(Dk*xk) */
949     diag = aa+k*9;          /* ptr to inv(Dk) */
950     xp   = x + k*3;
951     xp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
952     xp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
953     xp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
954   }
955 
956   /* solve U*x = y by back substitution */
957   for (k=mbs-1; k>=0; k--){
958     v  = aa + 9*ai[k];
959     xp = x + k*3;
960     x0 = xp[0]; x1 = xp[1]; x2 = xp[2];  /* xk */
961     nz = ai[k+1] - ai[k];
962     vj = aj + ai[k];
963     xp = x + (*vj)*3;
964     while (nz--) {
965       /* xk += U(k,:)*x(:) */
966       x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2];
967       x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2];
968       x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2];
969       vj++;
970       v += 9; xp = x + (*vj)*3;
971     }
972     xp = x + k*3;
973     xp[0] = x0; xp[1] = x1; xp[2] = x2;
974   }
975 
976   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
977   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
978   ierr = PetscLogFlops(9*(2*a->nz + mbs));CHKERRQ(ierr);
979   PetscFunctionReturn(0);
980 }
981 
982 #undef __FUNCT__
983 #define __FUNCT__ "MatSolve_SeqSBAIJ_2"
984 PetscErrorCode MatSolve_SeqSBAIJ_2(Mat A,Vec bb,Vec xx)
985 {
986   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ *)A->data;
987   IS             isrow=a->row;
988   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
989   PetscErrorCode ierr;
990   PetscInt       nz,*vj,k,k2,*r,idx;
991   MatScalar      *aa=a->a,*v,*diag;
992   PetscScalar    *x,*b,x0,x1,*t;
993 
994   PetscFunctionBegin;
995   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
996   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
997   t  = a->solve_work;
998   /* printf("called MatSolve_SeqSBAIJ_2\n"); */
999   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1000 
1001   /* solve U^T * D * y = perm(b) by forward substitution */
1002   for (k=0; k<mbs; k++) {  /* t <- perm(b) */
1003     idx = 2*r[k];
1004     t[k*2]   = b[idx];
1005     t[k*2+1] = b[idx+1];
1006   }
1007   for (k=0; k<mbs; k++){
1008     v  = aa + 4*ai[k];
1009     vj = aj + ai[k];
1010     k2 = k*2;
1011     x0 = t[k2]; x1 = t[k2+1];
1012     nz = ai[k+1] - ai[k];
1013     while (nz--) {
1014       t[(*vj)*2]   += v[0]*x0 + v[1]*x1;
1015       t[(*vj)*2+1] += v[2]*x0 + v[3]*x1;
1016       vj++; v += 4;
1017     }
1018     diag = aa+k*4;  /* ptr to inv(Dk) */
1019     t[k2]   = diag[0]*x0 + diag[2]*x1;
1020     t[k2+1] = diag[1]*x0 + diag[3]*x1;
1021   }
1022 
1023   /* solve U*x = y by back substitution */
1024   for (k=mbs-1; k>=0; k--){
1025     v  = aa + 4*ai[k];
1026     vj = aj + ai[k];
1027     k2 = k*2;
1028     x0 = t[k2]; x1 = t[k2+1];
1029     nz = ai[k+1] - ai[k];
1030     while (nz--) {
1031       x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1];
1032       x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1];
1033       vj++; v += 4;
1034     }
1035     t[k2]    = x0;
1036     t[k2+1]  = x1;
1037     idx      = 2*r[k];
1038     x[idx]   = x0;
1039     x[idx+1] = x1;
1040   }
1041 
1042   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1043   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1044   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1045   ierr = PetscLogFlops(4*(2*a->nz + mbs));CHKERRQ(ierr);
1046   PetscFunctionReturn(0);
1047 }
1048 
1049 /*
1050    Special case where the matrix was factored in the natural ordering.
1051    This eliminates the need for the column and row permutation.
1052 */
1053 #undef __FUNCT__
1054 #define __FUNCT__ "MatSolve_SeqSBAIJ_2_NaturalOrdering"
1055 PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
1056 {
1057   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1058   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1059   MatScalar      *aa=a->a,*v,*diag;
1060   PetscScalar    *x,*b,x0,x1;
1061   PetscErrorCode ierr;
1062   PetscInt       nz,*vj,k,k2;
1063 
1064   PetscFunctionBegin;
1065 
1066   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1067   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1068 
1069   /* solve U^T * D * y = b by forward substitution */
1070   ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1071   for (k=0; k<mbs; k++){
1072     v  = aa + 4*ai[k];
1073     vj = aj + ai[k];
1074     k2 = k*2;
1075     x0 = x[k2]; x1 = x[k2+1];  /* Dk*xk = k-th block of x */
1076     nz = ai[k+1] - ai[k];
1077 
1078     while (nz--) {
1079       /* x(:) += U(k,:)^T*(Dk*xk) */
1080       x[(*vj)*2]   += v[0]*x0 + v[1]*x1;
1081       x[(*vj)*2+1] += v[2]*x0 + v[3]*x1;
1082       vj++; v += 4;
1083     }
1084     /* xk = inv(Dk)*(Dk*xk) */
1085     diag = aa+k*4;          /* ptr to inv(Dk) */
1086     x[k2]   = diag[0]*x0 + diag[2]*x1;
1087     x[k2+1] = diag[1]*x0 + diag[3]*x1;
1088   }
1089 
1090   /* solve U*x = y by back substitution */
1091   for (k=mbs-1; k>=0; k--){
1092     v  = aa + 4*ai[k];
1093     vj = aj + ai[k];
1094     k2 = k*2;
1095     x0 = x[k2]; x1 = x[k2+1];  /* xk */
1096     nz = ai[k+1] - ai[k];
1097     while (nz--) {
1098       /* xk += U(k,:)*x(:) */
1099       x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1];
1100       x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1];
1101       vj++; v += 4;
1102     }
1103     x[k2]     = x0;
1104     x[k2+1]   = x1;
1105   }
1106 
1107   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1108   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1109   ierr = PetscLogFlops(4*(2*a->nz + mbs));CHKERRQ(ierr); /* bs2*(2*a->nz + mbs) */
1110   PetscFunctionReturn(0);
1111 }
1112 
1113 #undef __FUNCT__
1114 #define __FUNCT__ "MatSolve_SeqSBAIJ_1"
1115 PetscErrorCode MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx)
1116 {
1117   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1118   IS             isrow=a->row;
1119   PetscErrorCode ierr;
1120   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j,*rip;
1121   MatScalar      *aa=a->a,*v;
1122   PetscScalar    *x,*b,xk,*t;
1123   PetscInt       nz,*vj,k;
1124 
1125   PetscFunctionBegin;
1126   if (!mbs) PetscFunctionReturn(0);
1127   /* printf(" MatSolve_SeqSBAIJ_1 is called\n"); */
1128 
1129   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1130   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1131   t    = a->solve_work;
1132 
1133   ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr);
1134 
1135   /* solve U^T*D*y = perm(b) by forward substitution */
1136   for (k=0; k<mbs; k++) t[k] = b[rip[k]];
1137   for (k=0; k<mbs; k++){
1138     v  = aa + ai[k] + 1;
1139     vj = aj + ai[k] + 1;
1140     xk = t[k];
1141     nz = ai[k+1] - ai[k] - 1;
1142     while (nz--) t[*vj++] += (*v++) * xk;
1143     t[k] = xk*aa[ai[k]];  /* aa[k] = 1/D(k) */
1144   }
1145 
1146   /* solve U*x = y by back substitution */
1147   for (k=mbs-1; k>=0; k--){
1148     v  = aa + ai[k] + 1;
1149     vj = aj + ai[k] + 1;
1150     xk = t[k];
1151     nz = ai[k+1] - ai[k] - 1;
1152     while (nz--) xk += (*v++) * t[*vj++];
1153     t[k]      = xk;
1154     x[rip[k]] = xk;
1155   }
1156 
1157   ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr);
1158   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1159   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1160   ierr = PetscLogFlops(4*a->nz + A->m);CHKERRQ(ierr);
1161   PetscFunctionReturn(0);
1162 }
1163 
1164 #undef __FUNCT__
1165 #define __FUNCT__ "MatSolves_SeqSBAIJ_1"
1166 PetscErrorCode MatSolves_SeqSBAIJ_1(Mat A,Vecs bb,Vecs xx)
1167 {
1168   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1169   PetscErrorCode ierr;
1170 
1171   PetscFunctionBegin;
1172   if (A->bs == 1) {
1173     ierr = MatSolve_SeqSBAIJ_1(A,bb->v,xx->v);CHKERRQ(ierr);
1174   } else {
1175     IS              isrow=a->row;
1176     PetscInt             mbs=a->mbs,*ai=a->i,*aj=a->j,*rip,i;
1177     MatScalar       *aa=a->a,*v;
1178     PetscScalar     *x,*b,*t;
1179     PetscInt             nz,*vj,k,n;
1180     if (bb->n > a->solves_work_n) {
1181       if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);}
1182       ierr = PetscMalloc(bb->n*A->m*sizeof(PetscScalar),&a->solves_work);CHKERRQ(ierr);
1183       a->solves_work_n = bb->n;
1184     }
1185     n    = bb->n;
1186     ierr = VecGetArray(bb->v,&b);CHKERRQ(ierr);
1187     ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr);
1188     t    = a->solves_work;
1189 
1190     ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr);
1191 
1192     /* solve U^T*D*y = perm(b) by forward substitution */
1193     for (k=0; k<mbs; k++) {for (i=0; i<n; i++) t[n*k+i] = b[rip[k]+i*mbs];} /* values are stored interlaced in t */
1194     for (k=0; k<mbs; k++){
1195       v  = aa + ai[k];
1196       vj = aj + ai[k];
1197       nz = ai[k+1] - ai[k];
1198       while (nz--) {
1199         for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i];
1200         v++;vj++;
1201       }
1202       for (i=0; i<n; i++) t[n*k+i] *= aa[k];  /* note: aa[k] = 1/D(k) */
1203     }
1204 
1205     /* solve U*x = y by back substitution */
1206     for (k=mbs-1; k>=0; k--){
1207       v  = aa + ai[k];
1208       vj = aj + ai[k];
1209       nz = ai[k+1] - ai[k];
1210       while (nz--) {
1211         for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i];
1212         v++;vj++;
1213       }
1214       for (i=0; i<n; i++) x[rip[k]+i*mbs] = t[n*k+i];
1215     }
1216 
1217     ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr);
1218     ierr = VecRestoreArray(bb->v,&b);CHKERRQ(ierr);
1219     ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr);
1220     ierr = PetscLogFlops(bb->n*(4*a->nz + A->m));CHKERRQ(ierr);
1221   }
1222   PetscFunctionReturn(0);
1223 }
1224 
1225 /*
1226       Special case where the matrix was ILU(0) factored in the natural
1227    ordering. This eliminates the need for the column and row permutation.
1228 */
1229 #undef __FUNCT__
1230 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering"
1231 PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
1232 {
1233   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1234   PetscErrorCode ierr;
1235   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1236   MatScalar      *aa=a->a,*v;
1237   PetscScalar    *x,*b,xk;
1238   PetscInt       nz,*vj,k;
1239 
1240   PetscFunctionBegin;
1241   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1242   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1243 
1244   /* solve U^T*D*y = b by forward substitution */
1245   ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1246   for (k=0; k<mbs; k++){
1247     v  = aa + ai[k] + 1;
1248     vj = aj + ai[k] + 1;
1249     xk = x[k];
1250     nz = ai[k+1] - ai[k] - 1;     /* exclude diag[k] */
1251     while (nz--) x[*vj++] += (*v++) * xk;
1252     x[k] = xk*aa[ai[k]];  /* note: aa[diag[k]] = 1/D(k) */
1253   }
1254 
1255   /* solve U*x = y by back substitution */
1256   for (k=mbs-2; k>=0; k--){
1257     v  = aa + ai[k] + 1;
1258     vj = aj + ai[k] + 1;
1259     xk = x[k];
1260     nz = ai[k+1] - ai[k] - 1;
1261     while (nz--) xk += (*v++) * x[*vj++];
1262     x[k] = xk;
1263   }
1264 
1265   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1266   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1267   ierr = PetscLogFlops(4*a->nz + A->m);CHKERRQ(ierr);
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */
1272 #undef __FUNCT__
1273 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ_MSR"
1274 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ_MSR(Mat A,IS perm,MatFactorInfo *info,Mat *B)
1275 {
1276   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b;
1277   PetscErrorCode ierr;
1278   PetscInt       *rip,i,mbs = a->mbs,*ai = a->i,*aj = a->j;
1279   PetscInt       *jutmp,bs = A->bs,bs2=a->bs2;
1280   PetscInt       m,reallocs = 0,*levtmp;
1281   PetscInt       *prowl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd;
1282   PetscInt       incrlev,*lev,shift,prow,nz;
1283   PetscReal      f = info->fill,levels = info->levels;
1284   PetscTruth     perm_identity;
1285 
1286   PetscFunctionBegin;
1287   /* check whether perm is the identity mapping */
1288   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1289 
1290   if (perm_identity){
1291     a->permute = PETSC_FALSE;
1292     ai = a->i; aj = a->j;
1293   } else { /*  non-trivial permutation */
1294     a->permute = PETSC_TRUE;
1295     ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr);
1296     ai = a->inew; aj = a->jnew;
1297   }
1298 
1299   /* initialization */
1300   ierr  = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1301   umax  = (PetscInt)(f*ai[mbs] + 1);
1302   ierr  = PetscMalloc(umax*sizeof(PetscInt),&lev);CHKERRQ(ierr);
1303   umax += mbs + 1;
1304   shift = mbs + 1;
1305   ierr  = PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);CHKERRQ(ierr);
1306   ierr  = PetscMalloc(umax*sizeof(PetscInt),&ju);CHKERRQ(ierr);
1307   iu[0] = mbs + 1;
1308   juidx = mbs + 1;
1309   /* prowl: linked list for pivot row */
1310   ierr    = PetscMalloc((3*mbs+1)*sizeof(PetscInt),&prowl);CHKERRQ(ierr);
1311   /* q: linked list for col index */
1312   q       = prowl + mbs;
1313   levtmp  = q     + mbs;
1314 
1315   for (i=0; i<mbs; i++){
1316     prowl[i] = mbs;
1317     q[i] = 0;
1318   }
1319 
1320   /* for each row k */
1321   for (k=0; k<mbs; k++){
1322     nzk  = 0;
1323     q[k] = mbs;
1324     /* copy current row into linked list */
1325     nz = ai[rip[k]+1] - ai[rip[k]];
1326     j = ai[rip[k]];
1327     while (nz--){
1328       vj = rip[aj[j++]];
1329       if (vj > k){
1330         qm = k;
1331         do {
1332           m  = qm; qm = q[m];
1333         } while(qm < vj);
1334         if (qm == vj) {
1335           SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n");
1336         }
1337         nzk++;
1338         q[m]       = vj;
1339         q[vj]      = qm;
1340         levtmp[vj] = 0;   /* initialize lev for nonzero element */
1341       }
1342     }
1343 
1344     /* modify nonzero structure of k-th row by computing fill-in
1345        for each row prow to be merged in */
1346     prow = k;
1347     prow = prowl[prow]; /* next pivot row (== 0 for symbolic factorization) */
1348 
1349     while (prow < k){
1350       /* merge row prow into k-th row */
1351       jmin = iu[prow] + 1;
1352       jmax = iu[prow+1];
1353       qm = k;
1354       for (j=jmin; j<jmax; j++){
1355         incrlev = lev[j-shift] + 1;
1356 	if (incrlev > levels) continue;
1357         vj      = ju[j];
1358         do {
1359           m = qm; qm = q[m];
1360         } while (qm < vj);
1361         if (qm != vj){      /* a fill */
1362           nzk++; q[m] = vj; q[vj] = qm; qm = vj;
1363           levtmp[vj] = incrlev;
1364         } else {
1365           if (levtmp[vj] > incrlev) levtmp[vj] = incrlev;
1366         }
1367       }
1368       prow = prowl[prow]; /* next pivot row */
1369     }
1370 
1371     /* add k to row list for first nonzero element in k-th row */
1372     if (nzk > 1){
1373       i = q[k]; /* col value of first nonzero element in k_th row of U */
1374       prowl[k] = prowl[i]; prowl[i] = k;
1375     }
1376     iu[k+1] = iu[k] + nzk;
1377 
1378     /* allocate more space to ju and lev if needed */
1379     if (iu[k+1] > umax) {
1380       /* estimate how much additional space we will need */
1381       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1382       /* just double the memory each time */
1383       maxadd = umax;
1384       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
1385       umax += maxadd;
1386 
1387       /* allocate a longer ju */
1388       ierr     = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr);
1389       ierr     = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr);
1390       ierr     = PetscFree(ju);CHKERRQ(ierr);
1391       ju       = jutmp;
1392 
1393       ierr     = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr);
1394       ierr     = PetscMemcpy(jutmp,lev,(iu[k]-shift)*sizeof(PetscInt));CHKERRQ(ierr);
1395       ierr     = PetscFree(lev);CHKERRQ(ierr);
1396       lev      = jutmp;
1397       reallocs += 2; /* count how many times we realloc */
1398     }
1399 
1400     /* save nonzero structure of k-th row in ju */
1401     i=k;
1402     while (nzk --) {
1403       i                = q[i];
1404       ju[juidx]        = i;
1405       lev[juidx-shift] = levtmp[i];
1406       juidx++;
1407     }
1408   }
1409 
1410 #if defined(PETSC_USE_VERBOSE)
1411   if (ai[mbs] != 0) {
1412     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
1413     ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr);
1414     ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:Run with -pc_icc_fill %g or use \n",af));CHKERRQ(ierr);
1415     ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:PCICCSetFill(pc,%g);\n",af));CHKERRQ(ierr);
1416     ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:for best performance.\n"));CHKERRQ(ierr);
1417   } else {
1418     ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:Empty matrix.\n"));CHKERRQ(ierr);
1419   }
1420 #endif
1421 
1422   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1423   ierr = PetscFree(prowl);CHKERRQ(ierr);
1424   ierr = PetscFree(lev);CHKERRQ(ierr);
1425 
1426   /* put together the new matrix */
1427   ierr = MatCreate(A->comm,B);CHKERRQ(ierr);
1428   ierr = MatSetSizes(*B,bs*mbs,bs*mbs,bs*mbs,bs*mbs);CHKERRQ(ierr);
1429   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
1430   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*B,bs,0,PETSC_NULL);CHKERRQ(ierr);
1431 
1432   /* ierr = PetscLogObjectParent(*B,iperm);CHKERRQ(ierr); */
1433   b    = (Mat_SeqSBAIJ*)(*B)->data;
1434   ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr);
1435   b->singlemalloc = PETSC_FALSE;
1436   /* the next line frees the default space generated by the Create() */
1437   ierr    = PetscFree3(b->a,b->j,b->i);CHKERRQ(ierr);
1438   ierr    = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
1439   b->j    = ju;
1440   b->i    = iu;
1441   b->diag = 0;
1442   b->ilen = 0;
1443   b->imax = 0;
1444 
1445   if (b->row) {
1446     ierr = ISDestroy(b->row);CHKERRQ(ierr);
1447   }
1448   if (b->icol) {
1449     ierr = ISDestroy(b->icol);CHKERRQ(ierr);
1450   }
1451   b->row  = perm;
1452   b->icol = perm;
1453   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1454   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1455   ierr    = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1456   /* In b structure:  Free imax, ilen, old a, old j.
1457      Allocate idnew, solve_work, new a, new j */
1458   ierr    = PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1459   b->maxnz = b->nz = iu[mbs];
1460 
1461   (*B)->factor                 = FACTOR_CHOLESKY;
1462   (*B)->info.factor_mallocs    = reallocs;
1463   (*B)->info.fill_ratio_given  = f;
1464   if (ai[mbs] != 0) {
1465     (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
1466   } else {
1467     (*B)->info.fill_ratio_needed = 0.0;
1468   }
1469 
1470   if (perm_identity){
1471     switch (bs) {
1472       case 1:
1473         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
1474         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1475         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1476         (*B)->ops->solves                = MatSolves_SeqSBAIJ_1;
1477         ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJl:Using special in-place natural ordering factor and solve BS=1\n"));CHKERRQ(ierr);
1478         break;
1479       case 2:
1480         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1481         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1482         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1483         ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"));CHKERRQ(ierr);
1484         break;
1485       case 3:
1486         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1487         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1488         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1489         ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n"));CHKERRQ(ierr);
1490         break;
1491       case 4:
1492         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1493         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1494         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1495         ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"));CHKERRQ(ierr);
1496         break;
1497       case 5:
1498         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1499         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1500         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1501         ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"));CHKERRQ(ierr);
1502         break;
1503       case 6:
1504         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1505         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1506         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1507         ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"));CHKERRQ(ierr);
1508         break;
1509       case 7:
1510         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1511         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1512         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1513         ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"));CHKERRQ(ierr);
1514       break;
1515       default:
1516         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
1517         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_N_NaturalOrdering;
1518         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_N_NaturalOrdering;
1519         ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n"));CHKERRQ(ierr);
1520       break;
1521     }
1522   }
1523 
1524   PetscFunctionReturn(0);
1525 }
1526 
1527 #include "petscbt.h"
1528 #include "src/mat/utils/freespace.h"
1529 #undef __FUNCT__
1530 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ"
1531 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1532 {
1533   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data;
1534   Mat_SeqSBAIJ       *b;
1535   Mat                B;
1536   PetscErrorCode     ierr;
1537   PetscTruth         perm_identity;
1538   PetscInt           bs=A->bs,am=a->mbs;
1539   PetscInt           reallocs=0,*rip,i,*ai,*aj,*ui;
1540   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1541   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,ncols,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr;
1542   PetscReal          fill=info->fill,levels=info->levels;
1543   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1544   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1545   PetscBT            lnkbt;
1546 
1547   PetscFunctionBegin;
1548   /*
1549    This code originally uses Modified Sparse Row (MSR) storage
1550    (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choise!
1551    Then it is rewritten so the factor B takes seqsbaij format. However the associated
1552    MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1,
1553    thus the original code in MSR format is still used for these cases.
1554    The code below should replace MatICCFactorSymbolic_SeqSBAIJ_MSR() whenever
1555    MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor.
1556   */
1557   if (bs > 1){
1558     ierr = MatICCFactorSymbolic_SeqSBAIJ_MSR(A,perm,info,fact);CHKERRQ(ierr);
1559     PetscFunctionReturn(0);
1560   }
1561 
1562   /* check whether perm is the identity mapping */
1563   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1564 
1565   /* special case that simply copies fill pattern */
1566   if (!levels && perm_identity) {
1567     a->permute = PETSC_FALSE;
1568     ai = a->i; aj = a->j;
1569     ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1570     ierr = PetscMemcpy(ui,ai,(am+1)*sizeof(PetscInt));CHKERRQ(ierr);
1571     ierr = PetscMalloc(ai[am]*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1572     ierr = PetscMemcpy(uj,aj,ai[am]*sizeof(PetscInt));CHKERRQ(ierr);
1573 
1574   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1575     if (perm_identity){
1576       a->permute = PETSC_FALSE;
1577       ai = a->i; aj = a->j;
1578     } else { /*  non-trivial permutation */
1579       a->permute = PETSC_TRUE;
1580       ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr);
1581       ai   = a->inew; aj = a->jnew;
1582     }
1583     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1584 
1585     /* initialization */
1586     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1587     ui[0] = 0;
1588 
1589     /* jl: linked list for storing indices of the pivot rows
1590        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1591     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
1592     il         = jl + am;
1593     uj_ptr     = (PetscInt**)(il + am);
1594     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1595     for (i=0; i<am; i++){
1596       jl[i] = am; il[i] = 0;
1597     }
1598 
1599     /* create and initialize a linked list for storing column indices of the active row k */
1600     nlnk = am + 1;
1601     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1602 
1603     /* initial FreeSpace size is fill*(ai[am]+1) */
1604     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1605     current_space = free_space;
1606     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1607     current_space_lvl = free_space_lvl;
1608 
1609     for (k=0; k<am; k++){  /* for each active row k */
1610       /* initialize lnk by the column indices of row rip[k] */
1611       nzk   = 0;
1612       ncols = ai[rip[k]+1] - ai[rip[k]];
1613       cols  = aj+ai[rip[k]];
1614       ierr = PetscIncompleteLLInit(ncols,cols,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1615       nzk += nlnk;
1616 
1617       /* update lnk by computing fill-in for each pivot row to be merged in */
1618       prow = jl[k]; /* 1st pivot row */
1619 
1620       while (prow < k){
1621         nextprow = jl[prow];
1622 
1623         /* merge prow into k-th row */
1624         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1625         jmax = ui[prow+1];
1626         ncols = jmax-jmin;
1627         i     = jmin - ui[prow];
1628         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1629         j     = *(uj_lvl_ptr[prow] + i - 1);
1630         cols_lvl = uj_lvl_ptr[prow]+i;
1631         ierr = PetscICCLLAddSorted(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
1632         nzk += nlnk;
1633 
1634         /* update il and jl for prow */
1635         if (jmin < jmax){
1636           il[prow] = jmin;
1637           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1638         }
1639         prow = nextprow;
1640       }
1641 
1642       /* if free space is not available, make more free space */
1643       if (current_space->local_remaining<nzk) {
1644         i = am - k + 1; /* num of unfactored rows */
1645         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1646         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1647         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
1648         reallocs++;
1649       }
1650 
1651       /* copy data into free_space and free_space_lvl, then initialize lnk */
1652       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1653 
1654       /* add the k-th row into il and jl */
1655       if (nzk-1 > 0){
1656         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1657         jl[k] = jl[i]; jl[i] = k;
1658         il[k] = ui[k] + 1;
1659       }
1660       uj_ptr[k]     = current_space->array;
1661       uj_lvl_ptr[k] = current_space_lvl->array;
1662 
1663       current_space->array           += nzk;
1664       current_space->local_used      += nzk;
1665       current_space->local_remaining -= nzk;
1666       current_space_lvl->array           += nzk;
1667       current_space_lvl->local_used      += nzk;
1668       current_space_lvl->local_remaining -= nzk;
1669 
1670       ui[k+1] = ui[k] + nzk;
1671     }
1672 
1673 #if defined(PETSC_USE_VERBOSE)
1674     if (ai[am] != 0) {
1675       PetscReal af = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1676       ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr);
1677       ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr);
1678       ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr);
1679     } else {
1680       ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr);
1681     }
1682 #endif
1683 
1684     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1685     ierr = PetscFree(jl);CHKERRQ(ierr);
1686 
1687     /* destroy list of free space and other temporary array(s) */
1688     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1689     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1690     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1691     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1692   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1693 
1694   /* put together the new matrix in MATSEQSBAIJ format */
1695   ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr);
1696   ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr);
1697   B = *fact;
1698   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1699   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,PETSC_NULL);CHKERRQ(ierr);
1700 
1701   b = (Mat_SeqSBAIJ*)B->data;
1702   ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr);
1703   b->singlemalloc = PETSC_FALSE;
1704   /* the next line frees the default space generated by the Create() */
1705   ierr = PetscFree3(b->a,b->j,b->i);CHKERRQ(ierr);
1706   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1707   b->j    = uj;
1708   b->i    = ui;
1709   b->diag = 0;
1710   b->ilen = 0;
1711   b->imax = 0;
1712   b->row  = perm;
1713   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1714   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1715   b->icol = perm;
1716   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1717   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1718   ierr    = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1719   b->maxnz = b->nz = ui[am];
1720 
1721   B->factor                 = FACTOR_CHOLESKY;
1722   B->info.factor_mallocs    = reallocs;
1723   B->info.fill_ratio_given  = fill;
1724   if (ai[am] != 0) {
1725     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1726   } else {
1727     B->info.fill_ratio_needed = 0.0;
1728   }
1729 
1730   if (perm_identity){
1731     switch (bs) {
1732       case 1:
1733         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
1734         B->ops->solve                 = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1735         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1736         ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJl:Using special in-place natural ordering factor and solve BS=1\n"));CHKERRQ(ierr);
1737         break;
1738       case 2:
1739         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1740         B->ops->solve                 = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1741         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1742         ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"));CHKERRQ(ierr);
1743         break;
1744       case 3:
1745         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1746         B->ops->solve                 = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1747         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1748         ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n"));CHKERRQ(ierr);
1749         break;
1750       case 4:
1751         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1752         B->ops->solve                 = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1753         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1754         ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"));CHKERRQ(ierr);
1755         break;
1756       case 5:
1757         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1758         B->ops->solve                 = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1759         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1760         ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"));CHKERRQ(ierr);
1761         break;
1762       case 6:
1763         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1764         B->ops->solve                 = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1765         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1766         ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"));CHKERRQ(ierr);
1767         break;
1768       case 7:
1769         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1770         B->ops->solve                 = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1771         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1772         ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"));CHKERRQ(ierr);
1773       break;
1774       default:
1775         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
1776         B->ops->solve                 = MatSolve_SeqSBAIJ_N_NaturalOrdering;
1777         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_N_NaturalOrdering;
1778         ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n"));CHKERRQ(ierr);
1779       break;
1780     }
1781   }
1782   PetscFunctionReturn(0);
1783 }
1784 
1785