xref: /petsc/src/mat/impls/sbaij/seq/sbaijfact2.c (revision 6849ba73f22fecb8f92ef896a42e4e8bd4cd6965)
1 /*
2     Factorization code for SBAIJ format.
3 */
4 
5 #include "src/mat/impls/sbaij/seq/sbaij.h"
6 #include "src/mat/impls/baij/seq/baij.h"
7 #include "src/inline/ilu.h"
8 #include "src/inline/dot.h"
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_1_NaturalOrdering"
12 PetscErrorCode MatSolveTranspose_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
13 {
14   PetscFunctionBegin;
15   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
16 }
17 
18 #undef __FUNCT__
19 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_2_NaturalOrdering"
20 PetscErrorCode MatSolveTranspose_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
21 {
22   PetscFunctionBegin;
23   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
24 }
25 
26 #undef __FUNCT__
27 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_3_NaturalOrdering"
28 PetscErrorCode MatSolveTranspose_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
29 {
30   PetscFunctionBegin;
31   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
32 }
33 
34 #undef __FUNCT__
35 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_4_NaturalOrdering"
36 PetscErrorCode MatSolveTranspose_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
37 {
38   PetscFunctionBegin;
39   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
40 }
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_5_NaturalOrdering"
44 PetscErrorCode MatSolveTranspose_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
45 {
46   PetscFunctionBegin;
47   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
48 }
49 
50 #undef __FUNCT__
51 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_6_NaturalOrdering"
52 PetscErrorCode MatSolveTranspose_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
53 {
54   PetscFunctionBegin;
55   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
56 }
57 
58 #undef __FUNCT__
59 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_7_NaturalOrdering"
60 PetscErrorCode MatSolveTranspose_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
61 {
62   PetscFunctionBegin;
63   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
64 }
65 
66 #undef __FUNCT__
67 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_1"
68 PetscErrorCode MatSolveTranspose_SeqSBAIJ_1(Mat A,Vec bb,Vec xx)
69 {
70   PetscFunctionBegin;
71   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
72 }
73 
74 #undef __FUNCT__
75 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_2"
76 PetscErrorCode MatSolveTranspose_SeqSBAIJ_2(Mat A,Vec bb,Vec xx)
77 {
78   PetscFunctionBegin;
79   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
80 }
81 
82 #undef __FUNCT__
83 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_3"
84 PetscErrorCode MatSolveTranspose_SeqSBAIJ_3(Mat A,Vec bb,Vec xx)
85 {
86   PetscFunctionBegin;
87   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_4"
92 PetscErrorCode MatSolveTranspose_SeqSBAIJ_4(Mat A,Vec bb,Vec xx)
93 {
94   PetscFunctionBegin;
95   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
96 }
97 
98 #undef __FUNCT__
99 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_5"
100 PetscErrorCode MatSolveTranspose_SeqSBAIJ_5(Mat A,Vec bb,Vec xx)
101 {
102   PetscFunctionBegin;
103   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
104 }
105 
106 #undef __FUNCT__
107 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_6"
108 PetscErrorCode MatSolveTranspose_SeqSBAIJ_6(Mat A,Vec bb,Vec xx)
109 {
110   PetscFunctionBegin;
111   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
112 }
113 
114 #undef __FUNCT__
115 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_7"
116 PetscErrorCode MatSolveTranspose_SeqSBAIJ_7(Mat A,Vec bb,Vec xx)
117 {
118   PetscFunctionBegin;
119   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
120 }
121 
122 #undef __FUNCT__
123 #define __FUNCT__ "MatSolve_SeqSBAIJ_N"
124 PetscErrorCode MatSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx)
125 {
126   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
127   IS              isrow=a->row;
128   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
129   PetscErrorCode ierr;
130   int             nz,*vj,k,*r,idx,k1;
131   int             bs=a->bs,bs2 = a->bs2;
132   MatScalar       *aa=a->a,*v,*diag;
133   PetscScalar     *x,*xk,*xj,*b,*xk_tmp,*t;
134 
135   PetscFunctionBegin;
136   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
137   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
138   t  = a->solve_work;
139   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
140   ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr);
141 
142   /* solve U^T * D * y = b by forward substitution */
143   xk = t;
144   for (k=0; k<mbs; k++) { /* t <- perm(b) */
145     idx   = bs*r[k];
146     for (k1=0; k1<bs; k1++) *xk++ = b[idx+k1];
147   }
148   for (k=0; k<mbs; k++){
149     v  = aa + bs2*ai[k];
150     xk = t + k*bs;      /* Dk*xk = k-th block of x */
151     ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */
152     nz = ai[k+1] - ai[k];
153     vj = aj + ai[k];
154     xj = t + (*vj)*bs;  /* *vj-th block of x, *vj>k */
155     while (nz--) {
156       /* x(:) += U(k,:)^T*(Dk*xk) */
157       Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */
158       vj++; xj = t + (*vj)*bs;
159       v += bs2;
160     }
161     /* xk = inv(Dk)*(Dk*xk) */
162     diag = aa+k*bs2;                            /* ptr to inv(Dk) */
163     Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */
164   }
165 
166   /* solve U*x = y by back substitution */
167   for (k=mbs-1; k>=0; k--){
168     v  = aa + bs2*ai[k];
169     xk = t + k*bs;        /* xk */
170     nz = ai[k+1] - ai[k];
171     vj = aj + ai[k];
172     xj = t + (*vj)*bs;
173     while (nz--) {
174       /* xk += U(k,:)*x(:) */
175       Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */
176       vj++;
177       v += bs2; xj = t + (*vj)*bs;
178     }
179     idx   = bs*r[k];
180     for (k1=0; k1<bs; k1++) x[idx+k1] = *xk++;
181   }
182 
183   ierr = PetscFree(xk_tmp);CHKERRQ(ierr);
184   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
185   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
186   PetscLogFlops(bs2*(2*a->nz + mbs));
187   PetscFunctionReturn(0);
188 }
189 
190 #undef __FUNCT__
191 #define __FUNCT__ "MatSolve_SeqSBAIJ_N_NaturalOrdering"
192 PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx)
193 {
194   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
195   PetscErrorCode ierr;
196   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
197   int             nz,*vj,k;
198   int             bs=a->bs,bs2 = a->bs2;
199   MatScalar       *aa=a->a,*v,*diag;
200   PetscScalar     *x,*xk,*xj,*b,*xk_tmp;
201 
202   PetscFunctionBegin;
203 
204   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
205   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
206 
207   ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr);
208 
209   /* solve U^T * D * y = b by forward substitution */
210   ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
211   for (k=0; k<mbs; k++){
212     v  = aa + bs2*ai[k];
213     xk = x + k*bs;      /* Dk*xk = k-th block of x */
214     ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */
215     nz = ai[k+1] - ai[k];
216     vj = aj + ai[k];
217     xj = x + (*vj)*bs;  /* *vj-th block of x, *vj>k */
218     while (nz--) {
219       /* x(:) += U(k,:)^T*(Dk*xk) */
220       Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */
221       vj++; xj = x + (*vj)*bs;
222       v += bs2;
223     }
224     /* xk = inv(Dk)*(Dk*xk) */
225     diag = aa+k*bs2;                            /* ptr to inv(Dk) */
226     Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */
227   }
228 
229   /* solve U*x = y by back substitution */
230   for (k=mbs-1; k>=0; k--){
231     v  = aa + bs2*ai[k];
232     xk = x + k*bs;        /* xk */
233     nz = ai[k+1] - ai[k];
234     vj = aj + ai[k];
235     xj = x + (*vj)*bs;
236     while (nz--) {
237       /* xk += U(k,:)*x(:) */
238       Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */
239       vj++;
240       v += bs2; xj = x + (*vj)*bs;
241     }
242   }
243 
244   ierr = PetscFree(xk_tmp);CHKERRQ(ierr);
245   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
246   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
247   PetscLogFlops(bs2*(2*a->nz + mbs));
248   PetscFunctionReturn(0);
249 }
250 
251 #undef __FUNCT__
252 #define __FUNCT__ "MatSolve_SeqSBAIJ_7"
253 PetscErrorCode MatSolve_SeqSBAIJ_7(Mat A,Vec bb,Vec xx)
254 {
255   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
256   IS              isrow=a->row;
257   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
258   PetscErrorCode ierr;
259   int             nz,*vj,k,*r,idx;
260   MatScalar       *aa=a->a,*v,*d;
261   PetscScalar     *x,*b,x0,x1,x2,x3,x4,x5,x6,*t,*tp;
262 
263   PetscFunctionBegin;
264   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
265   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
266   t  = a->solve_work;
267   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
268 
269   /* solve U^T * D * y = b by forward substitution */
270   tp = t;
271   for (k=0; k<mbs; k++) { /* t <- perm(b) */
272     idx   = 7*r[k];
273     tp[0] = b[idx];
274     tp[1] = b[idx+1];
275     tp[2] = b[idx+2];
276     tp[3] = b[idx+3];
277     tp[4] = b[idx+4];
278     tp[5] = b[idx+5];
279     tp[6] = b[idx+6];
280     tp += 7;
281   }
282 
283   for (k=0; k<mbs; k++){
284     v  = aa + 49*ai[k];
285     vj = aj + ai[k];
286     tp = t + k*7;
287     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6];
288     nz = ai[k+1] - ai[k];
289     tp = t + (*vj)*7;
290     while (nz--) {
291       tp[0]+=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6;
292       tp[1]+=  v[7]*x0 +  v[8]*x1 +  v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6;
293       tp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6;
294       tp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6;
295       tp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6;
296       tp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6;
297       tp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6;
298       vj++; tp = t + (*vj)*7;
299       v += 49;
300     }
301 
302     /* xk = inv(Dk)*(Dk*xk) */
303     d  = aa+k*49;          /* ptr to inv(Dk) */
304     tp    = t + k*7;
305     tp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6;
306     tp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6;
307     tp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6;
308     tp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6;
309     tp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6;
310     tp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6;
311     tp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6;
312   }
313 
314   /* solve U*x = y by back substitution */
315   for (k=mbs-1; k>=0; k--){
316     v  = aa + 49*ai[k];
317     vj = aj + ai[k];
318     tp    = t + k*7;
319     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];  x6=tp[6]; /* xk */
320     nz = ai[k+1] - ai[k];
321 
322     tp = t + (*vj)*7;
323     while (nz--) {
324       /* xk += U(k,:)*x(:) */
325       x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6];
326       x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6];
327       x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6];
328       x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6];
329       x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6];
330       x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6];
331       x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6];
332       vj++; tp = t + (*vj)*7;
333       v += 49;
334     }
335     tp    = t + k*7;
336     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6;
337     idx   = 7*r[k];
338     x[idx]     = x0;
339     x[idx+1]   = x1;
340     x[idx+2]   = x2;
341     x[idx+3]   = x3;
342     x[idx+4]   = x4;
343     x[idx+5]   = x5;
344     x[idx+6]   = x6;
345   }
346 
347   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
348   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
349   PetscLogFlops(49*(2*a->nz + mbs));
350   PetscFunctionReturn(0);
351 }
352 
353 #undef __FUNCT__
354 #define __FUNCT__ "MatSolve_SeqSBAIJ_7_NaturalOrdering"
355 PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
356 {
357   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
358   PetscErrorCode ierr;
359   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
360   MatScalar       *aa=a->a,*v,*d;
361   PetscScalar     *x,*xp,*b,x0,x1,x2,x3,x4,x5,x6;
362   int             nz,*vj,k;
363 
364   PetscFunctionBegin;
365 
366   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
367   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
368 
369   /* solve U^T * D * y = b by forward substitution */
370   ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
371   for (k=0; k<mbs; k++){
372     v  = aa + 49*ai[k];
373     xp = x + k*7;
374     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* Dk*xk = k-th block of x */
375     nz = ai[k+1] - ai[k];
376     vj = aj + ai[k];
377     xp = x + (*vj)*7;
378     while (nz--) {
379       /* x(:) += U(k,:)^T*(Dk*xk) */
380       xp[0]+=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6;
381       xp[1]+=  v[7]*x0 +  v[8]*x1 +  v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6;
382       xp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6;
383       xp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6;
384       xp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6;
385       xp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6;
386       xp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6;
387       vj++; xp = x + (*vj)*7;
388       v += 49;
389     }
390     /* xk = inv(Dk)*(Dk*xk) */
391     d  = aa+k*49;          /* ptr to inv(Dk) */
392     xp = x + k*7;
393     xp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6;
394     xp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6;
395     xp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6;
396     xp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6;
397     xp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6;
398     xp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6;
399     xp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6;
400   }
401 
402   /* solve U*x = y by back substitution */
403   for (k=mbs-1; k>=0; k--){
404     v  = aa + 49*ai[k];
405     xp = x + k*7;
406     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */
407     nz = ai[k+1] - ai[k];
408     vj = aj + ai[k];
409     xp = x + (*vj)*7;
410     while (nz--) {
411       /* xk += U(k,:)*x(:) */
412       x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6];
413       x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6];
414       x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6];
415       x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6];
416       x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6];
417       x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6];
418       x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6];
419       vj++;
420       v += 49; xp = x + (*vj)*7;
421     }
422     xp = x + k*7;
423     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6;
424   }
425 
426   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
427   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
428   PetscLogFlops(49*(2*a->nz + mbs));
429   PetscFunctionReturn(0);
430 }
431 
432 #undef __FUNCT__
433 #define __FUNCT__ "MatSolve_SeqSBAIJ_6"
434 PetscErrorCode MatSolve_SeqSBAIJ_6(Mat A,Vec bb,Vec xx)
435 {
436   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
437   IS              isrow=a->row;
438   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
439   PetscErrorCode ierr;
440   int             nz,*vj,k,*r,idx;
441   MatScalar       *aa=a->a,*v,*d;
442   PetscScalar     *x,*b,x0,x1,x2,x3,x4,x5,*t,*tp;
443 
444   PetscFunctionBegin;
445   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
446   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
447   t  = a->solve_work;
448   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
449 
450   /* solve U^T * D * y = b by forward substitution */
451   tp = t;
452   for (k=0; k<mbs; k++) { /* t <- perm(b) */
453     idx   = 6*r[k];
454     tp[0] = b[idx];
455     tp[1] = b[idx+1];
456     tp[2] = b[idx+2];
457     tp[3] = b[idx+3];
458     tp[4] = b[idx+4];
459     tp[5] = b[idx+5];
460     tp += 6;
461   }
462 
463   for (k=0; k<mbs; k++){
464     v  = aa + 36*ai[k];
465     vj = aj + ai[k];
466     tp = t + k*6;
467     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];
468     nz = ai[k+1] - ai[k];
469     tp = t + (*vj)*6;
470     while (nz--) {
471       tp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5;
472       tp[1] +=  v[6]*x0 +  v[7]*x1 +  v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5;
473       tp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5;
474       tp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5;
475       tp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5;
476       tp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5;
477       vj++; tp = t + (*vj)*6;
478       v += 36;
479     }
480 
481     /* xk = inv(Dk)*(Dk*xk) */
482     d  = aa+k*36;          /* ptr to inv(Dk) */
483     tp    = t + k*6;
484     tp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5;
485     tp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5;
486     tp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5;
487     tp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5;
488     tp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5;
489     tp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5;
490   }
491 
492   /* solve U*x = y by back substitution */
493   for (k=mbs-1; k>=0; k--){
494     v  = aa + 36*ai[k];
495     vj = aj + ai[k];
496     tp    = t + k*6;
497     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];  /* xk */
498     nz = ai[k+1] - ai[k];
499 
500     tp = t + (*vj)*6;
501     while (nz--) {
502       /* xk += U(k,:)*x(:) */
503       x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5];
504       x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5];
505       x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5];
506       x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5];
507       x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5];
508       x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5];
509       vj++; tp = t + (*vj)*6;
510       v += 36;
511     }
512     tp    = t + k*6;
513     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5;
514     idx   = 6*r[k];
515     x[idx]     = x0;
516     x[idx+1]   = x1;
517     x[idx+2]   = x2;
518     x[idx+3]   = x3;
519     x[idx+4]   = x4;
520     x[idx+5]   = x5;
521   }
522 
523   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
524   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
525   PetscLogFlops(36*(2*a->nz + mbs));
526   PetscFunctionReturn(0);
527 }
528 
529 #undef __FUNCT__
530 #define __FUNCT__ "MatSolve_SeqSBAIJ_6_NaturalOrdering"
531 PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
532 {
533   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
534   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
535   MatScalar       *aa=a->a,*v,*d;
536   PetscScalar     *x,*xp,*b,x0,x1,x2,x3,x4,x5;
537   PetscErrorCode ierr;
538   int             nz,*vj,k;
539 
540   PetscFunctionBegin;
541 
542   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
543   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
544 
545   /* solve U^T * D * y = b by forward substitution */
546   ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
547   for (k=0; k<mbs; k++){
548     v  = aa + 36*ai[k];
549     xp = x + k*6;
550     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* Dk*xk = k-th block of x */
551     nz = ai[k+1] - ai[k];
552     vj = aj + ai[k];
553     xp = x + (*vj)*6;
554     while (nz--) {
555       /* x(:) += U(k,:)^T*(Dk*xk) */
556       xp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5;
557       xp[1] +=  v[6]*x0 +  v[7]*x1 +  v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5;
558       xp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5;
559       xp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5;
560       xp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5;
561       xp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5;
562       vj++; xp = x + (*vj)*6;
563       v += 36;
564     }
565     /* xk = inv(Dk)*(Dk*xk) */
566     d  = aa+k*36;          /* ptr to inv(Dk) */
567     xp = x + k*6;
568     xp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5;
569     xp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5;
570     xp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5;
571     xp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5;
572     xp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5;
573     xp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5;
574   }
575 
576   /* solve U*x = y by back substitution */
577   for (k=mbs-1; k>=0; k--){
578     v  = aa + 36*ai[k];
579     xp = x + k*6;
580     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */
581     nz = ai[k+1] - ai[k];
582     vj = aj + ai[k];
583     xp = x + (*vj)*6;
584     while (nz--) {
585       /* xk += U(k,:)*x(:) */
586       x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5];
587       x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5];
588       x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5];
589       x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5];
590       x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5];
591       x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5];
592       vj++;
593       v += 36; xp = x + (*vj)*6;
594     }
595     xp = x + k*6;
596     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5;
597   }
598 
599   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
600   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
601   PetscLogFlops(36*(2*a->nz + mbs));
602   PetscFunctionReturn(0);
603 }
604 
605 #undef __FUNCT__
606 #define __FUNCT__ "MatSolve_SeqSBAIJ_5"
607 PetscErrorCode MatSolve_SeqSBAIJ_5(Mat A,Vec bb,Vec xx)
608 {
609   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
610   IS              isrow=a->row;
611   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
612   PetscErrorCode ierr;
613   int             nz,*vj,k,*r,idx;
614   MatScalar       *aa=a->a,*v,*diag;
615   PetscScalar     *x,*b,x0,x1,x2,x3,x4,*t,*tp;
616 
617   PetscFunctionBegin;
618   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
619   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
620   t  = a->solve_work;
621   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
622 
623   /* solve U^T * D * y = b by forward substitution */
624   tp = t;
625   for (k=0; k<mbs; k++) { /* t <- perm(b) */
626     idx   = 5*r[k];
627     tp[0] = b[idx];
628     tp[1] = b[idx+1];
629     tp[2] = b[idx+2];
630     tp[3] = b[idx+3];
631     tp[4] = b[idx+4];
632     tp += 5;
633   }
634 
635   for (k=0; k<mbs; k++){
636     v  = aa + 25*ai[k];
637     vj = aj + ai[k];
638     tp = t + k*5;
639     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];
640     nz = ai[k+1] - ai[k];
641 
642     tp = t + (*vj)*5;
643     while (nz--) {
644       tp[0] +=  v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4;
645       tp[1] +=  v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4;
646       tp[2] += v[10]*x0+ v[11]*x1+ v[12]*x2+ v[13]*x3+ v[14]*x4;
647       tp[3] += v[15]*x0+ v[16]*x1+ v[17]*x2+ v[18]*x3+ v[19]*x4;
648       tp[4] += v[20]*x0+ v[21]*x1+ v[22]*x2+ v[23]*x3+ v[24]*x4;
649       vj++; tp = t + (*vj)*5;
650       v += 25;
651     }
652 
653     /* xk = inv(Dk)*(Dk*xk) */
654     diag  = aa+k*25;          /* ptr to inv(Dk) */
655     tp    = t + k*5;
656       tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
657       tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
658       tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
659       tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
660       tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
661   }
662 
663   /* solve U*x = y by back substitution */
664   for (k=mbs-1; k>=0; k--){
665     v  = aa + 25*ai[k];
666     vj = aj + ai[k];
667     tp    = t + k*5;
668     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];/* xk */
669     nz = ai[k+1] - ai[k];
670 
671     tp = t + (*vj)*5;
672     while (nz--) {
673       /* xk += U(k,:)*x(:) */
674       x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4];
675       x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4];
676       x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4];
677       x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4];
678       x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4];
679       vj++; tp = t + (*vj)*5;
680       v += 25;
681     }
682     tp    = t + k*5;
683     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4;
684     idx   = 5*r[k];
685     x[idx]     = x0;
686     x[idx+1]   = x1;
687     x[idx+2]   = x2;
688     x[idx+3]   = x3;
689     x[idx+4]   = x4;
690   }
691 
692   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
693   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
694   PetscLogFlops(25*(2*a->nz + mbs));
695   PetscFunctionReturn(0);
696 }
697 
698 #undef __FUNCT__
699 #define __FUNCT__ "MatSolve_SeqSBAIJ_5_NaturalOrdering"
700 PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
701 {
702   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
703   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
704   MatScalar       *aa=a->a,*v,*diag;
705   PetscScalar     *x,*xp,*b,x0,x1,x2,x3,x4;
706   PetscErrorCode ierr;
707   int             nz,*vj,k;
708 
709   PetscFunctionBegin;
710 
711   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
712   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
713 
714   /* solve U^T * D * y = b by forward substitution */
715   ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
716   for (k=0; k<mbs; k++){
717     v  = aa + 25*ai[k];
718     xp = x + k*5;
719     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */
720     nz = ai[k+1] - ai[k];
721     vj = aj + ai[k];
722     xp = x + (*vj)*5;
723     while (nz--) {
724       /* x(:) += U(k,:)^T*(Dk*xk) */
725       xp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4;
726       xp[1] +=  v[5]*x0 +  v[6]*x1 +  v[7]*x2 + v[8]*x3 + v[9]*x4;
727       xp[2] += v[10]*x0 + v[11]*x1 + v[12]*x2+ v[13]*x3+ v[14]*x4;
728       xp[3] += v[15]*x0 + v[16]*x1 + v[17]*x2+ v[18]*x3+ v[19]*x4;
729       xp[4] += v[20]*x0 + v[21]*x1 + v[22]*x2+ v[23]*x3+ v[24]*x4;
730       vj++; xp = x + (*vj)*5;
731       v += 25;
732     }
733     /* xk = inv(Dk)*(Dk*xk) */
734     diag = aa+k*25;          /* ptr to inv(Dk) */
735     xp   = x + k*5;
736     xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
737     xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
738     xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
739     xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
740     xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
741   }
742 
743   /* solve U*x = y by back substitution */
744   for (k=mbs-1; k>=0; k--){
745     v  = aa + 25*ai[k];
746     xp = x + k*5;
747     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* xk */
748     nz = ai[k+1] - ai[k];
749     vj = aj + ai[k];
750     xp = x + (*vj)*5;
751     while (nz--) {
752       /* xk += U(k,:)*x(:) */
753       x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4];
754       x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4];
755       x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4];
756       x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4];
757       x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4];
758       vj++;
759       v += 25; xp = x + (*vj)*5;
760     }
761     xp = x + k*5;
762     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4;
763   }
764 
765   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
766   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
767   PetscLogFlops(25*(2*a->nz + mbs));
768   PetscFunctionReturn(0);
769 }
770 
771 #undef __FUNCT__
772 #define __FUNCT__ "MatSolve_SeqSBAIJ_4"
773 PetscErrorCode MatSolve_SeqSBAIJ_4(Mat A,Vec bb,Vec xx)
774 {
775   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
776   IS              isrow=a->row;
777   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
778   PetscErrorCode ierr;
779   int             nz,*vj,k,*r,idx;
780   MatScalar       *aa=a->a,*v,*diag;
781   PetscScalar     *x,*b,x0,x1,x2,x3,*t,*tp;
782 
783   PetscFunctionBegin;
784   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
785   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
786   t  = a->solve_work;
787   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
788 
789   /* solve U^T * D * y = b by forward substitution */
790   tp = t;
791   for (k=0; k<mbs; k++) { /* t <- perm(b) */
792     idx   = 4*r[k];
793     tp[0] = b[idx];
794     tp[1] = b[idx+1];
795     tp[2] = b[idx+2];
796     tp[3] = b[idx+3];
797     tp += 4;
798   }
799 
800   for (k=0; k<mbs; k++){
801     v  = aa + 16*ai[k];
802     vj = aj + ai[k];
803     tp = t + k*4;
804     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3];
805     nz = ai[k+1] - ai[k];
806 
807     tp = t + (*vj)*4;
808     while (nz--) {
809       tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3;
810       tp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3;
811       tp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3;
812       tp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3;
813       vj++; tp = t + (*vj)*4;
814       v += 16;
815     }
816 
817     /* xk = inv(Dk)*(Dk*xk) */
818     diag  = aa+k*16;          /* ptr to inv(Dk) */
819     tp    = t + k*4;
820     tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
821     tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
822     tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
823     tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
824   }
825 
826   /* solve U*x = y by back substitution */
827   for (k=mbs-1; k>=0; k--){
828     v  = aa + 16*ai[k];
829     vj = aj + ai[k];
830     tp    = t + k*4;
831     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */
832     nz = ai[k+1] - ai[k];
833 
834     tp = t + (*vj)*4;
835     while (nz--) {
836       /* xk += U(k,:)*x(:) */
837       x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3];
838       x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3];
839       x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3];
840       x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3];
841       vj++; tp = t + (*vj)*4;
842       v += 16;
843     }
844     tp    = t + k*4;
845     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3;
846     idx        = 4*r[k];
847     x[idx]     = x0;
848     x[idx+1]   = x1;
849     x[idx+2]   = x2;
850     x[idx+3]   = x3;
851   }
852 
853   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
854   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
855   PetscLogFlops(16*(2*a->nz + mbs));
856   PetscFunctionReturn(0);
857 }
858 
859 /*
860    Special case where the matrix was factored in the natural ordering.
861    This eliminates the need for the column and row permutation.
862 */
863 #undef __FUNCT__
864 #define __FUNCT__ "MatSolve_SeqSBAIJ_4_NaturalOrdering"
865 PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
866 {
867   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
868   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
869   MatScalar       *aa=a->a,*v,*diag;
870   PetscScalar     *x,*xp,*b,x0,x1,x2,x3;
871   PetscErrorCode ierr;
872   int             nz,*vj,k;
873 
874   PetscFunctionBegin;
875 
876   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
877   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
878 
879   /* solve U^T * D * y = b by forward substitution */
880   ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
881   for (k=0; k<mbs; k++){
882     v  = aa + 16*ai[k];
883     xp = x + k*4;
884     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */
885     nz = ai[k+1] - ai[k];
886     vj = aj + ai[k];
887     xp = x + (*vj)*4;
888     while (nz--) {
889       /* x(:) += U(k,:)^T*(Dk*xk) */
890       xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3;
891       xp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3;
892       xp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3;
893       xp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3;
894       vj++; xp = x + (*vj)*4;
895       v += 16;
896     }
897     /* xk = inv(Dk)*(Dk*xk) */
898     diag = aa+k*16;          /* ptr to inv(Dk) */
899     xp   = x + k*4;
900     xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
901     xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
902     xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
903     xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
904   }
905 
906   /* solve U*x = y by back substitution */
907   for (k=mbs-1; k>=0; k--){
908     v  = aa + 16*ai[k];
909     xp = x + k*4;
910     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */
911     nz = ai[k+1] - ai[k];
912     vj = aj + ai[k];
913     xp = x + (*vj)*4;
914     while (nz--) {
915       /* xk += U(k,:)*x(:) */
916       x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3];
917       x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3];
918       x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3];
919       x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3];
920       vj++;
921       v += 16; xp = x + (*vj)*4;
922     }
923     xp = x + k*4;
924     xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3;
925   }
926 
927   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
928   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
929   PetscLogFlops(16*(2*a->nz + mbs));
930   PetscFunctionReturn(0);
931 }
932 
933 #undef __FUNCT__
934 #define __FUNCT__ "MatSolve_SeqSBAIJ_3"
935 PetscErrorCode MatSolve_SeqSBAIJ_3(Mat A,Vec bb,Vec xx)
936 {
937   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
938   IS              isrow=a->row;
939   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
940   PetscErrorCode ierr;
941   int             nz,*vj,k,*r,idx;
942   MatScalar       *aa=a->a,*v,*diag;
943   PetscScalar     *x,*b,x0,x1,x2,*t,*tp;
944 
945   PetscFunctionBegin;
946   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
947   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
948   t  = a->solve_work;
949   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
950 
951   /* solve U^T * D * y = b by forward substitution */
952   tp = t;
953   for (k=0; k<mbs; k++) { /* t <- perm(b) */
954     idx   = 3*r[k];
955     tp[0] = b[idx];
956     tp[1] = b[idx+1];
957     tp[2] = b[idx+2];
958     tp += 3;
959   }
960 
961   for (k=0; k<mbs; k++){
962     v  = aa + 9*ai[k];
963     vj = aj + ai[k];
964     tp = t + k*3;
965     x0 = tp[0]; x1 = tp[1]; x2 = tp[2];
966     nz = ai[k+1] - ai[k];
967 
968     tp = t + (*vj)*3;
969     while (nz--) {
970       tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2;
971       tp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2;
972       tp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2;
973       vj++; tp = t + (*vj)*3;
974       v += 9;
975     }
976 
977     /* xk = inv(Dk)*(Dk*xk) */
978     diag  = aa+k*9;          /* ptr to inv(Dk) */
979     tp    = t + k*3;
980     tp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
981     tp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
982     tp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
983   }
984 
985   /* solve U*x = y by back substitution */
986   for (k=mbs-1; k>=0; k--){
987     v  = aa + 9*ai[k];
988     vj = aj + ai[k];
989     tp    = t + k*3;
990     x0 = tp[0]; x1 = tp[1]; x2 = tp[2];  /* xk */
991     nz = ai[k+1] - ai[k];
992 
993     tp = t + (*vj)*3;
994     while (nz--) {
995       /* xk += U(k,:)*x(:) */
996       x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2];
997       x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2];
998       x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2];
999       vj++; tp = t + (*vj)*3;
1000       v += 9;
1001     }
1002     tp    = t + k*3;
1003     tp[0] = x0; tp[1] = x1; tp[2] = x2;
1004     idx      = 3*r[k];
1005     x[idx]   = x0;
1006     x[idx+1] = x1;
1007     x[idx+2] = x2;
1008   }
1009 
1010   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1011   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1012   PetscLogFlops(9*(2*a->nz + mbs));
1013   PetscFunctionReturn(0);
1014 }
1015 
1016 /*
1017    Special case where the matrix was factored in the natural ordering.
1018    This eliminates the need for the column and row permutation.
1019 */
1020 #undef __FUNCT__
1021 #define __FUNCT__ "MatSolve_SeqSBAIJ_3_NaturalOrdering"
1022 PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
1023 {
1024   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
1025   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
1026   MatScalar       *aa=a->a,*v,*diag;
1027   PetscScalar     *x,*xp,*b,x0,x1,x2;
1028   PetscErrorCode ierr;
1029   int             nz,*vj,k;
1030 
1031   PetscFunctionBegin;
1032 
1033   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1034   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1035 
1036   /* solve U^T * D * y = b by forward substitution */
1037   ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1038   for (k=0; k<mbs; k++){
1039     v  = aa + 9*ai[k];
1040     xp = x + k*3;
1041     x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* Dk*xk = k-th block of x */
1042     nz = ai[k+1] - ai[k];
1043     vj = aj + ai[k];
1044     xp = x + (*vj)*3;
1045     while (nz--) {
1046       /* x(:) += U(k,:)^T*(Dk*xk) */
1047       xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2;
1048       xp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2;
1049       xp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2;
1050       vj++; xp = x + (*vj)*3;
1051       v += 9;
1052     }
1053     /* xk = inv(Dk)*(Dk*xk) */
1054     diag = aa+k*9;          /* ptr to inv(Dk) */
1055     xp   = x + k*3;
1056     xp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
1057     xp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
1058     xp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
1059   }
1060 
1061   /* solve U*x = y by back substitution */
1062   for (k=mbs-1; k>=0; k--){
1063     v  = aa + 9*ai[k];
1064     xp = x + k*3;
1065     x0 = xp[0]; x1 = xp[1]; x2 = xp[2];  /* xk */
1066     nz = ai[k+1] - ai[k];
1067     vj = aj + ai[k];
1068     xp = x + (*vj)*3;
1069     while (nz--) {
1070       /* xk += U(k,:)*x(:) */
1071       x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2];
1072       x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2];
1073       x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2];
1074       vj++;
1075       v += 9; xp = x + (*vj)*3;
1076     }
1077     xp = x + k*3;
1078     xp[0] = x0; xp[1] = x1; xp[2] = x2;
1079   }
1080 
1081   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1082   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1083   PetscLogFlops(9*(2*a->nz + mbs));
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatSolve_SeqSBAIJ_2"
1089 PetscErrorCode MatSolve_SeqSBAIJ_2(Mat A,Vec bb,Vec xx)
1090 {
1091   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ *)A->data;
1092   IS              isrow=a->row;
1093   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
1094   PetscErrorCode ierr;
1095   int             nz,*vj,k,k2,*r,idx;
1096   MatScalar       *aa=a->a,*v,*diag;
1097   PetscScalar     *x,*b,x0,x1,*t;
1098 
1099   PetscFunctionBegin;
1100   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1101   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1102   t  = a->solve_work;
1103   /* printf("called MatSolve_SeqSBAIJ_2\n"); */
1104   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1105 
1106   /* solve U^T * D * y = perm(b) by forward substitution */
1107   for (k=0; k<mbs; k++) {  /* t <- perm(b) */
1108     idx = 2*r[k];
1109     t[k*2]   = b[idx];
1110     t[k*2+1] = b[idx+1];
1111   }
1112   for (k=0; k<mbs; k++){
1113     v  = aa + 4*ai[k];
1114     vj = aj + ai[k];
1115     k2 = k*2;
1116     x0 = t[k2]; x1 = t[k2+1];
1117     nz = ai[k+1] - ai[k];
1118     while (nz--) {
1119       t[(*vj)*2]   += v[0]*x0 + v[1]*x1;
1120       t[(*vj)*2+1] += v[2]*x0 + v[3]*x1;
1121       vj++; v += 4;
1122     }
1123     diag = aa+k*4;  /* ptr to inv(Dk) */
1124     t[k2]   = diag[0]*x0 + diag[2]*x1;
1125     t[k2+1] = diag[1]*x0 + diag[3]*x1;
1126   }
1127 
1128   /* solve U*x = y by back substitution */
1129   for (k=mbs-1; k>=0; k--){
1130     v  = aa + 4*ai[k];
1131     vj = aj + ai[k];
1132     k2 = k*2;
1133     x0 = t[k2]; x1 = t[k2+1];
1134     nz = ai[k+1] - ai[k];
1135     while (nz--) {
1136       x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1];
1137       x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1];
1138       vj++; v += 4;
1139     }
1140     t[k2]    = x0;
1141     t[k2+1]  = x1;
1142     idx      = 2*r[k];
1143     x[idx]   = x0;
1144     x[idx+1] = x1;
1145   }
1146 
1147   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1148   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1149   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1150   PetscLogFlops(4*(2*a->nz + mbs));
1151   PetscFunctionReturn(0);
1152 }
1153 
1154 /*
1155    Special case where the matrix was factored in the natural ordering.
1156    This eliminates the need for the column and row permutation.
1157 */
1158 #undef __FUNCT__
1159 #define __FUNCT__ "MatSolve_SeqSBAIJ_2_NaturalOrdering"
1160 PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
1161 {
1162   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
1163   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
1164   MatScalar       *aa=a->a,*v,*diag;
1165   PetscScalar     *x,*b,x0,x1;
1166   PetscErrorCode ierr;
1167   int             nz,*vj,k,k2;
1168 
1169   PetscFunctionBegin;
1170 
1171   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1172   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1173 
1174   /* solve U^T * D * y = b by forward substitution */
1175   ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1176   for (k=0; k<mbs; k++){
1177     v  = aa + 4*ai[k];
1178     vj = aj + ai[k];
1179     k2 = k*2;
1180     x0 = x[k2]; x1 = x[k2+1];  /* Dk*xk = k-th block of x */
1181     nz = ai[k+1] - ai[k];
1182 
1183     while (nz--) {
1184       /* x(:) += U(k,:)^T*(Dk*xk) */
1185       x[(*vj)*2]   += v[0]*x0 + v[1]*x1;
1186       x[(*vj)*2+1] += v[2]*x0 + v[3]*x1;
1187       vj++; v += 4;
1188     }
1189     /* xk = inv(Dk)*(Dk*xk) */
1190     diag = aa+k*4;          /* ptr to inv(Dk) */
1191     x[k2]   = diag[0]*x0 + diag[2]*x1;
1192     x[k2+1] = diag[1]*x0 + diag[3]*x1;
1193   }
1194 
1195   /* solve U*x = y by back substitution */
1196   for (k=mbs-1; k>=0; k--){
1197     v  = aa + 4*ai[k];
1198     vj = aj + ai[k];
1199     k2 = k*2;
1200     x0 = x[k2]; x1 = x[k2+1];  /* xk */
1201     nz = ai[k+1] - ai[k];
1202     while (nz--) {
1203       /* xk += U(k,:)*x(:) */
1204       x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1];
1205       x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1];
1206       vj++; v += 4;
1207     }
1208     x[k2]     = x0;
1209     x[k2+1]   = x1;
1210   }
1211 
1212   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1213   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1214   PetscLogFlops(4*(2*a->nz + mbs)); /* bs2*(2*a->nz + mbs) */
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 #undef __FUNCT__
1219 #define __FUNCT__ "MatSolve_SeqSBAIJ_1"
1220 PetscErrorCode MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx)
1221 {
1222   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ *)A->data;
1223   IS              isrow=a->row;
1224   PetscErrorCode ierr;
1225   int             mbs=a->mbs,*ai=a->i,*aj=a->j,*rip;
1226   MatScalar       *aa=a->a,*v;
1227   PetscScalar     *x,*b,xk,*t;
1228   int             nz,*vj,k;
1229 
1230   PetscFunctionBegin;
1231   if (!mbs) PetscFunctionReturn(0);
1232 
1233   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1234   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1235   t    = a->solve_work;
1236 
1237   ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr);
1238 
1239   /* solve U^T*D*y = perm(b) by forward substitution */
1240   for (k=0; k<mbs; k++) t[k] = b[rip[k]];
1241   for (k=0; k<mbs; k++){
1242     v  = aa + ai[k];
1243     vj = aj + ai[k];
1244     xk = t[k];
1245     nz = ai[k+1] - ai[k];
1246     while (nz--) t[*vj++] += (*v++) * xk;
1247     t[k] = xk*aa[k];  /* note: aa[k] = 1/D(k) */
1248   }
1249 
1250   /* solve U*x = y by back substitution */
1251   for (k=mbs-1; k>=0; k--){
1252     v  = aa + ai[k];
1253     vj = aj + ai[k];
1254     xk = t[k];
1255     nz = ai[k+1] - ai[k];
1256     while (nz--) xk += (*v++) * t[*vj++];
1257     t[k]      = xk;
1258     x[rip[k]] = xk;
1259   }
1260 
1261   ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr);
1262   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1263   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1264   PetscLogFlops(4*a->nz + A->m);
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatSolves_SeqSBAIJ_1"
1270 PetscErrorCode MatSolves_SeqSBAIJ_1(Mat A,Vecs bb,Vecs xx)
1271 {
1272   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
1273   PetscErrorCode ierr;
1274 
1275   PetscFunctionBegin;
1276   if (a->bs == 1) {
1277     ierr = MatSolve_SeqSBAIJ_1(A,bb->v,xx->v);CHKERRQ(ierr);
1278   } else {
1279     IS              isrow=a->row;
1280     int             mbs=a->mbs,*ai=a->i,*aj=a->j,*rip,i;
1281     MatScalar       *aa=a->a,*v;
1282     PetscScalar     *x,*b,*t;
1283     int             nz,*vj,k,n;
1284     if (bb->n > a->solves_work_n) {
1285       if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);}
1286       ierr = PetscMalloc(bb->n*A->m*sizeof(PetscScalar),&a->solves_work);CHKERRQ(ierr);
1287       a->solves_work_n = bb->n;
1288     }
1289     n    = bb->n;
1290     ierr = VecGetArray(bb->v,&b);CHKERRQ(ierr);
1291     ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr);
1292     t    = a->solves_work;
1293 
1294     ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr);
1295 
1296     /* solve U^T*D*y = perm(b) by forward substitution */
1297     for (k=0; k<mbs; k++) {for (i=0; i<n; i++) t[n*k+i] = b[rip[k]+i*mbs];} /* values are stored interlaced in t */
1298     for (k=0; k<mbs; k++){
1299       v  = aa + ai[k];
1300       vj = aj + ai[k];
1301       nz = ai[k+1] - ai[k];
1302       while (nz--) {
1303         for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i];
1304         v++;vj++;
1305       }
1306       for (i=0; i<n; i++) t[n*k+i] *= aa[k];  /* note: aa[k] = 1/D(k) */
1307     }
1308 
1309     /* solve U*x = y by back substitution */
1310     for (k=mbs-1; k>=0; k--){
1311       v  = aa + ai[k];
1312       vj = aj + ai[k];
1313       nz = ai[k+1] - ai[k];
1314       while (nz--) {
1315         for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i];
1316         v++;vj++;
1317       }
1318       for (i=0; i<n; i++) x[rip[k]+i*mbs] = t[n*k+i];
1319     }
1320 
1321     ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr);
1322     ierr = VecRestoreArray(bb->v,&b);CHKERRQ(ierr);
1323     ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr);
1324     PetscLogFlops(bb->n*(4*a->nz + A->m));
1325   }
1326   PetscFunctionReturn(0);
1327 }
1328 
1329 /*
1330       Special case where the matrix was ILU(0) factored in the natural
1331    ordering. This eliminates the need for the column and row permutation.
1332 */
1333 #undef __FUNCT__
1334 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering"
1335 PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
1336 {
1337   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ *)A->data;
1338   PetscErrorCode ierr;
1339   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
1340   MatScalar       *aa=a->a,*v;
1341   PetscScalar     *x,*b,xk;
1342   int             nz,*vj,k;
1343 
1344   PetscFunctionBegin;
1345   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1346   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1347 
1348   /* solve U^T*D*y = b by forward substitution */
1349   ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1350   for (k=0; k<mbs; k++){
1351     v  = aa + ai[k] + 1;
1352     vj = aj + ai[k] + 1;
1353     xk = x[k];
1354     nz = ai[k+1] - ai[k] - 1;     /* exclude diag[k] */
1355     while (nz--) x[*vj++] += (*v++) * xk;
1356     x[k] = xk*aa[ai[k]];  /* note: aa[diag[k]] = 1/D(k) */
1357   }
1358 
1359   /* solve U*x = y by back substitution */
1360   for (k=mbs-2; k>=0; k--){
1361     v  = aa + ai[k] + 1;
1362     vj = aj + ai[k] + 1;
1363     xk = x[k];
1364     nz = ai[k+1] - ai[k] - 1;
1365     while (nz--) xk += (*v++) * x[*vj++];
1366     x[k] = xk;
1367   }
1368 
1369   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1370   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1371   PetscLogFlops(4*a->nz + A->m);
1372   PetscFunctionReturn(0);
1373 }
1374 
1375 /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */
1376 #undef __FUNCT__
1377 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ"
1378 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *B)
1379 {
1380   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b;
1381   PetscErrorCode ierr;
1382   int         *rip,i,mbs = a->mbs,*ai = a->i,*aj = a->j;
1383   int         *jutmp,bs = a->bs,bs2=a->bs2;
1384   int         m,realloc = 0,*levtmp;
1385   int         *prowl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd,*jl;
1386   int         incrlev,*lev,shift,prow,nz;
1387   int         *il,ili,nextprow;
1388   PetscReal   f = info->fill,levels = info->levels;
1389   PetscTruth  perm_identity;
1390 
1391   PetscFunctionBegin;
1392   /* check whether perm is the identity mapping */
1393   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1394 
1395   /* special case that simply copies fill pattern */
1396   if (!levels && perm_identity && bs==1) {
1397     ierr = MatDuplicate_SeqSBAIJ(A,MAT_DO_NOT_COPY_VALUES,B);CHKERRQ(ierr);
1398     (*B)->factor    = FACTOR_CHOLESKY;
1399     b               = (Mat_SeqSBAIJ*)(*B)->data;
1400     b->row          = perm;
1401     b->icol         = perm;
1402     b->factor_damping   = info->damping;
1403     b->factor_shift     = info->shift;
1404     b->factor_zeropivot = info->zeropivot;
1405     ierr         = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1406     ierr         = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1407     ierr         = PetscMalloc(((*B)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1408     (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
1409     (*B)->ops->solve                 = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1410     PetscFunctionReturn(0);
1411   }
1412 
1413   /* --- inplace icc(levels), levels>0, ie, *B has same data structure as A --- */
1414   if (levels > 0 && perm_identity && bs==1 ){
1415     if (!perm_identity) a->permute = PETSC_TRUE;
1416 
1417   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1418 
1419   if (perm_identity){ /* without permutation */
1420     ai = a->i; aj = a->j;
1421   } else {            /* non-trivial permutation */
1422     ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr);
1423     ai = a->inew; aj = a->jnew;
1424   }
1425 
1426   /* initialization */
1427   ierr  = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr);
1428   umax  = (int)(f*ai[mbs] + 1);
1429   ierr  = PetscMalloc(umax*sizeof(int),&lev);CHKERRQ(ierr);
1430   ierr  = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr);
1431   iu[0] = 0;
1432   juidx = 0; /* index for ju */
1433   ierr  = PetscMalloc((4*mbs+1)*sizeof(int),&jl);CHKERRQ(ierr); /* linked list for getting pivot row */
1434   q      = jl + mbs;   /* linked list for col index of active row */
1435   levtmp = q + mbs;
1436   il     = levtmp + mbs;
1437   for (i=0; i<mbs; i++){
1438     jl[i] = mbs;
1439     q[i]  = 0;
1440     il[i] = 0;
1441   }
1442 
1443   /* for each row k */
1444   for (k=0; k<mbs; k++){
1445     nzk  = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
1446     q[k] = mbs;
1447     /* initialize nonzero structure of k-th row to row rip[k] of A */
1448     jmin = ai[rip[k]] +1; /* exclude diag[k] */
1449     jmax = ai[rip[k]+1];
1450     for (j=jmin; j<jmax; j++){
1451       vj = rip[aj[j]]; /* col. value */
1452       if(vj > k){
1453         qm = k;
1454         do {
1455           m  = qm; qm = q[m];
1456         } while(qm < vj);
1457         if (qm == vj) {
1458           SETERRQ(1," error: duplicate entry in A\n");
1459         }
1460         nzk++;
1461         q[m]  = vj;
1462         q[vj] = qm;
1463         levtmp[vj] = 0;   /* initialize lev for nonzero element */
1464       } /* if(vj > k) */
1465     } /* for (j=jmin; j<jmax; j++) */
1466 
1467     /* modify nonzero structure of k-th row by computing fill-in
1468        for each row i to be merged in */
1469     prow = k;
1470     prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
1471 
1472     while (prow < k){
1473       nextprow = jl[prow];
1474       /* merge row prow into k-th row */
1475       ili  = il[prow];
1476       jmin = ili + 1;  /* points to 2nd nzero entry in U(prow,k:mbs-1) */
1477       jmax = iu[prow+1];
1478       qm   = k;
1479       for (j=jmin; j<jmax; j++){
1480         vj = ju[j];
1481         incrlev = lev[j] + 1;
1482         if (incrlev > levels) continue;
1483         do {
1484           m = qm; qm = q[m];
1485         } while (qm < vj);
1486         if (qm != vj){  /* a fill */
1487           nzk++; q[m] = vj; q[vj] = qm; qm = vj;
1488           levtmp[vj]  = incrlev;
1489         } else {
1490           if (levtmp[vj] > incrlev) levtmp[vj] = incrlev;
1491         }
1492       }
1493       if (jmin < jmax){ /* update il and jl */
1494         il[prow] = jmin;
1495         j = ju[jmin];
1496         jl[prow] = jl[j]; jl[j] = prow;
1497       }
1498       prow = nextprow;
1499     }
1500 
1501     /* add the first nonzero element in U(k, k+1:mbs-1) to jl */
1502     if (nzk > 0){
1503       i = q[k]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
1504       jl[k] = jl[i]; jl[i] = k;
1505       il[k] = iu[k] + 1;
1506     }
1507     iu[k+1] = iu[k] + nzk + 1;  /* include diag[k] */
1508 
1509     /* allocate more space to ju if needed */
1510     if (iu[k+1] > umax) {
1511       /* estimate how much additional space we will need */
1512       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1513       /* just double the memory each time */
1514       maxadd = umax;
1515       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
1516       umax += maxadd;
1517 
1518       /* allocate a longer ju */
1519       ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr);
1520       ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr);
1521       ierr = PetscFree(ju);CHKERRQ(ierr);
1522       ju   = jutmp;
1523 
1524       ierr     = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr);
1525       ierr     = PetscMemcpy(jutmp,lev,(iu[k])*sizeof(int));CHKERRQ(ierr);
1526       ierr     = PetscFree(lev);CHKERRQ(ierr);
1527       lev      = jutmp;
1528       realloc += 2; /* count how many times we realloc */
1529     }
1530 
1531     /* save nonzero structure of k-th row in ju */
1532     ju[juidx]  = k; /* diag[k] */
1533     lev[juidx] = 0;
1534     juidx++;
1535     i = k;
1536     while (nzk --) {
1537       i           = q[i];
1538       ju[juidx] = i;
1539       lev[juidx] = levtmp[i];
1540       juidx++;
1541     }
1542   } /* end of for (k=0; k<mbs; k++) */
1543 
1544   if (ai[mbs] != 0) {
1545     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
1546     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
1547     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1548     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af);
1549     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n");
1550   } else {
1551      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n");
1552   }
1553 
1554   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1555   ierr = PetscFree(jl);CHKERRQ(ierr);
1556   ierr = PetscFree(lev);CHKERRQ(ierr);
1557 
1558   /* put together the new matrix */
1559   ierr = MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);CHKERRQ(ierr);
1560   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
1561   ierr = MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);CHKERRQ(ierr);
1562 
1563   /* PetscLogObjectParent(*B,iperm); */
1564   b = (Mat_SeqSBAIJ*)(*B)->data;
1565   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1566   b->singlemalloc = PETSC_FALSE;
1567   /* the next line frees the default space generated by the Create() */
1568   ierr = PetscFree(b->a);CHKERRQ(ierr);
1569   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1570   ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
1571   b->j    = ju;
1572   b->i    = iu;
1573   b->diag = 0;
1574   b->ilen = 0;
1575   b->imax = 0;
1576   b->row  = perm;
1577   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1578   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1579   b->icol = perm;
1580   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1581   ierr    = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1582   /* In b structure:  Free imax, ilen, old a, old j.
1583      Allocate idnew, solve_work, new a, new j */
1584   PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar)));
1585   b->maxnz          = b->nz = iu[mbs];
1586   b->factor_damping   = info->damping;
1587   b->factor_shift     = info->shift;
1588   b->factor_zeropivot = info->zeropivot;
1589 
1590   (*B)->factor                 = FACTOR_CHOLESKY;
1591   (*B)->info.factor_mallocs    = realloc;
1592   (*B)->info.fill_ratio_given  = f;
1593   if (ai[mbs] != 0) {
1594     (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
1595   } else {
1596     (*B)->info.fill_ratio_needed = 0.0;
1597   }
1598 
1599 
1600   (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
1601   (*B)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1602   PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n");
1603 
1604   PetscFunctionReturn(0);
1605   } /* end of if (levels > 0 && perm_identity && bs==1 ) */
1606 
1607   if (!perm_identity) a->permute = PETSC_TRUE;
1608   if (perm_identity){
1609     ai = a->i; aj = a->j;
1610   } else { /*  non-trivial permutation */
1611     ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr);
1612     ai = a->inew; aj = a->jnew;
1613   }
1614 
1615   /* initialization */
1616   ierr  = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1617   umax  = (int)(f*ai[mbs] + 1);
1618   ierr  = PetscMalloc(umax*sizeof(int),&lev);CHKERRQ(ierr);
1619   umax += mbs + 1;
1620   shift = mbs + 1;
1621   ierr  = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr);
1622   ierr  = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr);
1623   iu[0] = mbs + 1;
1624   juidx = mbs + 1;
1625   /* prowl: linked list for pivot row */
1626   ierr    = PetscMalloc((3*mbs+1)*sizeof(int),&prowl);CHKERRQ(ierr);
1627   /* q: linked list for col index */
1628   q       = prowl + mbs;
1629   levtmp  = q     + mbs;
1630 
1631   for (i=0; i<mbs; i++){
1632     prowl[i] = mbs;
1633     q[i] = 0;
1634   }
1635 
1636   /* for each row k */
1637   for (k=0; k<mbs; k++){
1638     nzk  = 0;
1639     q[k] = mbs;
1640     /* copy current row into linked list */
1641     nz = ai[rip[k]+1] - ai[rip[k]];
1642     j = ai[rip[k]];
1643     while (nz--){
1644       vj = rip[aj[j++]];
1645       if (vj > k){
1646         qm = k;
1647         do {
1648           m  = qm; qm = q[m];
1649         } while(qm < vj);
1650         if (qm == vj) {
1651           SETERRQ(1," error: duplicate entry in A\n");
1652         }
1653         nzk++;
1654         q[m]       = vj;
1655         q[vj]      = qm;
1656         levtmp[vj] = 0;   /* initialize lev for nonzero element */
1657       }
1658     }
1659 
1660     /* modify nonzero structure of k-th row by computing fill-in
1661        for each row prow to be merged in */
1662     prow = k;
1663     prow = prowl[prow]; /* next pivot row (== 0 for symbolic factorization) */
1664 
1665     while (prow < k){
1666       /* merge row prow into k-th row */
1667       jmin = iu[prow] + 1;
1668       jmax = iu[prow+1];
1669       qm = k;
1670       for (j=jmin; j<jmax; j++){
1671         incrlev = lev[j-shift] + 1;
1672 	if (incrlev > levels) continue;
1673 
1674         vj      = ju[j];
1675         do {
1676           m = qm; qm = q[m];
1677         } while (qm < vj);
1678         if (qm != vj){      /* a fill */
1679           nzk++; q[m] = vj; q[vj] = qm; qm = vj;
1680           levtmp[vj] = incrlev;
1681         } else {
1682           if (levtmp[vj] > incrlev) levtmp[vj] = incrlev;
1683         }
1684       }
1685       prow = prowl[prow]; /* next pivot row */
1686     }
1687 
1688     /* add k to row list for first nonzero element in k-th row */
1689     if (nzk > 1){
1690       i = q[k]; /* col value of first nonzero element in k_th row of U */
1691       prowl[k] = prowl[i]; prowl[i] = k;
1692     }
1693     iu[k+1] = iu[k] + nzk;
1694 
1695     /* allocate more space to ju and lev if needed */
1696     if (iu[k+1] > umax) {
1697       /* estimate how much additional space we will need */
1698       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1699       /* just double the memory each time */
1700       maxadd = umax;
1701       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
1702       umax += maxadd;
1703 
1704       /* allocate a longer ju */
1705       ierr     = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr);
1706       ierr     = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr);
1707       ierr     = PetscFree(ju);CHKERRQ(ierr);
1708       ju       = jutmp;
1709 
1710       ierr     = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr);
1711       ierr     = PetscMemcpy(jutmp,lev,(iu[k]-shift)*sizeof(int));CHKERRQ(ierr);
1712       ierr     = PetscFree(lev);CHKERRQ(ierr);
1713       lev      = jutmp;
1714       realloc += 2; /* count how many times we realloc */
1715     }
1716 
1717     /* save nonzero structure of k-th row in ju */
1718     i=k;
1719     while (nzk --) {
1720       i                = q[i];
1721       ju[juidx]        = i;
1722       lev[juidx-shift] = levtmp[i];
1723       juidx++;
1724     }
1725   }
1726 
1727   if (ai[mbs] != 0) {
1728     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
1729     PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
1730     PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Run with -pc_icc_fill %g or use \n",af);
1731     PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:PCICCSetFill(pc,%g);\n",af);
1732     PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:for best performance.\n");
1733   } else {
1734     PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Empty matrix.\n");
1735   }
1736 
1737   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1738   ierr = PetscFree(prowl);CHKERRQ(ierr);
1739   ierr = PetscFree(lev);CHKERRQ(ierr);
1740 
1741   /* put together the new matrix */
1742   ierr = MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);CHKERRQ(ierr);
1743   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
1744   ierr = MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);CHKERRQ(ierr);
1745 
1746   /* PetscLogObjectParent(*B,iperm); */
1747   b    = (Mat_SeqSBAIJ*)(*B)->data;
1748   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1749   b->singlemalloc = PETSC_FALSE;
1750   /* the next line frees the default space generated by the Create() */
1751   ierr    = PetscFree(b->a);CHKERRQ(ierr);
1752   ierr    = PetscFree(b->ilen);CHKERRQ(ierr);
1753   ierr    = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
1754   b->j    = ju;
1755   b->i    = iu;
1756   b->diag = 0;
1757   b->ilen = 0;
1758   b->imax = 0;
1759 
1760   if (b->row) {
1761     ierr = ISDestroy(b->row);CHKERRQ(ierr);
1762   }
1763   if (b->icol) {
1764     ierr = ISDestroy(b->icol);CHKERRQ(ierr);
1765   }
1766   b->row  = perm;
1767   b->icol = perm;
1768   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1769   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1770   ierr    = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1771   /* In b structure:  Free imax, ilen, old a, old j.
1772      Allocate idnew, solve_work, new a, new j */
1773   PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar)));
1774   b->maxnz = b->nz = iu[mbs];
1775 
1776   (*B)->factor                 = FACTOR_CHOLESKY;
1777   (*B)->info.factor_mallocs    = realloc;
1778   (*B)->info.fill_ratio_given  = f;
1779   if (ai[mbs] != 0) {
1780     (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
1781   } else {
1782     (*B)->info.fill_ratio_needed = 0.0;
1783   }
1784 
1785   if (perm_identity){
1786     switch (bs) {
1787       case 1:
1788         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
1789         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1790         (*B)->ops->solves                = MatSolves_SeqSBAIJ_1;
1791         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJl:Using special in-place natural ordering factor and solve BS=1\n");
1792         break;
1793       case 2:
1794         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1795         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1796         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1797         break;
1798       case 3:
1799         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1800         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1801         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n");
1802         break;
1803       case 4:
1804         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1805         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1806         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1807         break;
1808       case 5:
1809         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1810         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1811         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1812         break;
1813       case 6:
1814         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1815         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1816         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1817         break;
1818       case 7:
1819         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1820         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1821         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1822       break;
1823       default:
1824         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
1825         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_N_NaturalOrdering;
1826         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n");
1827       break;
1828     }
1829   }
1830 
1831   PetscFunctionReturn(0);
1832 }
1833 
1834 
1835 
1836