1 #include <../src/mat/impls/sbaij/seq/sbaij.h>
2 #include <petsc/private/kernels/blockinvert.h>
3
4 /* Version for when blocks are 7 by 7 */
MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat C,Mat A,const MatFactorInfo * info)5 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat C, Mat A, const MatFactorInfo *info)
6 {
7 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
8 IS perm = b->row;
9 const PetscInt *ai, *aj, *perm_ptr, *bi = b->i, *bj = b->j;
10 PetscInt *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili, i, j, mbs = a->mbs;
11 MatScalar *ba = b->a, *aa, *ap, *dk, *uik;
12 MatScalar *u, *d, *w, *wp, u0, u1, u2, u3, u4, u5, u6, u7, u8, u9, u10, u11, u12;
13 MatScalar u13, u14, u15, u16, u17, u18, u19, u20, u21, u22, u23, u24, u25, u26, u27;
14 MatScalar u28, u29, u30, u31, u32, u33, u34, u35, u36, u37, u38, u39, u40, u41;
15 MatScalar u42, u43, u44, u45, u46, u47, u48;
16 PetscReal shift = info->shiftamount;
17 PetscBool allowzeropivot, zeropivotdetected;
18
19 PetscFunctionBegin;
20 /* initialization */
21 allowzeropivot = PetscNot(A->erroriffailure);
22 PetscCall(PetscCalloc1(49 * mbs, &w));
23 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
24 il[0] = 0;
25 for (i = 0; i < mbs; i++) jl[i] = mbs;
26
27 PetscCall(PetscMalloc2(49, &dk, 49, &uik));
28 PetscCall(ISGetIndices(perm, &perm_ptr));
29
30 /* check permutation */
31 if (!a->permute) {
32 ai = a->i;
33 aj = a->j;
34 aa = a->a;
35 } else {
36 ai = a->inew;
37 aj = a->jnew;
38 PetscCall(PetscMalloc1(49 * ai[mbs], &aa));
39 PetscCall(PetscArraycpy(aa, a->a, 49 * ai[mbs]));
40 PetscCall(PetscMalloc1(ai[mbs], &a2anew));
41 PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs]));
42
43 for (i = 0; i < mbs; i++) {
44 jmin = ai[i];
45 jmax = ai[i + 1];
46 for (j = jmin; j < jmax; j++) {
47 while (a2anew[j] != j) {
48 k = a2anew[j];
49 a2anew[j] = a2anew[k];
50 a2anew[k] = k;
51 for (k1 = 0; k1 < 49; k1++) {
52 dk[k1] = aa[k * 49 + k1];
53 aa[k * 49 + k1] = aa[j * 49 + k1];
54 aa[j * 49 + k1] = dk[k1];
55 }
56 }
57 /* transform column-oriented blocks that lie in the lower triangle to row-oriented blocks */
58 if (i > aj[j]) {
59 /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */
60 ap = aa + j * 49; /* ptr to the beginning of j-th block of aa */
61 for (k = 0; k < 49; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
62 for (k = 0; k < 7; k++) { /* j-th block of aa <- dk^T */
63 for (k1 = 0; k1 < 7; k1++) *ap++ = dk[k + 7 * k1];
64 }
65 }
66 }
67 }
68 PetscCall(PetscFree(a2anew));
69 }
70
71 /* for each row k */
72 for (k = 0; k < mbs; k++) {
73 /*initialize k-th row with elements nonzero in row perm(k) of A */
74 jmin = ai[perm_ptr[k]];
75 jmax = ai[perm_ptr[k] + 1];
76 if (jmin < jmax) {
77 ap = aa + jmin * 49;
78 for (j = jmin; j < jmax; j++) {
79 vj = perm_ptr[aj[j]]; /* block col. index */
80 wp = w + vj * 49;
81 for (i = 0; i < 49; i++) *wp++ = *ap++;
82 }
83 }
84
85 /* modify k-th row by adding in those rows i with U(i,k) != 0 */
86 PetscCall(PetscArraycpy(dk, w + k * 49, 49));
87 i = jl[k]; /* first row to be added to k_th row */
88
89 while (i < mbs) {
90 nexti = jl[i]; /* next row to be added to k_th row */
91
92 /* compute multiplier */
93 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
94
95 /* uik = -inv(Di)*U_bar(i,k) */
96 d = ba + i * 49;
97 u = ba + ili * 49;
98
99 u0 = u[0];
100 u1 = u[1];
101 u2 = u[2];
102 u3 = u[3];
103 u4 = u[4];
104 u5 = u[5];
105 u6 = u[6];
106 u7 = u[7];
107 u8 = u[8];
108 u9 = u[9];
109 u10 = u[10];
110 u11 = u[11];
111 u12 = u[12];
112 u13 = u[13];
113 u14 = u[14];
114 u15 = u[15];
115 u16 = u[16];
116 u17 = u[17];
117 u18 = u[18];
118 u19 = u[19];
119 u20 = u[20];
120 u21 = u[21];
121 u22 = u[22];
122 u23 = u[23];
123 u24 = u[24];
124 u25 = u[25];
125 u26 = u[26];
126 u27 = u[27];
127 u28 = u[28];
128 u29 = u[29];
129 u30 = u[30];
130 u31 = u[31];
131 u32 = u[32];
132 u33 = u[33];
133 u34 = u[34];
134 u35 = u[35];
135 u36 = u[36];
136 u37 = u[37];
137 u38 = u[38];
138 u39 = u[39];
139 u40 = u[40];
140 u41 = u[41];
141 u42 = u[42];
142 u43 = u[43];
143 u44 = u[44];
144 u45 = u[45];
145 u46 = u[46];
146 u47 = u[47];
147 u48 = u[48];
148
149 uik[0] = -(d[0] * u0 + d[7] * u1 + d[14] * u2 + d[21] * u3 + d[28] * u4 + d[35] * u5 + d[42] * u6);
150 uik[1] = -(d[1] * u0 + d[8] * u1 + d[15] * u2 + d[22] * u3 + d[29] * u4 + d[36] * u5 + d[43] * u6);
151 uik[2] = -(d[2] * u0 + d[9] * u1 + d[16] * u2 + d[23] * u3 + d[30] * u4 + d[37] * u5 + d[44] * u6);
152 uik[3] = -(d[3] * u0 + d[10] * u1 + d[17] * u2 + d[24] * u3 + d[31] * u4 + d[38] * u5 + d[45] * u6);
153 uik[4] = -(d[4] * u0 + d[11] * u1 + d[18] * u2 + d[25] * u3 + d[32] * u4 + d[39] * u5 + d[46] * u6);
154 uik[5] = -(d[5] * u0 + d[12] * u1 + d[19] * u2 + d[26] * u3 + d[33] * u4 + d[40] * u5 + d[47] * u6);
155 uik[6] = -(d[6] * u0 + d[13] * u1 + d[20] * u2 + d[27] * u3 + d[34] * u4 + d[41] * u5 + d[48] * u6);
156
157 uik[7] = -(d[0] * u7 + d[7] * u8 + d[14] * u9 + d[21] * u10 + d[28] * u11 + d[35] * u12 + d[42] * u13);
158 uik[8] = -(d[1] * u7 + d[8] * u8 + d[15] * u9 + d[22] * u10 + d[29] * u11 + d[36] * u12 + d[43] * u13);
159 uik[9] = -(d[2] * u7 + d[9] * u8 + d[16] * u9 + d[23] * u10 + d[30] * u11 + d[37] * u12 + d[44] * u13);
160 uik[10] = -(d[3] * u7 + d[10] * u8 + d[17] * u9 + d[24] * u10 + d[31] * u11 + d[38] * u12 + d[45] * u13);
161 uik[11] = -(d[4] * u7 + d[11] * u8 + d[18] * u9 + d[25] * u10 + d[32] * u11 + d[39] * u12 + d[46] * u13);
162 uik[12] = -(d[5] * u7 + d[12] * u8 + d[19] * u9 + d[26] * u10 + d[33] * u11 + d[40] * u12 + d[47] * u13);
163 uik[13] = -(d[6] * u7 + d[13] * u8 + d[20] * u9 + d[27] * u10 + d[34] * u11 + d[41] * u12 + d[48] * u13);
164
165 uik[14] = -(d[0] * u14 + d[7] * u15 + d[14] * u16 + d[21] * u17 + d[28] * u18 + d[35] * u19 + d[42] * u20);
166 uik[15] = -(d[1] * u14 + d[8] * u15 + d[15] * u16 + d[22] * u17 + d[29] * u18 + d[36] * u19 + d[43] * u20);
167 uik[16] = -(d[2] * u14 + d[9] * u15 + d[16] * u16 + d[23] * u17 + d[30] * u18 + d[37] * u19 + d[44] * u20);
168 uik[17] = -(d[3] * u14 + d[10] * u15 + d[17] * u16 + d[24] * u17 + d[31] * u18 + d[38] * u19 + d[45] * u20);
169 uik[18] = -(d[4] * u14 + d[11] * u15 + d[18] * u16 + d[25] * u17 + d[32] * u18 + d[39] * u19 + d[46] * u20);
170 uik[19] = -(d[5] * u14 + d[12] * u15 + d[19] * u16 + d[26] * u17 + d[33] * u18 + d[40] * u19 + d[47] * u20);
171 uik[20] = -(d[6] * u14 + d[13] * u15 + d[20] * u16 + d[27] * u17 + d[34] * u18 + d[41] * u19 + d[48] * u20);
172
173 uik[21] = -(d[0] * u21 + d[7] * u22 + d[14] * u23 + d[21] * u24 + d[28] * u25 + d[35] * u26 + d[42] * u27);
174 uik[22] = -(d[1] * u21 + d[8] * u22 + d[15] * u23 + d[22] * u24 + d[29] * u25 + d[36] * u26 + d[43] * u27);
175 uik[23] = -(d[2] * u21 + d[9] * u22 + d[16] * u23 + d[23] * u24 + d[30] * u25 + d[37] * u26 + d[44] * u27);
176 uik[24] = -(d[3] * u21 + d[10] * u22 + d[17] * u23 + d[24] * u24 + d[31] * u25 + d[38] * u26 + d[45] * u27);
177 uik[25] = -(d[4] * u21 + d[11] * u22 + d[18] * u23 + d[25] * u24 + d[32] * u25 + d[39] * u26 + d[46] * u27);
178 uik[26] = -(d[5] * u21 + d[12] * u22 + d[19] * u23 + d[26] * u24 + d[33] * u25 + d[40] * u26 + d[47] * u27);
179 uik[27] = -(d[6] * u21 + d[13] * u22 + d[20] * u23 + d[27] * u24 + d[34] * u25 + d[41] * u26 + d[48] * u27);
180
181 uik[28] = -(d[0] * u28 + d[7] * u29 + d[14] * u30 + d[21] * u31 + d[28] * u32 + d[35] * u33 + d[42] * u34);
182 uik[29] = -(d[1] * u28 + d[8] * u29 + d[15] * u30 + d[22] * u31 + d[29] * u32 + d[36] * u33 + d[43] * u34);
183 uik[30] = -(d[2] * u28 + d[9] * u29 + d[16] * u30 + d[23] * u31 + d[30] * u32 + d[37] * u33 + d[44] * u34);
184 uik[31] = -(d[3] * u28 + d[10] * u29 + d[17] * u30 + d[24] * u31 + d[31] * u32 + d[38] * u33 + d[45] * u34);
185 uik[32] = -(d[4] * u28 + d[11] * u29 + d[18] * u30 + d[25] * u31 + d[32] * u32 + d[39] * u33 + d[46] * u34);
186 uik[33] = -(d[5] * u28 + d[12] * u29 + d[19] * u30 + d[26] * u31 + d[33] * u32 + d[40] * u33 + d[47] * u34);
187 uik[34] = -(d[6] * u28 + d[13] * u29 + d[20] * u30 + d[27] * u31 + d[34] * u32 + d[41] * u33 + d[48] * u34);
188
189 uik[35] = -(d[0] * u35 + d[7] * u36 + d[14] * u37 + d[21] * u38 + d[28] * u39 + d[35] * u40 + d[42] * u41);
190 uik[36] = -(d[1] * u35 + d[8] * u36 + d[15] * u37 + d[22] * u38 + d[29] * u39 + d[36] * u40 + d[43] * u41);
191 uik[37] = -(d[2] * u35 + d[9] * u36 + d[16] * u37 + d[23] * u38 + d[30] * u39 + d[37] * u40 + d[44] * u41);
192 uik[38] = -(d[3] * u35 + d[10] * u36 + d[17] * u37 + d[24] * u38 + d[31] * u39 + d[38] * u40 + d[45] * u41);
193 uik[39] = -(d[4] * u35 + d[11] * u36 + d[18] * u37 + d[25] * u38 + d[32] * u39 + d[39] * u40 + d[46] * u41);
194 uik[40] = -(d[5] * u35 + d[12] * u36 + d[19] * u37 + d[26] * u38 + d[33] * u39 + d[40] * u40 + d[47] * u41);
195 uik[41] = -(d[6] * u35 + d[13] * u36 + d[20] * u37 + d[27] * u38 + d[34] * u39 + d[41] * u40 + d[48] * u41);
196
197 uik[42] = -(d[0] * u42 + d[7] * u43 + d[14] * u44 + d[21] * u45 + d[28] * u46 + d[35] * u47 + d[42] * u48);
198 uik[43] = -(d[1] * u42 + d[8] * u43 + d[15] * u44 + d[22] * u45 + d[29] * u46 + d[36] * u47 + d[43] * u48);
199 uik[44] = -(d[2] * u42 + d[9] * u43 + d[16] * u44 + d[23] * u45 + d[30] * u46 + d[37] * u47 + d[44] * u48);
200 uik[45] = -(d[3] * u42 + d[10] * u43 + d[17] * u44 + d[24] * u45 + d[31] * u46 + d[38] * u47 + d[45] * u48);
201 uik[46] = -(d[4] * u42 + d[11] * u43 + d[18] * u44 + d[25] * u45 + d[32] * u46 + d[39] * u47 + d[46] * u48);
202 uik[47] = -(d[5] * u42 + d[12] * u43 + d[19] * u44 + d[26] * u45 + d[33] * u46 + d[40] * u47 + d[47] * u48);
203 uik[48] = -(d[6] * u42 + d[13] * u43 + d[20] * u44 + d[27] * u45 + d[34] * u46 + d[41] * u47 + d[48] * u48);
204
205 /* update D(k) += -U(i,k)^T * U_bar(i,k) */
206 dk[0] += uik[0] * u0 + uik[1] * u1 + uik[2] * u2 + uik[3] * u3 + uik[4] * u4 + uik[5] * u5 + uik[6] * u6;
207 dk[1] += uik[7] * u0 + uik[8] * u1 + uik[9] * u2 + uik[10] * u3 + uik[11] * u4 + uik[12] * u5 + uik[13] * u6;
208 dk[2] += uik[14] * u0 + uik[15] * u1 + uik[16] * u2 + uik[17] * u3 + uik[18] * u4 + uik[19] * u5 + uik[20] * u6;
209 dk[3] += uik[21] * u0 + uik[22] * u1 + uik[23] * u2 + uik[24] * u3 + uik[25] * u4 + uik[26] * u5 + uik[27] * u6;
210 dk[4] += uik[28] * u0 + uik[29] * u1 + uik[30] * u2 + uik[31] * u3 + uik[32] * u4 + uik[33] * u5 + uik[34] * u6;
211 dk[5] += uik[35] * u0 + uik[36] * u1 + uik[37] * u2 + uik[38] * u3 + uik[39] * u4 + uik[40] * u5 + uik[41] * u6;
212 dk[6] += uik[42] * u0 + uik[43] * u1 + uik[44] * u2 + uik[45] * u3 + uik[46] * u4 + uik[47] * u5 + uik[48] * u6;
213
214 dk[7] += uik[0] * u7 + uik[1] * u8 + uik[2] * u9 + uik[3] * u10 + uik[4] * u11 + uik[5] * u12 + uik[6] * u13;
215 dk[8] += uik[7] * u7 + uik[8] * u8 + uik[9] * u9 + uik[10] * u10 + uik[11] * u11 + uik[12] * u12 + uik[13] * u13;
216 dk[9] += uik[14] * u7 + uik[15] * u8 + uik[16] * u9 + uik[17] * u10 + uik[18] * u11 + uik[19] * u12 + uik[20] * u13;
217 dk[10] += uik[21] * u7 + uik[22] * u8 + uik[23] * u9 + uik[24] * u10 + uik[25] * u11 + uik[26] * u12 + uik[27] * u13;
218 dk[11] += uik[28] * u7 + uik[29] * u8 + uik[30] * u9 + uik[31] * u10 + uik[32] * u11 + uik[33] * u12 + uik[34] * u13;
219 dk[12] += uik[35] * u7 + uik[36] * u8 + uik[37] * u9 + uik[38] * u10 + uik[39] * u11 + uik[40] * u12 + uik[41] * u13;
220 dk[13] += uik[42] * u7 + uik[43] * u8 + uik[44] * u9 + uik[45] * u10 + uik[46] * u11 + uik[47] * u12 + uik[48] * u13;
221
222 dk[14] += uik[0] * u14 + uik[1] * u15 + uik[2] * u16 + uik[3] * u17 + uik[4] * u18 + uik[5] * u19 + uik[6] * u20;
223 dk[15] += uik[7] * u14 + uik[8] * u15 + uik[9] * u16 + uik[10] * u17 + uik[11] * u18 + uik[12] * u19 + uik[13] * u20;
224 dk[16] += uik[14] * u14 + uik[15] * u15 + uik[16] * u16 + uik[17] * u17 + uik[18] * u18 + uik[19] * u19 + uik[20] * u20;
225 dk[17] += uik[21] * u14 + uik[22] * u15 + uik[23] * u16 + uik[24] * u17 + uik[25] * u18 + uik[26] * u19 + uik[27] * u20;
226 dk[18] += uik[28] * u14 + uik[29] * u15 + uik[30] * u16 + uik[31] * u17 + uik[32] * u18 + uik[33] * u19 + uik[34] * u20;
227 dk[19] += uik[35] * u14 + uik[36] * u15 + uik[37] * u16 + uik[38] * u17 + uik[39] * u18 + uik[40] * u19 + uik[41] * u20;
228 dk[20] += uik[42] * u14 + uik[43] * u15 + uik[44] * u16 + uik[45] * u17 + uik[46] * u18 + uik[47] * u19 + uik[48] * u20;
229
230 dk[21] += uik[0] * u21 + uik[1] * u22 + uik[2] * u23 + uik[3] * u24 + uik[4] * u25 + uik[5] * u26 + uik[6] * u27;
231 dk[22] += uik[7] * u21 + uik[8] * u22 + uik[9] * u23 + uik[10] * u24 + uik[11] * u25 + uik[12] * u26 + uik[13] * u27;
232 dk[23] += uik[14] * u21 + uik[15] * u22 + uik[16] * u23 + uik[17] * u24 + uik[18] * u25 + uik[19] * u26 + uik[20] * u27;
233 dk[24] += uik[21] * u21 + uik[22] * u22 + uik[23] * u23 + uik[24] * u24 + uik[25] * u25 + uik[26] * u26 + uik[27] * u27;
234 dk[25] += uik[28] * u21 + uik[29] * u22 + uik[30] * u23 + uik[31] * u24 + uik[32] * u25 + uik[33] * u26 + uik[34] * u27;
235 dk[26] += uik[35] * u21 + uik[36] * u22 + uik[37] * u23 + uik[38] * u24 + uik[39] * u25 + uik[40] * u26 + uik[41] * u27;
236 dk[27] += uik[42] * u21 + uik[43] * u22 + uik[44] * u23 + uik[45] * u24 + uik[46] * u25 + uik[47] * u26 + uik[48] * u27;
237
238 dk[28] += uik[0] * u28 + uik[1] * u29 + uik[2] * u30 + uik[3] * u31 + uik[4] * u32 + uik[5] * u33 + uik[6] * u34;
239 dk[29] += uik[7] * u28 + uik[8] * u29 + uik[9] * u30 + uik[10] * u31 + uik[11] * u32 + uik[12] * u33 + uik[13] * u34;
240 dk[30] += uik[14] * u28 + uik[15] * u29 + uik[16] * u30 + uik[17] * u31 + uik[18] * u32 + uik[19] * u33 + uik[20] * u34;
241 dk[31] += uik[21] * u28 + uik[22] * u29 + uik[23] * u30 + uik[24] * u31 + uik[25] * u32 + uik[26] * u33 + uik[27] * u34;
242 dk[32] += uik[28] * u28 + uik[29] * u29 + uik[30] * u30 + uik[31] * u31 + uik[32] * u32 + uik[33] * u33 + uik[34] * u34;
243 dk[33] += uik[35] * u28 + uik[36] * u29 + uik[37] * u30 + uik[38] * u31 + uik[39] * u32 + uik[40] * u33 + uik[41] * u34;
244 dk[34] += uik[42] * u28 + uik[43] * u29 + uik[44] * u30 + uik[45] * u31 + uik[46] * u32 + uik[47] * u33 + uik[48] * u34;
245
246 dk[35] += uik[0] * u35 + uik[1] * u36 + uik[2] * u37 + uik[3] * u38 + uik[4] * u39 + uik[5] * u40 + uik[6] * u41;
247 dk[36] += uik[7] * u35 + uik[8] * u36 + uik[9] * u37 + uik[10] * u38 + uik[11] * u39 + uik[12] * u40 + uik[13] * u41;
248 dk[37] += uik[14] * u35 + uik[15] * u36 + uik[16] * u37 + uik[17] * u38 + uik[18] * u39 + uik[19] * u40 + uik[20] * u41;
249 dk[38] += uik[21] * u35 + uik[22] * u36 + uik[23] * u37 + uik[24] * u38 + uik[25] * u39 + uik[26] * u40 + uik[27] * u41;
250 dk[39] += uik[28] * u35 + uik[29] * u36 + uik[30] * u37 + uik[31] * u38 + uik[32] * u39 + uik[33] * u40 + uik[34] * u41;
251 dk[40] += uik[35] * u35 + uik[36] * u36 + uik[37] * u37 + uik[38] * u38 + uik[39] * u39 + uik[40] * u40 + uik[41] * u41;
252 dk[41] += uik[42] * u35 + uik[43] * u36 + uik[44] * u37 + uik[45] * u38 + uik[46] * u39 + uik[47] * u40 + uik[48] * u41;
253
254 dk[42] += uik[0] * u42 + uik[1] * u43 + uik[2] * u44 + uik[3] * u45 + uik[4] * u46 + uik[5] * u47 + uik[6] * u48;
255 dk[43] += uik[7] * u42 + uik[8] * u43 + uik[9] * u44 + uik[10] * u45 + uik[11] * u46 + uik[12] * u47 + uik[13] * u48;
256 dk[44] += uik[14] * u42 + uik[15] * u43 + uik[16] * u44 + uik[17] * u45 + uik[18] * u46 + uik[19] * u47 + uik[20] * u48;
257 dk[45] += uik[21] * u42 + uik[22] * u43 + uik[23] * u44 + uik[24] * u45 + uik[25] * u46 + uik[26] * u47 + uik[27] * u48;
258 dk[46] += uik[28] * u42 + uik[29] * u43 + uik[30] * u44 + uik[31] * u45 + uik[32] * u46 + uik[33] * u47 + uik[34] * u48;
259 dk[47] += uik[35] * u42 + uik[36] * u43 + uik[37] * u44 + uik[38] * u45 + uik[39] * u46 + uik[40] * u47 + uik[41] * u48;
260 dk[48] += uik[42] * u42 + uik[43] * u43 + uik[44] * u44 + uik[45] * u45 + uik[46] * u46 + uik[47] * u47 + uik[48] * u48;
261
262 PetscCall(PetscLogFlops(343.0 * 4.0));
263
264 /* update -U(i,k) */
265 PetscCall(PetscArraycpy(ba + ili * 49, uik, 49));
266
267 /* add multiple of row i to k-th row ... */
268 jmin = ili + 1;
269 jmax = bi[i + 1];
270 if (jmin < jmax) {
271 for (j = jmin; j < jmax; j++) {
272 /* w += -U(i,k)^T * U_bar(i,j) */
273 wp = w + bj[j] * 49;
274 u = ba + j * 49;
275
276 u0 = u[0];
277 u1 = u[1];
278 u2 = u[2];
279 u3 = u[3];
280 u4 = u[4];
281 u5 = u[5];
282 u6 = u[6];
283 u7 = u[7];
284 u8 = u[8];
285 u9 = u[9];
286 u10 = u[10];
287 u11 = u[11];
288 u12 = u[12];
289 u13 = u[13];
290 u14 = u[14];
291 u15 = u[15];
292 u16 = u[16];
293 u17 = u[17];
294 u18 = u[18];
295 u19 = u[19];
296 u20 = u[20];
297 u21 = u[21];
298 u22 = u[22];
299 u23 = u[23];
300 u24 = u[24];
301 u25 = u[25];
302 u26 = u[26];
303 u27 = u[27];
304 u28 = u[28];
305 u29 = u[29];
306 u30 = u[30];
307 u31 = u[31];
308 u32 = u[32];
309 u33 = u[33];
310 u34 = u[34];
311 u35 = u[35];
312 u36 = u[36];
313 u37 = u[37];
314 u38 = u[38];
315 u39 = u[39];
316 u40 = u[40];
317 u41 = u[41];
318 u42 = u[42];
319 u43 = u[43];
320 u44 = u[44];
321 u45 = u[45];
322 u46 = u[46];
323 u47 = u[47];
324 u48 = u[48];
325
326 wp[0] += uik[0] * u0 + uik[1] * u1 + uik[2] * u2 + uik[3] * u3 + uik[4] * u4 + uik[5] * u5 + uik[6] * u6;
327 wp[1] += uik[7] * u0 + uik[8] * u1 + uik[9] * u2 + uik[10] * u3 + uik[11] * u4 + uik[12] * u5 + uik[13] * u6;
328 wp[2] += uik[14] * u0 + uik[15] * u1 + uik[16] * u2 + uik[17] * u3 + uik[18] * u4 + uik[19] * u5 + uik[20] * u6;
329 wp[3] += uik[21] * u0 + uik[22] * u1 + uik[23] * u2 + uik[24] * u3 + uik[25] * u4 + uik[26] * u5 + uik[27] * u6;
330 wp[4] += uik[28] * u0 + uik[29] * u1 + uik[30] * u2 + uik[31] * u3 + uik[32] * u4 + uik[33] * u5 + uik[34] * u6;
331 wp[5] += uik[35] * u0 + uik[36] * u1 + uik[37] * u2 + uik[38] * u3 + uik[39] * u4 + uik[40] * u5 + uik[41] * u6;
332 wp[6] += uik[42] * u0 + uik[43] * u1 + uik[44] * u2 + uik[45] * u3 + uik[46] * u4 + uik[47] * u5 + uik[48] * u6;
333
334 wp[7] += uik[0] * u7 + uik[1] * u8 + uik[2] * u9 + uik[3] * u10 + uik[4] * u11 + uik[5] * u12 + uik[6] * u13;
335 wp[8] += uik[7] * u7 + uik[8] * u8 + uik[9] * u9 + uik[10] * u10 + uik[11] * u11 + uik[12] * u12 + uik[13] * u13;
336 wp[9] += uik[14] * u7 + uik[15] * u8 + uik[16] * u9 + uik[17] * u10 + uik[18] * u11 + uik[19] * u12 + uik[20] * u13;
337 wp[10] += uik[21] * u7 + uik[22] * u8 + uik[23] * u9 + uik[24] * u10 + uik[25] * u11 + uik[26] * u12 + uik[27] * u13;
338 wp[11] += uik[28] * u7 + uik[29] * u8 + uik[30] * u9 + uik[31] * u10 + uik[32] * u11 + uik[33] * u12 + uik[34] * u13;
339 wp[12] += uik[35] * u7 + uik[36] * u8 + uik[37] * u9 + uik[38] * u10 + uik[39] * u11 + uik[40] * u12 + uik[41] * u13;
340 wp[13] += uik[42] * u7 + uik[43] * u8 + uik[44] * u9 + uik[45] * u10 + uik[46] * u11 + uik[47] * u12 + uik[48] * u13;
341
342 wp[14] += uik[0] * u14 + uik[1] * u15 + uik[2] * u16 + uik[3] * u17 + uik[4] * u18 + uik[5] * u19 + uik[6] * u20;
343 wp[15] += uik[7] * u14 + uik[8] * u15 + uik[9] * u16 + uik[10] * u17 + uik[11] * u18 + uik[12] * u19 + uik[13] * u20;
344 wp[16] += uik[14] * u14 + uik[15] * u15 + uik[16] * u16 + uik[17] * u17 + uik[18] * u18 + uik[19] * u19 + uik[20] * u20;
345 wp[17] += uik[21] * u14 + uik[22] * u15 + uik[23] * u16 + uik[24] * u17 + uik[25] * u18 + uik[26] * u19 + uik[27] * u20;
346 wp[18] += uik[28] * u14 + uik[29] * u15 + uik[30] * u16 + uik[31] * u17 + uik[32] * u18 + uik[33] * u19 + uik[34] * u20;
347 wp[19] += uik[35] * u14 + uik[36] * u15 + uik[37] * u16 + uik[38] * u17 + uik[39] * u18 + uik[40] * u19 + uik[41] * u20;
348 wp[20] += uik[42] * u14 + uik[43] * u15 + uik[44] * u16 + uik[45] * u17 + uik[46] * u18 + uik[47] * u19 + uik[48] * u20;
349
350 wp[21] += uik[0] * u21 + uik[1] * u22 + uik[2] * u23 + uik[3] * u24 + uik[4] * u25 + uik[5] * u26 + uik[6] * u27;
351 wp[22] += uik[7] * u21 + uik[8] * u22 + uik[9] * u23 + uik[10] * u24 + uik[11] * u25 + uik[12] * u26 + uik[13] * u27;
352 wp[23] += uik[14] * u21 + uik[15] * u22 + uik[16] * u23 + uik[17] * u24 + uik[18] * u25 + uik[19] * u26 + uik[20] * u27;
353 wp[24] += uik[21] * u21 + uik[22] * u22 + uik[23] * u23 + uik[24] * u24 + uik[25] * u25 + uik[26] * u26 + uik[27] * u27;
354 wp[25] += uik[28] * u21 + uik[29] * u22 + uik[30] * u23 + uik[31] * u24 + uik[32] * u25 + uik[33] * u26 + uik[34] * u27;
355 wp[26] += uik[35] * u21 + uik[36] * u22 + uik[37] * u23 + uik[38] * u24 + uik[39] * u25 + uik[40] * u26 + uik[41] * u27;
356 wp[27] += uik[42] * u21 + uik[43] * u22 + uik[44] * u23 + uik[45] * u24 + uik[46] * u25 + uik[47] * u26 + uik[48] * u27;
357
358 wp[28] += uik[0] * u28 + uik[1] * u29 + uik[2] * u30 + uik[3] * u31 + uik[4] * u32 + uik[5] * u33 + uik[6] * u34;
359 wp[29] += uik[7] * u28 + uik[8] * u29 + uik[9] * u30 + uik[10] * u31 + uik[11] * u32 + uik[12] * u33 + uik[13] * u34;
360 wp[30] += uik[14] * u28 + uik[15] * u29 + uik[16] * u30 + uik[17] * u31 + uik[18] * u32 + uik[19] * u33 + uik[20] * u34;
361 wp[31] += uik[21] * u28 + uik[22] * u29 + uik[23] * u30 + uik[24] * u31 + uik[25] * u32 + uik[26] * u33 + uik[27] * u34;
362 wp[32] += uik[28] * u28 + uik[29] * u29 + uik[30] * u30 + uik[31] * u31 + uik[32] * u32 + uik[33] * u33 + uik[34] * u34;
363 wp[33] += uik[35] * u28 + uik[36] * u29 + uik[37] * u30 + uik[38] * u31 + uik[39] * u32 + uik[40] * u33 + uik[41] * u34;
364 wp[34] += uik[42] * u28 + uik[43] * u29 + uik[44] * u30 + uik[45] * u31 + uik[46] * u32 + uik[47] * u33 + uik[48] * u34;
365
366 wp[35] += uik[0] * u35 + uik[1] * u36 + uik[2] * u37 + uik[3] * u38 + uik[4] * u39 + uik[5] * u40 + uik[6] * u41;
367 wp[36] += uik[7] * u35 + uik[8] * u36 + uik[9] * u37 + uik[10] * u38 + uik[11] * u39 + uik[12] * u40 + uik[13] * u41;
368 wp[37] += uik[14] * u35 + uik[15] * u36 + uik[16] * u37 + uik[17] * u38 + uik[18] * u39 + uik[19] * u40 + uik[20] * u41;
369 wp[38] += uik[21] * u35 + uik[22] * u36 + uik[23] * u37 + uik[24] * u38 + uik[25] * u39 + uik[26] * u40 + uik[27] * u41;
370 wp[39] += uik[28] * u35 + uik[29] * u36 + uik[30] * u37 + uik[31] * u38 + uik[32] * u39 + uik[33] * u40 + uik[34] * u41;
371 wp[40] += uik[35] * u35 + uik[36] * u36 + uik[37] * u37 + uik[38] * u38 + uik[39] * u39 + uik[40] * u40 + uik[41] * u41;
372 wp[41] += uik[42] * u35 + uik[43] * u36 + uik[44] * u37 + uik[45] * u38 + uik[46] * u39 + uik[47] * u40 + uik[48] * u41;
373
374 wp[42] += uik[0] * u42 + uik[1] * u43 + uik[2] * u44 + uik[3] * u45 + uik[4] * u46 + uik[5] * u47 + uik[6] * u48;
375 wp[43] += uik[7] * u42 + uik[8] * u43 + uik[9] * u44 + uik[10] * u45 + uik[11] * u46 + uik[12] * u47 + uik[13] * u48;
376 wp[44] += uik[14] * u42 + uik[15] * u43 + uik[16] * u44 + uik[17] * u45 + uik[18] * u46 + uik[19] * u47 + uik[20] * u48;
377 wp[45] += uik[21] * u42 + uik[22] * u43 + uik[23] * u44 + uik[24] * u45 + uik[25] * u46 + uik[26] * u47 + uik[27] * u48;
378 wp[46] += uik[28] * u42 + uik[29] * u43 + uik[30] * u44 + uik[31] * u45 + uik[32] * u46 + uik[33] * u47 + uik[34] * u48;
379 wp[47] += uik[35] * u42 + uik[36] * u43 + uik[37] * u44 + uik[38] * u45 + uik[39] * u46 + uik[40] * u47 + uik[41] * u48;
380 wp[48] += uik[42] * u42 + uik[43] * u43 + uik[44] * u44 + uik[45] * u45 + uik[46] * u46 + uik[47] * u47 + uik[48] * u48;
381 }
382 PetscCall(PetscLogFlops(2.0 * 343.0 * (jmax - jmin)));
383
384 /* ... add i to row list for next nonzero entry */
385 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
386 j = bj[jmin];
387 jl[i] = jl[j];
388 jl[j] = i; /* update jl */
389 }
390 i = nexti;
391 }
392
393 /* save nonzero entries in k-th row of U ... */
394
395 /* invert diagonal block */
396 d = ba + k * 49;
397 PetscCall(PetscArraycpy(d, dk, 49));
398 PetscCall(PetscKernel_A_gets_inverse_A_7(d, shift, allowzeropivot, &zeropivotdetected));
399 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
400
401 jmin = bi[k];
402 jmax = bi[k + 1];
403 if (jmin < jmax) {
404 for (j = jmin; j < jmax; j++) {
405 vj = bj[j]; /* block col. index of U */
406 u = ba + j * 49;
407 wp = w + vj * 49;
408 for (k1 = 0; k1 < 49; k1++) {
409 *u++ = *wp;
410 *wp++ = 0.0;
411 }
412 }
413
414 /* ... add k to row list for first nonzero entry in k-th row */
415 il[k] = jmin;
416 i = bj[jmin];
417 jl[k] = jl[i];
418 jl[i] = k;
419 }
420 }
421
422 PetscCall(PetscFree(w));
423 PetscCall(PetscFree2(il, jl));
424 PetscCall(PetscFree2(dk, uik));
425 if (a->permute) PetscCall(PetscFree(aa));
426
427 PetscCall(ISRestoreIndices(perm, &perm_ptr));
428
429 C->ops->solve = MatSolve_SeqSBAIJ_7_inplace;
430 C->ops->solvetranspose = MatSolve_SeqSBAIJ_7_inplace;
431 C->assembled = PETSC_TRUE;
432 C->preallocated = PETSC_TRUE;
433
434 PetscCall(PetscLogFlops(1.3333 * 343 * b->mbs)); /* from inverting diagonal blocks */
435 PetscFunctionReturn(PETSC_SUCCESS);
436 }
437