1 #include <../src/mat/impls/sbaij/seq/sbaij.h> 2 #include <petsc/private/kernels/blockinvert.h> 3 4 /* Version for when blocks are 6 by 6 */ 5 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat C, Mat A, const MatFactorInfo *info) 6 { 7 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 8 IS perm = b->row; 9 const PetscInt *ai, *aj, *perm_ptr, mbs = a->mbs, *bi = b->i, *bj = b->j; 10 PetscInt i, j, *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili; 11 MatScalar *ba = b->a, *aa, *ap, *dk, *uik; 12 MatScalar *u, *d, *w, *wp, u0, u1, u2, u3, u4, u5, u6, u7, u8, u9, u10, u11, u12; 13 MatScalar u13, u14, u15, u16, u17, u18, u19, u20, u21, u22, u23, u24, u25, u26, u27; 14 MatScalar u28, u29, u30, u31, u32, u33, u34, u35; 15 PetscReal shift = info->shiftamount; 16 PetscBool allowzeropivot, zeropivotdetected; 17 18 PetscFunctionBegin; 19 /* initialization */ 20 allowzeropivot = PetscNot(A->erroriffailure); 21 PetscCall(PetscCalloc1(36 * mbs, &w)); 22 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 23 il[0] = 0; 24 for (i = 0; i < mbs; i++) jl[i] = mbs; 25 26 PetscCall(PetscMalloc2(36, &dk, 36, &uik)); 27 PetscCall(ISGetIndices(perm, &perm_ptr)); 28 29 /* check permutation */ 30 if (!a->permute) { 31 ai = a->i; 32 aj = a->j; 33 aa = a->a; 34 } else { 35 ai = a->inew; 36 aj = a->jnew; 37 PetscCall(PetscMalloc1(36 * ai[mbs], &aa)); 38 PetscCall(PetscArraycpy(aa, a->a, 36 * ai[mbs])); 39 PetscCall(PetscMalloc1(ai[mbs], &a2anew)); 40 PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs])); 41 42 for (i = 0; i < mbs; i++) { 43 jmin = ai[i]; 44 jmax = ai[i + 1]; 45 for (j = jmin; j < jmax; j++) { 46 while (a2anew[j] != j) { 47 k = a2anew[j]; 48 a2anew[j] = a2anew[k]; 49 a2anew[k] = k; 50 for (k1 = 0; k1 < 36; k1++) { 51 dk[k1] = aa[k * 36 + k1]; 52 aa[k * 36 + k1] = aa[j * 36 + k1]; 53 aa[j * 36 + k1] = dk[k1]; 54 } 55 } 56 /* transform column-oriented blocks that lie in the lower triangle to row-oriented blocks */ 57 if (i > aj[j]) { 58 /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 59 ap = aa + j * 36; /* ptr to the beginning of j-th block of aa */ 60 for (k = 0; k < 36; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */ 61 for (k = 0; k < 6; k++) { /* j-th block of aa <- dk^T */ 62 for (k1 = 0; k1 < 6; k1++) *ap++ = dk[k + 6 * k1]; 63 } 64 } 65 } 66 } 67 PetscCall(PetscFree(a2anew)); 68 } 69 70 /* for each row k */ 71 for (k = 0; k < mbs; k++) { 72 /*initialize k-th row with elements nonzero in row perm(k) of A */ 73 jmin = ai[perm_ptr[k]]; 74 jmax = ai[perm_ptr[k] + 1]; 75 if (jmin < jmax) { 76 ap = aa + jmin * 36; 77 for (j = jmin; j < jmax; j++) { 78 vj = perm_ptr[aj[j]]; /* block col. index */ 79 wp = w + vj * 36; 80 for (i = 0; i < 36; i++) *wp++ = *ap++; 81 } 82 } 83 84 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 85 PetscCall(PetscArraycpy(dk, w + k * 36, 36)); 86 i = jl[k]; /* first row to be added to k_th row */ 87 88 while (i < mbs) { 89 nexti = jl[i]; /* next row to be added to k_th row */ 90 91 /* compute multiplier */ 92 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 93 94 /* uik = -inv(Di)*U_bar(i,k) */ 95 d = ba + i * 36; 96 u = ba + ili * 36; 97 98 u0 = u[0]; 99 u1 = u[1]; 100 u2 = u[2]; 101 u3 = u[3]; 102 u4 = u[4]; 103 u5 = u[5]; 104 u6 = u[6]; 105 u7 = u[7]; 106 u8 = u[8]; 107 u9 = u[9]; 108 u10 = u[10]; 109 u11 = u[11]; 110 u12 = u[12]; 111 u13 = u[13]; 112 u14 = u[14]; 113 u15 = u[15]; 114 u16 = u[16]; 115 u17 = u[17]; 116 u18 = u[18]; 117 u19 = u[19]; 118 u20 = u[20]; 119 u21 = u[21]; 120 u22 = u[22]; 121 u23 = u[23]; 122 u24 = u[24]; 123 u25 = u[25]; 124 u26 = u[26]; 125 u27 = u[27]; 126 u28 = u[28]; 127 u29 = u[29]; 128 u30 = u[30]; 129 u31 = u[31]; 130 u32 = u[32]; 131 u33 = u[33]; 132 u34 = u[34]; 133 u35 = u[35]; 134 135 uik[0] = -(d[0] * u0 + d[6] * u1 + d[12] * u2 + d[18] * u3 + d[24] * u4 + d[30] * u5); 136 uik[1] = -(d[1] * u0 + d[7] * u1 + d[13] * u2 + d[19] * u3 + d[25] * u4 + d[31] * u5); 137 uik[2] = -(d[2] * u0 + d[8] * u1 + d[14] * u2 + d[20] * u3 + d[26] * u4 + d[32] * u5); 138 uik[3] = -(d[3] * u0 + d[9] * u1 + d[15] * u2 + d[21] * u3 + d[27] * u4 + d[33] * u5); 139 uik[4] = -(d[4] * u0 + d[10] * u1 + d[16] * u2 + d[22] * u3 + d[28] * u4 + d[34] * u5); 140 uik[5] = -(d[5] * u0 + d[11] * u1 + d[17] * u2 + d[23] * u3 + d[29] * u4 + d[35] * u5); 141 142 uik[6] = -(d[0] * u6 + d[6] * u7 + d[12] * u8 + d[18] * u9 + d[24] * u10 + d[30] * u11); 143 uik[7] = -(d[1] * u6 + d[7] * u7 + d[13] * u8 + d[19] * u9 + d[25] * u10 + d[31] * u11); 144 uik[8] = -(d[2] * u6 + d[8] * u7 + d[14] * u8 + d[20] * u9 + d[26] * u10 + d[32] * u11); 145 uik[9] = -(d[3] * u6 + d[9] * u7 + d[15] * u8 + d[21] * u9 + d[27] * u10 + d[33] * u11); 146 uik[10] = -(d[4] * u6 + d[10] * u7 + d[16] * u8 + d[22] * u9 + d[28] * u10 + d[34] * u11); 147 uik[11] = -(d[5] * u6 + d[11] * u7 + d[17] * u8 + d[23] * u9 + d[29] * u10 + d[35] * u11); 148 149 uik[12] = -(d[0] * u12 + d[6] * u13 + d[12] * u14 + d[18] * u15 + d[24] * u16 + d[30] * u17); 150 uik[13] = -(d[1] * u12 + d[7] * u13 + d[13] * u14 + d[19] * u15 + d[25] * u16 + d[31] * u17); 151 uik[14] = -(d[2] * u12 + d[8] * u13 + d[14] * u14 + d[20] * u15 + d[26] * u16 + d[32] * u17); 152 uik[15] = -(d[3] * u12 + d[9] * u13 + d[15] * u14 + d[21] * u15 + d[27] * u16 + d[33] * u17); 153 uik[16] = -(d[4] * u12 + d[10] * u13 + d[16] * u14 + d[22] * u15 + d[28] * u16 + d[34] * u17); 154 uik[17] = -(d[5] * u12 + d[11] * u13 + d[17] * u14 + d[23] * u15 + d[29] * u16 + d[35] * u17); 155 156 uik[18] = -(d[0] * u18 + d[6] * u19 + d[12] * u20 + d[18] * u21 + d[24] * u22 + d[30] * u23); 157 uik[19] = -(d[1] * u18 + d[7] * u19 + d[13] * u20 + d[19] * u21 + d[25] * u22 + d[31] * u23); 158 uik[20] = -(d[2] * u18 + d[8] * u19 + d[14] * u20 + d[20] * u21 + d[26] * u22 + d[32] * u23); 159 uik[21] = -(d[3] * u18 + d[9] * u19 + d[15] * u20 + d[21] * u21 + d[27] * u22 + d[33] * u23); 160 uik[22] = -(d[4] * u18 + d[10] * u19 + d[16] * u20 + d[22] * u21 + d[28] * u22 + d[34] * u23); 161 uik[23] = -(d[5] * u18 + d[11] * u19 + d[17] * u20 + d[23] * u21 + d[29] * u22 + d[35] * u23); 162 163 uik[24] = -(d[0] * u24 + d[6] * u25 + d[12] * u26 + d[18] * u27 + d[24] * u28 + d[30] * u29); 164 uik[25] = -(d[1] * u24 + d[7] * u25 + d[13] * u26 + d[19] * u27 + d[25] * u28 + d[31] * u29); 165 uik[26] = -(d[2] * u24 + d[8] * u25 + d[14] * u26 + d[20] * u27 + d[26] * u28 + d[32] * u29); 166 uik[27] = -(d[3] * u24 + d[9] * u25 + d[15] * u26 + d[21] * u27 + d[27] * u28 + d[33] * u29); 167 uik[28] = -(d[4] * u24 + d[10] * u25 + d[16] * u26 + d[22] * u27 + d[28] * u28 + d[34] * u29); 168 uik[29] = -(d[5] * u24 + d[11] * u25 + d[17] * u26 + d[23] * u27 + d[29] * u28 + d[35] * u29); 169 170 uik[30] = -(d[0] * u30 + d[6] * u31 + d[12] * u32 + d[18] * u33 + d[24] * u34 + d[30] * u35); 171 uik[31] = -(d[1] * u30 + d[7] * u31 + d[13] * u32 + d[19] * u33 + d[25] * u34 + d[31] * u35); 172 uik[32] = -(d[2] * u30 + d[8] * u31 + d[14] * u32 + d[20] * u33 + d[26] * u34 + d[32] * u35); 173 uik[33] = -(d[3] * u30 + d[9] * u31 + d[15] * u32 + d[21] * u33 + d[27] * u34 + d[33] * u35); 174 uik[34] = -(d[4] * u30 + d[10] * u31 + d[16] * u32 + d[22] * u33 + d[28] * u34 + d[34] * u35); 175 uik[35] = -(d[5] * u30 + d[11] * u31 + d[17] * u32 + d[23] * u33 + d[29] * u34 + d[35] * u35); 176 177 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 178 dk[0] += uik[0] * u0 + uik[1] * u1 + uik[2] * u2 + uik[3] * u3 + uik[4] * u4 + uik[5] * u5; 179 dk[1] += uik[6] * u0 + uik[7] * u1 + uik[8] * u2 + uik[9] * u3 + uik[10] * u4 + uik[11] * u5; 180 dk[2] += uik[12] * u0 + uik[13] * u1 + uik[14] * u2 + uik[15] * u3 + uik[16] * u4 + uik[17] * u5; 181 dk[3] += uik[18] * u0 + uik[19] * u1 + uik[20] * u2 + uik[21] * u3 + uik[22] * u4 + uik[23] * u5; 182 dk[4] += uik[24] * u0 + uik[25] * u1 + uik[26] * u2 + uik[27] * u3 + uik[28] * u4 + uik[29] * u5; 183 dk[5] += uik[30] * u0 + uik[31] * u1 + uik[32] * u2 + uik[33] * u3 + uik[34] * u4 + uik[35] * u5; 184 185 dk[6] += uik[0] * u6 + uik[1] * u7 + uik[2] * u8 + uik[3] * u9 + uik[4] * u10 + uik[5] * u11; 186 dk[7] += uik[6] * u6 + uik[7] * u7 + uik[8] * u8 + uik[9] * u9 + uik[10] * u10 + uik[11] * u11; 187 dk[8] += uik[12] * u6 + uik[13] * u7 + uik[14] * u8 + uik[15] * u9 + uik[16] * u10 + uik[17] * u11; 188 dk[9] += uik[18] * u6 + uik[19] * u7 + uik[20] * u8 + uik[21] * u9 + uik[22] * u10 + uik[23] * u11; 189 dk[10] += uik[24] * u6 + uik[25] * u7 + uik[26] * u8 + uik[27] * u9 + uik[28] * u10 + uik[29] * u11; 190 dk[11] += uik[30] * u6 + uik[31] * u7 + uik[32] * u8 + uik[33] * u9 + uik[34] * u10 + uik[35] * u11; 191 192 dk[12] += uik[0] * u12 + uik[1] * u13 + uik[2] * u14 + uik[3] * u15 + uik[4] * u16 + uik[5] * u17; 193 dk[13] += uik[6] * u12 + uik[7] * u13 + uik[8] * u14 + uik[9] * u15 + uik[10] * u16 + uik[11] * u17; 194 dk[14] += uik[12] * u12 + uik[13] * u13 + uik[14] * u14 + uik[15] * u15 + uik[16] * u16 + uik[17] * u17; 195 dk[15] += uik[18] * u12 + uik[19] * u13 + uik[20] * u14 + uik[21] * u15 + uik[22] * u16 + uik[23] * u17; 196 dk[16] += uik[24] * u12 + uik[25] * u13 + uik[26] * u14 + uik[27] * u15 + uik[28] * u16 + uik[29] * u17; 197 dk[17] += uik[30] * u12 + uik[31] * u13 + uik[32] * u14 + uik[33] * u15 + uik[34] * u16 + uik[35] * u17; 198 199 dk[18] += uik[0] * u18 + uik[1] * u19 + uik[2] * u20 + uik[3] * u21 + uik[4] * u22 + uik[5] * u23; 200 dk[19] += uik[6] * u18 + uik[7] * u19 + uik[8] * u20 + uik[9] * u21 + uik[10] * u22 + uik[11] * u23; 201 dk[20] += uik[12] * u18 + uik[13] * u19 + uik[14] * u20 + uik[15] * u21 + uik[16] * u22 + uik[17] * u23; 202 dk[21] += uik[18] * u18 + uik[19] * u19 + uik[20] * u20 + uik[21] * u21 + uik[22] * u22 + uik[23] * u23; 203 dk[22] += uik[24] * u18 + uik[25] * u19 + uik[26] * u20 + uik[27] * u21 + uik[28] * u22 + uik[29] * u23; 204 dk[23] += uik[30] * u18 + uik[31] * u19 + uik[32] * u20 + uik[33] * u21 + uik[34] * u22 + uik[35] * u23; 205 206 dk[24] += uik[0] * u24 + uik[1] * u25 + uik[2] * u26 + uik[3] * u27 + uik[4] * u28 + uik[5] * u29; 207 dk[25] += uik[6] * u24 + uik[7] * u25 + uik[8] * u26 + uik[9] * u27 + uik[10] * u28 + uik[11] * u29; 208 dk[26] += uik[12] * u24 + uik[13] * u25 + uik[14] * u26 + uik[15] * u27 + uik[16] * u28 + uik[17] * u29; 209 dk[27] += uik[18] * u24 + uik[19] * u25 + uik[20] * u26 + uik[21] * u27 + uik[22] * u28 + uik[23] * u29; 210 dk[28] += uik[24] * u24 + uik[25] * u25 + uik[26] * u26 + uik[27] * u27 + uik[28] * u28 + uik[29] * u29; 211 dk[29] += uik[30] * u24 + uik[31] * u25 + uik[32] * u26 + uik[33] * u27 + uik[34] * u28 + uik[35] * u29; 212 213 dk[30] += uik[0] * u30 + uik[1] * u31 + uik[2] * u32 + uik[3] * u33 + uik[4] * u34 + uik[5] * u35; 214 dk[31] += uik[6] * u30 + uik[7] * u31 + uik[8] * u32 + uik[9] * u33 + uik[10] * u34 + uik[11] * u35; 215 dk[32] += uik[12] * u30 + uik[13] * u31 + uik[14] * u32 + uik[15] * u33 + uik[16] * u34 + uik[17] * u35; 216 dk[33] += uik[18] * u30 + uik[19] * u31 + uik[20] * u32 + uik[21] * u33 + uik[22] * u34 + uik[23] * u35; 217 dk[34] += uik[24] * u30 + uik[25] * u31 + uik[26] * u32 + uik[27] * u33 + uik[28] * u34 + uik[29] * u35; 218 dk[35] += uik[30] * u30 + uik[31] * u31 + uik[32] * u32 + uik[33] * u33 + uik[34] * u34 + uik[35] * u35; 219 220 PetscCall(PetscLogFlops(216.0 * 4.0)); 221 222 /* update -U(i,k) */ 223 PetscCall(PetscArraycpy(ba + ili * 36, uik, 36)); 224 225 /* add multiple of row i to k-th row ... */ 226 jmin = ili + 1; 227 jmax = bi[i + 1]; 228 if (jmin < jmax) { 229 for (j = jmin; j < jmax; j++) { 230 /* w += -U(i,k)^T * U_bar(i,j) */ 231 wp = w + bj[j] * 36; 232 u = ba + j * 36; 233 234 u0 = u[0]; 235 u1 = u[1]; 236 u2 = u[2]; 237 u3 = u[3]; 238 u4 = u[4]; 239 u5 = u[5]; 240 u6 = u[6]; 241 u7 = u[7]; 242 u8 = u[8]; 243 u9 = u[9]; 244 u10 = u[10]; 245 u11 = u[11]; 246 u12 = u[12]; 247 u13 = u[13]; 248 u14 = u[14]; 249 u15 = u[15]; 250 u16 = u[16]; 251 u17 = u[17]; 252 u18 = u[18]; 253 u19 = u[19]; 254 u20 = u[20]; 255 u21 = u[21]; 256 u22 = u[22]; 257 u23 = u[23]; 258 u24 = u[24]; 259 u25 = u[25]; 260 u26 = u[26]; 261 u27 = u[27]; 262 u28 = u[28]; 263 u29 = u[29]; 264 u30 = u[30]; 265 u31 = u[31]; 266 u32 = u[32]; 267 u33 = u[33]; 268 u34 = u[34]; 269 u35 = u[35]; 270 271 wp[0] += uik[0] * u0 + uik[1] * u1 + uik[2] * u2 + uik[3] * u3 + uik[4] * u4 + uik[5] * u5; 272 wp[1] += uik[6] * u0 + uik[7] * u1 + uik[8] * u2 + uik[9] * u3 + uik[10] * u4 + uik[11] * u5; 273 wp[2] += uik[12] * u0 + uik[13] * u1 + uik[14] * u2 + uik[15] * u3 + uik[16] * u4 + uik[17] * u5; 274 wp[3] += uik[18] * u0 + uik[19] * u1 + uik[20] * u2 + uik[21] * u3 + uik[22] * u4 + uik[23] * u5; 275 wp[4] += uik[24] * u0 + uik[25] * u1 + uik[26] * u2 + uik[27] * u3 + uik[28] * u4 + uik[29] * u5; 276 wp[5] += uik[30] * u0 + uik[31] * u1 + uik[32] * u2 + uik[33] * u3 + uik[34] * u4 + uik[35] * u5; 277 278 wp[6] += uik[0] * u6 + uik[1] * u7 + uik[2] * u8 + uik[3] * u9 + uik[4] * u10 + uik[5] * u11; 279 wp[7] += uik[6] * u6 + uik[7] * u7 + uik[8] * u8 + uik[9] * u9 + uik[10] * u10 + uik[11] * u11; 280 wp[8] += uik[12] * u6 + uik[13] * u7 + uik[14] * u8 + uik[15] * u9 + uik[16] * u10 + uik[17] * u11; 281 wp[9] += uik[18] * u6 + uik[19] * u7 + uik[20] * u8 + uik[21] * u9 + uik[22] * u10 + uik[23] * u11; 282 wp[10] += uik[24] * u6 + uik[25] * u7 + uik[26] * u8 + uik[27] * u9 + uik[28] * u10 + uik[29] * u11; 283 wp[11] += uik[30] * u6 + uik[31] * u7 + uik[32] * u8 + uik[33] * u9 + uik[34] * u10 + uik[35] * u11; 284 285 wp[12] += uik[0] * u12 + uik[1] * u13 + uik[2] * u14 + uik[3] * u15 + uik[4] * u16 + uik[5] * u17; 286 wp[13] += uik[6] * u12 + uik[7] * u13 + uik[8] * u14 + uik[9] * u15 + uik[10] * u16 + uik[11] * u17; 287 wp[14] += uik[12] * u12 + uik[13] * u13 + uik[14] * u14 + uik[15] * u15 + uik[16] * u16 + uik[17] * u17; 288 wp[15] += uik[18] * u12 + uik[19] * u13 + uik[20] * u14 + uik[21] * u15 + uik[22] * u16 + uik[23] * u17; 289 wp[16] += uik[24] * u12 + uik[25] * u13 + uik[26] * u14 + uik[27] * u15 + uik[28] * u16 + uik[29] * u17; 290 wp[17] += uik[30] * u12 + uik[31] * u13 + uik[32] * u14 + uik[33] * u15 + uik[34] * u16 + uik[35] * u17; 291 292 wp[18] += uik[0] * u18 + uik[1] * u19 + uik[2] * u20 + uik[3] * u21 + uik[4] * u22 + uik[5] * u23; 293 wp[19] += uik[6] * u18 + uik[7] * u19 + uik[8] * u20 + uik[9] * u21 + uik[10] * u22 + uik[11] * u23; 294 wp[20] += uik[12] * u18 + uik[13] * u19 + uik[14] * u20 + uik[15] * u21 + uik[16] * u22 + uik[17] * u23; 295 wp[21] += uik[18] * u18 + uik[19] * u19 + uik[20] * u20 + uik[21] * u21 + uik[22] * u22 + uik[23] * u23; 296 wp[22] += uik[24] * u18 + uik[25] * u19 + uik[26] * u20 + uik[27] * u21 + uik[28] * u22 + uik[29] * u23; 297 wp[23] += uik[30] * u18 + uik[31] * u19 + uik[32] * u20 + uik[33] * u21 + uik[34] * u22 + uik[35] * u23; 298 299 wp[24] += uik[0] * u24 + uik[1] * u25 + uik[2] * u26 + uik[3] * u27 + uik[4] * u28 + uik[5] * u29; 300 wp[25] += uik[6] * u24 + uik[7] * u25 + uik[8] * u26 + uik[9] * u27 + uik[10] * u28 + uik[11] * u29; 301 wp[26] += uik[12] * u24 + uik[13] * u25 + uik[14] * u26 + uik[15] * u27 + uik[16] * u28 + uik[17] * u29; 302 wp[27] += uik[18] * u24 + uik[19] * u25 + uik[20] * u26 + uik[21] * u27 + uik[22] * u28 + uik[23] * u29; 303 wp[28] += uik[24] * u24 + uik[25] * u25 + uik[26] * u26 + uik[27] * u27 + uik[28] * u28 + uik[29] * u29; 304 wp[29] += uik[30] * u24 + uik[31] * u25 + uik[32] * u26 + uik[33] * u27 + uik[34] * u28 + uik[35] * u29; 305 306 wp[30] += uik[0] * u30 + uik[1] * u31 + uik[2] * u32 + uik[3] * u33 + uik[4] * u34 + uik[5] * u35; 307 wp[31] += uik[6] * u30 + uik[7] * u31 + uik[8] * u32 + uik[9] * u33 + uik[10] * u34 + uik[11] * u35; 308 wp[32] += uik[12] * u30 + uik[13] * u31 + uik[14] * u32 + uik[15] * u33 + uik[16] * u34 + uik[17] * u35; 309 wp[33] += uik[18] * u30 + uik[19] * u31 + uik[20] * u32 + uik[21] * u33 + uik[22] * u34 + uik[23] * u35; 310 wp[34] += uik[24] * u30 + uik[25] * u31 + uik[26] * u32 + uik[27] * u33 + uik[28] * u34 + uik[29] * u35; 311 wp[35] += uik[30] * u30 + uik[31] * u31 + uik[32] * u32 + uik[33] * u33 + uik[34] * u34 + uik[35] * u35; 312 } 313 PetscCall(PetscLogFlops(2.0 * 216.0 * (jmax - jmin))); 314 315 /* ... add i to row list for next nonzero entry */ 316 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 317 j = bj[jmin]; 318 jl[i] = jl[j]; 319 jl[j] = i; /* update jl */ 320 } 321 i = nexti; 322 } 323 324 /* save nonzero entries in k-th row of U ... */ 325 326 /* invert diagonal block */ 327 d = ba + k * 36; 328 PetscCall(PetscArraycpy(d, dk, 36)); 329 PetscCall(PetscKernel_A_gets_inverse_A_6(d, shift, allowzeropivot, &zeropivotdetected)); 330 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 331 332 jmin = bi[k]; 333 jmax = bi[k + 1]; 334 if (jmin < jmax) { 335 for (j = jmin; j < jmax; j++) { 336 vj = bj[j]; /* block col. index of U */ 337 u = ba + j * 36; 338 wp = w + vj * 36; 339 for (k1 = 0; k1 < 36; k1++) { 340 *u++ = *wp; 341 *wp++ = 0.0; 342 } 343 } 344 345 /* ... add k to row list for first nonzero entry in k-th row */ 346 il[k] = jmin; 347 i = bj[jmin]; 348 jl[k] = jl[i]; 349 jl[i] = k; 350 } 351 } 352 353 PetscCall(PetscFree(w)); 354 PetscCall(PetscFree2(il, jl)); 355 PetscCall(PetscFree2(dk, uik)); 356 if (a->permute) PetscCall(PetscFree(aa)); 357 358 PetscCall(ISRestoreIndices(perm, &perm_ptr)); 359 360 C->ops->solve = MatSolve_SeqSBAIJ_6_inplace; 361 C->ops->solvetranspose = MatSolve_SeqSBAIJ_6_inplace; 362 C->assembled = PETSC_TRUE; 363 C->preallocated = PETSC_TRUE; 364 365 PetscCall(PetscLogFlops(1.3333 * 216 * b->mbs)); /* from inverting diagonal blocks */ 366 PetscFunctionReturn(PETSC_SUCCESS); 367 } 368