1 2 /* 3 Inverts 2 by 2 matrix using gaussian elimination with partial pivoting. 4 5 Used by the sparse factorization routines in 6 src/mat/impls/baij/seq 7 8 This is a combination of the Linpack routines 9 dgefa() and dgedi() specialized for a size of 2. 10 11 */ 12 #include <petscsys.h> 13 14 PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_2(MatScalar *a, PetscReal shift, PetscBool allowzeropivot, PetscBool *zeropivotdetected) { 15 PetscInt i__2, i__3, kp1, j, k, l, ll, i, ipvt[2], k3; 16 PetscInt k4, j3; 17 MatScalar *aa, *ax, *ay, work[4], stmp; 18 MatReal tmp, max; 19 20 PetscFunctionBegin; 21 if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE; 22 shift = .25 * shift * (1.e-12 + PetscAbsScalar(a[0]) + PetscAbsScalar(a[3])); 23 24 /* Parameter adjustments */ 25 a -= 3; 26 27 k = 1; 28 kp1 = k + 1; 29 k3 = 2 * k; 30 k4 = k3 + k; 31 32 /* find l = pivot index */ 33 i__2 = 3 - k; 34 aa = &a[k4]; 35 max = PetscAbsScalar(aa[0]); 36 l = 1; 37 for (ll = 1; ll < i__2; ll++) { 38 tmp = PetscAbsScalar(aa[ll]); 39 if (tmp > max) { 40 max = tmp; 41 l = ll + 1; 42 } 43 } 44 l += k - 1; 45 ipvt[k - 1] = l; 46 47 if (a[l + k3] == 0.0) { 48 if (shift == 0.0) { 49 if (allowzeropivot) { 50 PetscCall(PetscInfo(NULL, "Zero pivot, row %" PetscInt_FMT "\n", k - 1)); 51 if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE; 52 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT, k - 1); 53 } else { 54 a[l + k3] = shift; 55 } 56 } 57 58 /* interchange if necessary */ 59 if (l != k) { 60 stmp = a[l + k3]; 61 a[l + k3] = a[k4]; 62 a[k4] = stmp; 63 } 64 65 /* compute multipliers */ 66 stmp = -1. / a[k4]; 67 i__2 = 2 - k; 68 aa = &a[1 + k4]; 69 for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp; 70 71 /* row elimination with column indexing */ 72 ax = &a[k4 + 1]; 73 for (j = kp1; j <= 2; ++j) { 74 j3 = 2 * j; 75 stmp = a[l + j3]; 76 if (l != k) { 77 a[l + j3] = a[k + j3]; 78 a[k + j3] = stmp; 79 } 80 81 i__3 = 2 - k; 82 ay = &a[1 + k + j3]; 83 for (ll = 0; ll < i__3; ll++) ay[ll] += stmp * ax[ll]; 84 } 85 86 ipvt[1] = 2; 87 if (a[6] == 0.0) { 88 if (PetscLikely(allowzeropivot)) { 89 PetscCall(PetscInfo(NULL, "Zero pivot, row 1\n")); 90 if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE; 91 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row 1"); 92 } 93 94 /* Now form the inverse */ 95 /* compute inverse(u) */ 96 for (k = 1; k <= 2; ++k) { 97 k3 = 2 * k; 98 k4 = k3 + k; 99 a[k4] = 1.0 / a[k4]; 100 stmp = -a[k4]; 101 i__2 = k - 1; 102 aa = &a[k3 + 1]; 103 for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp; 104 kp1 = k + 1; 105 if (2 < kp1) continue; 106 ax = aa; 107 for (j = kp1; j <= 2; ++j) { 108 j3 = 2 * j; 109 stmp = a[k + j3]; 110 a[k + j3] = 0.0; 111 ay = &a[j3 + 1]; 112 for (ll = 0; ll < k; ll++) ay[ll] += stmp * ax[ll]; 113 } 114 } 115 116 /* form inverse(u)*inverse(l) */ 117 k = 1; 118 k3 = 2 * k; 119 kp1 = k + 1; 120 aa = a + k3; 121 for (i = kp1; i <= 2; ++i) { 122 work[i - 1] = aa[i]; 123 aa[i] = 0.0; 124 } 125 for (j = kp1; j <= 2; ++j) { 126 stmp = work[j - 1]; 127 ax = &a[2 * j + 1]; 128 ay = &a[k3 + 1]; 129 ay[0] += stmp * ax[0]; 130 ay[1] += stmp * ax[1]; 131 } 132 l = ipvt[k - 1]; 133 if (l != k) { 134 ax = &a[k3 + 1]; 135 ay = &a[2 * l + 1]; 136 stmp = ax[0]; 137 ax[0] = ay[0]; 138 ay[0] = stmp; 139 stmp = ax[1]; 140 ax[1] = ay[1]; 141 ay[1] = stmp; 142 } 143 PetscFunctionReturn(0); 144 } 145 146 /* gaussian elimination with partial pivoting */ 147 PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_9(MatScalar *a, PetscReal shift, PetscBool allowzeropivot, PetscBool *zeropivotdetected) { 148 PetscInt i__2, i__3, kp1, j, k, l, ll, i, ipvt[9], kb, k3; 149 PetscInt k4, j3; 150 MatScalar *aa, *ax, *ay, work[81], stmp; 151 MatReal tmp, max; 152 153 PetscFunctionBegin; 154 if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE; 155 156 /* Parameter adjustments */ 157 a -= 10; 158 159 for (k = 1; k <= 8; ++k) { 160 kp1 = k + 1; 161 k3 = 9 * k; 162 k4 = k3 + k; 163 164 /* find l = pivot index */ 165 i__2 = 10 - k; 166 aa = &a[k4]; 167 max = PetscAbsScalar(aa[0]); 168 l = 1; 169 for (ll = 1; ll < i__2; ll++) { 170 tmp = PetscAbsScalar(aa[ll]); 171 if (tmp > max) { 172 max = tmp; 173 l = ll + 1; 174 } 175 } 176 l += k - 1; 177 ipvt[k - 1] = l; 178 179 if (a[l + k3] == 0.0) { 180 if (shift == 0.0) { 181 if (allowzeropivot) { 182 PetscCall(PetscInfo(NULL, "Zero pivot, row %" PetscInt_FMT "\n", k - 1)); 183 if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE; 184 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT, k - 1); 185 } else { 186 a[l + k3] = shift; 187 } 188 } 189 190 /* interchange if necessary */ 191 if (l != k) { 192 stmp = a[l + k3]; 193 a[l + k3] = a[k4]; 194 a[k4] = stmp; 195 } 196 197 /* compute multipliers */ 198 stmp = -1. / a[k4]; 199 i__2 = 9 - k; 200 aa = &a[1 + k4]; 201 for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp; 202 203 /* row elimination with column indexing */ 204 ax = &a[k4 + 1]; 205 for (j = kp1; j <= 9; ++j) { 206 j3 = 9 * j; 207 stmp = a[l + j3]; 208 if (l != k) { 209 a[l + j3] = a[k + j3]; 210 a[k + j3] = stmp; 211 } 212 213 i__3 = 9 - k; 214 ay = &a[1 + k + j3]; 215 for (ll = 0; ll < i__3; ll++) ay[ll] += stmp * ax[ll]; 216 } 217 } 218 ipvt[8] = 9; 219 if (a[90] == 0.0) { 220 if (PetscLikely(allowzeropivot)) { 221 PetscCall(PetscInfo(NULL, "Zero pivot, row 8\n")); 222 if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE; 223 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row 8"); 224 } 225 226 /* Now form the inverse */ 227 /* compute inverse(u) */ 228 for (k = 1; k <= 9; ++k) { 229 k3 = 9 * k; 230 k4 = k3 + k; 231 a[k4] = 1.0 / a[k4]; 232 stmp = -a[k4]; 233 i__2 = k - 1; 234 aa = &a[k3 + 1]; 235 for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp; 236 kp1 = k + 1; 237 if (9 < kp1) continue; 238 ax = aa; 239 for (j = kp1; j <= 9; ++j) { 240 j3 = 9 * j; 241 stmp = a[k + j3]; 242 a[k + j3] = 0.0; 243 ay = &a[j3 + 1]; 244 for (ll = 0; ll < k; ll++) ay[ll] += stmp * ax[ll]; 245 } 246 } 247 248 /* form inverse(u)*inverse(l) */ 249 for (kb = 1; kb <= 8; ++kb) { 250 k = 9 - kb; 251 k3 = 9 * k; 252 kp1 = k + 1; 253 aa = a + k3; 254 for (i = kp1; i <= 9; ++i) { 255 work[i - 1] = aa[i]; 256 aa[i] = 0.0; 257 } 258 for (j = kp1; j <= 9; ++j) { 259 stmp = work[j - 1]; 260 ax = &a[9 * j + 1]; 261 ay = &a[k3 + 1]; 262 ay[0] += stmp * ax[0]; 263 ay[1] += stmp * ax[1]; 264 ay[2] += stmp * ax[2]; 265 ay[3] += stmp * ax[3]; 266 ay[4] += stmp * ax[4]; 267 ay[5] += stmp * ax[5]; 268 ay[6] += stmp * ax[6]; 269 ay[7] += stmp * ax[7]; 270 ay[8] += stmp * ax[8]; 271 } 272 l = ipvt[k - 1]; 273 if (l != k) { 274 ax = &a[k3 + 1]; 275 ay = &a[9 * l + 1]; 276 stmp = ax[0]; 277 ax[0] = ay[0]; 278 ay[0] = stmp; 279 stmp = ax[1]; 280 ax[1] = ay[1]; 281 ay[1] = stmp; 282 stmp = ax[2]; 283 ax[2] = ay[2]; 284 ay[2] = stmp; 285 stmp = ax[3]; 286 ax[3] = ay[3]; 287 ay[3] = stmp; 288 stmp = ax[4]; 289 ax[4] = ay[4]; 290 ay[4] = stmp; 291 stmp = ax[5]; 292 ax[5] = ay[5]; 293 ay[5] = stmp; 294 stmp = ax[6]; 295 ax[6] = ay[6]; 296 ay[6] = stmp; 297 stmp = ax[7]; 298 ax[7] = ay[7]; 299 ay[7] = stmp; 300 stmp = ax[8]; 301 ax[8] = ay[8]; 302 ay[8] = stmp; 303 } 304 } 305 PetscFunctionReturn(0); 306 } 307 308 /* 309 Inverts 15 by 15 matrix using gaussian elimination with partial pivoting. 310 311 Used by the sparse factorization routines in 312 src/mat/impls/baij/seq 313 314 This is a combination of the Linpack routines 315 dgefa() and dgedi() specialized for a size of 15. 316 317 */ 318 319 PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_15(MatScalar *a, PetscInt *ipvt, MatScalar *work, PetscReal shift, PetscBool allowzeropivot, PetscBool *zeropivotdetected) { 320 PetscInt i__2, i__3, kp1, j, k, l, ll, i, kb, k3; 321 PetscInt k4, j3; 322 MatScalar *aa, *ax, *ay, stmp; 323 MatReal tmp, max; 324 325 PetscFunctionBegin; 326 if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE; 327 328 /* Parameter adjustments */ 329 a -= 16; 330 331 for (k = 1; k <= 14; ++k) { 332 kp1 = k + 1; 333 k3 = 15 * k; 334 k4 = k3 + k; 335 336 /* find l = pivot index */ 337 i__2 = 16 - k; 338 aa = &a[k4]; 339 max = PetscAbsScalar(aa[0]); 340 l = 1; 341 for (ll = 1; ll < i__2; ll++) { 342 tmp = PetscAbsScalar(aa[ll]); 343 if (tmp > max) { 344 max = tmp; 345 l = ll + 1; 346 } 347 } 348 l += k - 1; 349 ipvt[k - 1] = l; 350 351 if (a[l + k3] == 0.0) { 352 if (shift == 0.0) { 353 if (allowzeropivot) { 354 PetscCall(PetscInfo(NULL, "Zero pivot, row %" PetscInt_FMT "\n", k - 1)); 355 if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE; 356 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT, k - 1); 357 } else { 358 a[l + k3] = shift; 359 } 360 } 361 362 /* interchange if necessary */ 363 if (l != k) { 364 stmp = a[l + k3]; 365 a[l + k3] = a[k4]; 366 a[k4] = stmp; 367 } 368 369 /* compute multipliers */ 370 stmp = -1. / a[k4]; 371 i__2 = 15 - k; 372 aa = &a[1 + k4]; 373 for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp; 374 375 /* row elimination with column indexing */ 376 ax = &a[k4 + 1]; 377 for (j = kp1; j <= 15; ++j) { 378 j3 = 15 * j; 379 stmp = a[l + j3]; 380 if (l != k) { 381 a[l + j3] = a[k + j3]; 382 a[k + j3] = stmp; 383 } 384 385 i__3 = 15 - k; 386 ay = &a[1 + k + j3]; 387 for (ll = 0; ll < i__3; ll++) ay[ll] += stmp * ax[ll]; 388 } 389 } 390 ipvt[14] = 15; 391 if (a[240] == 0.0) { 392 if (PetscLikely(allowzeropivot)) { 393 PetscCall(PetscInfo(NULL, "Zero pivot, row 14\n")); 394 if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE; 395 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row 14"); 396 } 397 398 /* Now form the inverse */ 399 /* compute inverse(u) */ 400 for (k = 1; k <= 15; ++k) { 401 k3 = 15 * k; 402 k4 = k3 + k; 403 a[k4] = 1.0 / a[k4]; 404 stmp = -a[k4]; 405 i__2 = k - 1; 406 aa = &a[k3 + 1]; 407 for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp; 408 kp1 = k + 1; 409 if (15 < kp1) continue; 410 ax = aa; 411 for (j = kp1; j <= 15; ++j) { 412 j3 = 15 * j; 413 stmp = a[k + j3]; 414 a[k + j3] = 0.0; 415 ay = &a[j3 + 1]; 416 for (ll = 0; ll < k; ll++) ay[ll] += stmp * ax[ll]; 417 } 418 } 419 420 /* form inverse(u)*inverse(l) */ 421 for (kb = 1; kb <= 14; ++kb) { 422 k = 15 - kb; 423 k3 = 15 * k; 424 kp1 = k + 1; 425 aa = a + k3; 426 for (i = kp1; i <= 15; ++i) { 427 work[i - 1] = aa[i]; 428 aa[i] = 0.0; 429 } 430 for (j = kp1; j <= 15; ++j) { 431 stmp = work[j - 1]; 432 ax = &a[15 * j + 1]; 433 ay = &a[k3 + 1]; 434 ay[0] += stmp * ax[0]; 435 ay[1] += stmp * ax[1]; 436 ay[2] += stmp * ax[2]; 437 ay[3] += stmp * ax[3]; 438 ay[4] += stmp * ax[4]; 439 ay[5] += stmp * ax[5]; 440 ay[6] += stmp * ax[6]; 441 ay[7] += stmp * ax[7]; 442 ay[8] += stmp * ax[8]; 443 ay[9] += stmp * ax[9]; 444 ay[10] += stmp * ax[10]; 445 ay[11] += stmp * ax[11]; 446 ay[12] += stmp * ax[12]; 447 ay[13] += stmp * ax[13]; 448 ay[14] += stmp * ax[14]; 449 } 450 l = ipvt[k - 1]; 451 if (l != k) { 452 ax = &a[k3 + 1]; 453 ay = &a[15 * l + 1]; 454 stmp = ax[0]; 455 ax[0] = ay[0]; 456 ay[0] = stmp; 457 stmp = ax[1]; 458 ax[1] = ay[1]; 459 ay[1] = stmp; 460 stmp = ax[2]; 461 ax[2] = ay[2]; 462 ay[2] = stmp; 463 stmp = ax[3]; 464 ax[3] = ay[3]; 465 ay[3] = stmp; 466 stmp = ax[4]; 467 ax[4] = ay[4]; 468 ay[4] = stmp; 469 stmp = ax[5]; 470 ax[5] = ay[5]; 471 ay[5] = stmp; 472 stmp = ax[6]; 473 ax[6] = ay[6]; 474 ay[6] = stmp; 475 stmp = ax[7]; 476 ax[7] = ay[7]; 477 ay[7] = stmp; 478 stmp = ax[8]; 479 ax[8] = ay[8]; 480 ay[8] = stmp; 481 stmp = ax[9]; 482 ax[9] = ay[9]; 483 ay[9] = stmp; 484 stmp = ax[10]; 485 ax[10] = ay[10]; 486 ay[10] = stmp; 487 stmp = ax[11]; 488 ax[11] = ay[11]; 489 ay[11] = stmp; 490 stmp = ax[12]; 491 ax[12] = ay[12]; 492 ay[12] = stmp; 493 stmp = ax[13]; 494 ax[13] = ay[13]; 495 ay[13] = stmp; 496 stmp = ax[14]; 497 ax[14] = ay[14]; 498 ay[14] = stmp; 499 } 500 } 501 PetscFunctionReturn(0); 502 } 503