1 /* 2 Inverts 2 by 2 matrix using gaussian elimination with partial pivoting. 3 4 Used by the sparse factorization routines in 5 src/mat/impls/baij/seq 6 7 This is a combination of the Linpack routines 8 dgefa() and dgedi() specialized for a size of 2. 9 10 */ 11 #include <petscsys.h> 12 13 PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_2(MatScalar *a, PetscReal shift, PetscBool allowzeropivot, PetscBool *zeropivotdetected) 14 { 15 PetscInt i__2, i__3, kp1, j, k, l, ll, i, ipvt[2], k3; 16 PetscInt k4, j3; 17 MatScalar *aa, *ax, *ay, work[4], stmp; 18 MatReal tmp, max; 19 20 PetscFunctionBegin; 21 if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE; 22 shift = .25 * shift * (1.e-12 + PetscAbsScalar(a[0]) + PetscAbsScalar(a[3])); 23 24 /* Parameter adjustments */ 25 a -= 3; 26 27 k = 1; 28 kp1 = k + 1; 29 k3 = 2 * k; 30 k4 = k3 + k; 31 32 /* find l = pivot index */ 33 i__2 = 3 - k; 34 aa = &a[k4]; 35 max = PetscAbsScalar(aa[0]); 36 l = 1; 37 for (ll = 1; ll < i__2; ll++) { 38 tmp = PetscAbsScalar(aa[ll]); 39 if (tmp > max) { 40 max = tmp; 41 l = ll + 1; 42 } 43 } 44 l += k - 1; 45 ipvt[k - 1] = l; 46 47 if (a[l + k3] == 0.0) { 48 if (shift == 0.0) { 49 if (allowzeropivot) { 50 PetscCall(PetscInfo(NULL, "Zero pivot, row %" PetscInt_FMT "\n", k - 1)); 51 if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE; 52 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT, k - 1); 53 } else { 54 a[l + k3] = shift; 55 } 56 } 57 58 /* interchange if necessary */ 59 if (l != k) { 60 stmp = a[l + k3]; 61 a[l + k3] = a[k4]; 62 a[k4] = stmp; 63 } 64 65 /* compute multipliers */ 66 stmp = -1. / a[k4]; 67 i__2 = 2 - k; 68 aa = &a[1 + k4]; 69 for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp; 70 71 /* row elimination with column indexing */ 72 ax = &a[k4 + 1]; 73 for (j = kp1; j <= 2; ++j) { 74 j3 = 2 * j; 75 stmp = a[l + j3]; 76 if (l != k) { 77 a[l + j3] = a[k + j3]; 78 a[k + j3] = stmp; 79 } 80 81 i__3 = 2 - k; 82 ay = &a[1 + k + j3]; 83 for (ll = 0; ll < i__3; ll++) ay[ll] += stmp * ax[ll]; 84 } 85 86 ipvt[1] = 2; 87 if (a[6] == 0.0) { 88 if (PetscLikely(allowzeropivot)) { 89 PetscCall(PetscInfo(NULL, "Zero pivot, row 1\n")); 90 if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE; 91 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row 1"); 92 } 93 94 /* Now form the inverse */ 95 /* compute inverse(u) */ 96 for (k = 1; k <= 2; ++k) { 97 k3 = 2 * k; 98 k4 = k3 + k; 99 a[k4] = 1.0 / a[k4]; 100 stmp = -a[k4]; 101 i__2 = k - 1; 102 aa = &a[k3 + 1]; 103 for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp; 104 kp1 = k + 1; 105 if (2 < kp1) continue; 106 ax = aa; 107 for (j = kp1; j <= 2; ++j) { 108 j3 = 2 * j; 109 stmp = a[k + j3]; 110 a[k + j3] = 0.0; 111 ay = &a[j3 + 1]; 112 for (ll = 0; ll < k; ll++) ay[ll] += stmp * ax[ll]; 113 } 114 } 115 116 /* form inverse(u)*inverse(l) */ 117 k = 1; 118 k3 = 2 * k; 119 kp1 = k + 1; 120 aa = a + k3; 121 for (i = kp1; i <= 2; ++i) { 122 work[i - 1] = aa[i]; 123 aa[i] = 0.0; 124 } 125 for (j = kp1; j <= 2; ++j) { 126 stmp = work[j - 1]; 127 ax = &a[2 * j + 1]; 128 ay = &a[k3 + 1]; 129 ay[0] += stmp * ax[0]; 130 ay[1] += stmp * ax[1]; 131 } 132 l = ipvt[k - 1]; 133 if (l != k) { 134 ax = &a[k3 + 1]; 135 ay = &a[2 * l + 1]; 136 stmp = ax[0]; 137 ax[0] = ay[0]; 138 ay[0] = stmp; 139 stmp = ax[1]; 140 ax[1] = ay[1]; 141 ay[1] = stmp; 142 } 143 PetscFunctionReturn(PETSC_SUCCESS); 144 } 145 146 /* gaussian elimination with partial pivoting */ 147 PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_9(MatScalar *a, PetscReal shift, PetscBool allowzeropivot, PetscBool *zeropivotdetected) 148 { 149 PetscInt i__2, i__3, kp1, j, k, l, ll, i, ipvt[9], kb, k3; 150 PetscInt k4, j3; 151 MatScalar *aa, *ax, *ay, work[81], stmp; 152 MatReal tmp, max; 153 154 PetscFunctionBegin; 155 if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE; 156 157 /* Parameter adjustments */ 158 a -= 10; 159 160 for (k = 1; k <= 8; ++k) { 161 kp1 = k + 1; 162 k3 = 9 * k; 163 k4 = k3 + k; 164 165 /* find l = pivot index */ 166 i__2 = 10 - k; 167 aa = &a[k4]; 168 max = PetscAbsScalar(aa[0]); 169 l = 1; 170 for (ll = 1; ll < i__2; ll++) { 171 tmp = PetscAbsScalar(aa[ll]); 172 if (tmp > max) { 173 max = tmp; 174 l = ll + 1; 175 } 176 } 177 l += k - 1; 178 ipvt[k - 1] = l; 179 180 if (a[l + k3] == 0.0) { 181 if (shift == 0.0) { 182 if (allowzeropivot) { 183 PetscCall(PetscInfo(NULL, "Zero pivot, row %" PetscInt_FMT "\n", k - 1)); 184 if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE; 185 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT, k - 1); 186 } else { 187 a[l + k3] = shift; 188 } 189 } 190 191 /* interchange if necessary */ 192 if (l != k) { 193 stmp = a[l + k3]; 194 a[l + k3] = a[k4]; 195 a[k4] = stmp; 196 } 197 198 /* compute multipliers */ 199 stmp = -1. / a[k4]; 200 i__2 = 9 - k; 201 aa = &a[1 + k4]; 202 for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp; 203 204 /* row elimination with column indexing */ 205 ax = &a[k4 + 1]; 206 for (j = kp1; j <= 9; ++j) { 207 j3 = 9 * j; 208 stmp = a[l + j3]; 209 if (l != k) { 210 a[l + j3] = a[k + j3]; 211 a[k + j3] = stmp; 212 } 213 214 i__3 = 9 - k; 215 ay = &a[1 + k + j3]; 216 for (ll = 0; ll < i__3; ll++) ay[ll] += stmp * ax[ll]; 217 } 218 } 219 ipvt[8] = 9; 220 if (a[90] == 0.0) { 221 if (PetscLikely(allowzeropivot)) { 222 PetscCall(PetscInfo(NULL, "Zero pivot, row 8\n")); 223 if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE; 224 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row 8"); 225 } 226 227 /* Now form the inverse */ 228 /* compute inverse(u) */ 229 for (k = 1; k <= 9; ++k) { 230 k3 = 9 * k; 231 k4 = k3 + k; 232 a[k4] = 1.0 / a[k4]; 233 stmp = -a[k4]; 234 i__2 = k - 1; 235 aa = &a[k3 + 1]; 236 for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp; 237 kp1 = k + 1; 238 if (9 < kp1) continue; 239 ax = aa; 240 for (j = kp1; j <= 9; ++j) { 241 j3 = 9 * j; 242 stmp = a[k + j3]; 243 a[k + j3] = 0.0; 244 ay = &a[j3 + 1]; 245 for (ll = 0; ll < k; ll++) ay[ll] += stmp * ax[ll]; 246 } 247 } 248 249 /* form inverse(u)*inverse(l) */ 250 for (kb = 1; kb <= 8; ++kb) { 251 k = 9 - kb; 252 k3 = 9 * k; 253 kp1 = k + 1; 254 aa = a + k3; 255 for (i = kp1; i <= 9; ++i) { 256 work[i - 1] = aa[i]; 257 aa[i] = 0.0; 258 } 259 for (j = kp1; j <= 9; ++j) { 260 stmp = work[j - 1]; 261 ax = &a[9 * j + 1]; 262 ay = &a[k3 + 1]; 263 ay[0] += stmp * ax[0]; 264 ay[1] += stmp * ax[1]; 265 ay[2] += stmp * ax[2]; 266 ay[3] += stmp * ax[3]; 267 ay[4] += stmp * ax[4]; 268 ay[5] += stmp * ax[5]; 269 ay[6] += stmp * ax[6]; 270 ay[7] += stmp * ax[7]; 271 ay[8] += stmp * ax[8]; 272 } 273 l = ipvt[k - 1]; 274 if (l != k) { 275 ax = &a[k3 + 1]; 276 ay = &a[9 * l + 1]; 277 stmp = ax[0]; 278 ax[0] = ay[0]; 279 ay[0] = stmp; 280 stmp = ax[1]; 281 ax[1] = ay[1]; 282 ay[1] = stmp; 283 stmp = ax[2]; 284 ax[2] = ay[2]; 285 ay[2] = stmp; 286 stmp = ax[3]; 287 ax[3] = ay[3]; 288 ay[3] = stmp; 289 stmp = ax[4]; 290 ax[4] = ay[4]; 291 ay[4] = stmp; 292 stmp = ax[5]; 293 ax[5] = ay[5]; 294 ay[5] = stmp; 295 stmp = ax[6]; 296 ax[6] = ay[6]; 297 ay[6] = stmp; 298 stmp = ax[7]; 299 ax[7] = ay[7]; 300 ay[7] = stmp; 301 stmp = ax[8]; 302 ax[8] = ay[8]; 303 ay[8] = stmp; 304 } 305 } 306 PetscFunctionReturn(PETSC_SUCCESS); 307 } 308 309 /* 310 Inverts 15 by 15 matrix using gaussian elimination with partial pivoting. 311 312 Used by the sparse factorization routines in 313 src/mat/impls/baij/seq 314 315 This is a combination of the Linpack routines 316 dgefa() and dgedi() specialized for a size of 15. 317 318 */ 319 320 PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_15(MatScalar *a, PetscInt *ipvt, MatScalar *work, PetscReal shift, PetscBool allowzeropivot, PetscBool *zeropivotdetected) 321 { 322 PetscInt i__2, i__3, kp1, j, k, l, ll, i, kb, k3; 323 PetscInt k4, j3; 324 MatScalar *aa, *ax, *ay, stmp; 325 MatReal tmp, max; 326 327 PetscFunctionBegin; 328 if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE; 329 330 /* Parameter adjustments */ 331 a -= 16; 332 333 for (k = 1; k <= 14; ++k) { 334 kp1 = k + 1; 335 k3 = 15 * k; 336 k4 = k3 + k; 337 338 /* find l = pivot index */ 339 i__2 = 16 - k; 340 aa = &a[k4]; 341 max = PetscAbsScalar(aa[0]); 342 l = 1; 343 for (ll = 1; ll < i__2; ll++) { 344 tmp = PetscAbsScalar(aa[ll]); 345 if (tmp > max) { 346 max = tmp; 347 l = ll + 1; 348 } 349 } 350 l += k - 1; 351 ipvt[k - 1] = l; 352 353 if (a[l + k3] == 0.0) { 354 if (shift == 0.0) { 355 if (allowzeropivot) { 356 PetscCall(PetscInfo(NULL, "Zero pivot, row %" PetscInt_FMT "\n", k - 1)); 357 if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE; 358 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT, k - 1); 359 } else { 360 a[l + k3] = shift; 361 } 362 } 363 364 /* interchange if necessary */ 365 if (l != k) { 366 stmp = a[l + k3]; 367 a[l + k3] = a[k4]; 368 a[k4] = stmp; 369 } 370 371 /* compute multipliers */ 372 stmp = -1. / a[k4]; 373 i__2 = 15 - k; 374 aa = &a[1 + k4]; 375 for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp; 376 377 /* row elimination with column indexing */ 378 ax = &a[k4 + 1]; 379 for (j = kp1; j <= 15; ++j) { 380 j3 = 15 * j; 381 stmp = a[l + j3]; 382 if (l != k) { 383 a[l + j3] = a[k + j3]; 384 a[k + j3] = stmp; 385 } 386 387 i__3 = 15 - k; 388 ay = &a[1 + k + j3]; 389 for (ll = 0; ll < i__3; ll++) ay[ll] += stmp * ax[ll]; 390 } 391 } 392 ipvt[14] = 15; 393 if (a[240] == 0.0) { 394 if (PetscLikely(allowzeropivot)) { 395 PetscCall(PetscInfo(NULL, "Zero pivot, row 14\n")); 396 if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE; 397 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row 14"); 398 } 399 400 /* Now form the inverse */ 401 /* compute inverse(u) */ 402 for (k = 1; k <= 15; ++k) { 403 k3 = 15 * k; 404 k4 = k3 + k; 405 a[k4] = 1.0 / a[k4]; 406 stmp = -a[k4]; 407 i__2 = k - 1; 408 aa = &a[k3 + 1]; 409 for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp; 410 kp1 = k + 1; 411 if (15 < kp1) continue; 412 ax = aa; 413 for (j = kp1; j <= 15; ++j) { 414 j3 = 15 * j; 415 stmp = a[k + j3]; 416 a[k + j3] = 0.0; 417 ay = &a[j3 + 1]; 418 for (ll = 0; ll < k; ll++) ay[ll] += stmp * ax[ll]; 419 } 420 } 421 422 /* form inverse(u)*inverse(l) */ 423 for (kb = 1; kb <= 14; ++kb) { 424 k = 15 - kb; 425 k3 = 15 * k; 426 kp1 = k + 1; 427 aa = a + k3; 428 for (i = kp1; i <= 15; ++i) { 429 work[i - 1] = aa[i]; 430 aa[i] = 0.0; 431 } 432 for (j = kp1; j <= 15; ++j) { 433 stmp = work[j - 1]; 434 ax = &a[15 * j + 1]; 435 ay = &a[k3 + 1]; 436 ay[0] += stmp * ax[0]; 437 ay[1] += stmp * ax[1]; 438 ay[2] += stmp * ax[2]; 439 ay[3] += stmp * ax[3]; 440 ay[4] += stmp * ax[4]; 441 ay[5] += stmp * ax[5]; 442 ay[6] += stmp * ax[6]; 443 ay[7] += stmp * ax[7]; 444 ay[8] += stmp * ax[8]; 445 ay[9] += stmp * ax[9]; 446 ay[10] += stmp * ax[10]; 447 ay[11] += stmp * ax[11]; 448 ay[12] += stmp * ax[12]; 449 ay[13] += stmp * ax[13]; 450 ay[14] += stmp * ax[14]; 451 } 452 l = ipvt[k - 1]; 453 if (l != k) { 454 ax = &a[k3 + 1]; 455 ay = &a[15 * l + 1]; 456 stmp = ax[0]; 457 ax[0] = ay[0]; 458 ay[0] = stmp; 459 stmp = ax[1]; 460 ax[1] = ay[1]; 461 ay[1] = stmp; 462 stmp = ax[2]; 463 ax[2] = ay[2]; 464 ay[2] = stmp; 465 stmp = ax[3]; 466 ax[3] = ay[3]; 467 ay[3] = stmp; 468 stmp = ax[4]; 469 ax[4] = ay[4]; 470 ay[4] = stmp; 471 stmp = ax[5]; 472 ax[5] = ay[5]; 473 ay[5] = stmp; 474 stmp = ax[6]; 475 ax[6] = ay[6]; 476 ay[6] = stmp; 477 stmp = ax[7]; 478 ax[7] = ay[7]; 479 ay[7] = stmp; 480 stmp = ax[8]; 481 ax[8] = ay[8]; 482 ay[8] = stmp; 483 stmp = ax[9]; 484 ax[9] = ay[9]; 485 ay[9] = stmp; 486 stmp = ax[10]; 487 ax[10] = ay[10]; 488 ay[10] = stmp; 489 stmp = ax[11]; 490 ax[11] = ay[11]; 491 ay[11] = stmp; 492 stmp = ax[12]; 493 ax[12] = ay[12]; 494 ay[12] = stmp; 495 stmp = ax[13]; 496 ax[13] = ay[13]; 497 ay[13] = stmp; 498 stmp = ax[14]; 499 ax[14] = ay[14]; 500 ay[14] = stmp; 501 } 502 } 503 PetscFunctionReturn(PETSC_SUCCESS); 504 } 505