1 2 /* 3 Inverts 2 by 2 matrix using gaussian elimination with partial pivoting. 4 5 Used by the sparse factorization routines in 6 src/mat/impls/baij/seq 7 8 This is a combination of the Linpack routines 9 dgefa() and dgedi() specialized for a size of 2. 10 11 */ 12 #include <petscsys.h> 13 14 PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_2(MatScalar *a, PetscReal shift, PetscBool allowzeropivot, PetscBool *zeropivotdetected) 15 { 16 PetscInt i__2, i__3, kp1, j, k, l, ll, i, ipvt[2], k3; 17 PetscInt k4, j3; 18 MatScalar *aa, *ax, *ay, work[4], stmp; 19 MatReal tmp, max; 20 21 PetscFunctionBegin; 22 if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE; 23 shift = .25 * shift * (1.e-12 + PetscAbsScalar(a[0]) + PetscAbsScalar(a[3])); 24 25 /* Parameter adjustments */ 26 a -= 3; 27 28 k = 1; 29 kp1 = k + 1; 30 k3 = 2 * k; 31 k4 = k3 + k; 32 33 /* find l = pivot index */ 34 i__2 = 3 - k; 35 aa = &a[k4]; 36 max = PetscAbsScalar(aa[0]); 37 l = 1; 38 for (ll = 1; ll < i__2; ll++) { 39 tmp = PetscAbsScalar(aa[ll]); 40 if (tmp > max) { 41 max = tmp; 42 l = ll + 1; 43 } 44 } 45 l += k - 1; 46 ipvt[k - 1] = l; 47 48 if (a[l + k3] == 0.0) { 49 if (shift == 0.0) { 50 if (allowzeropivot) { 51 PetscCall(PetscInfo(NULL, "Zero pivot, row %" PetscInt_FMT "\n", k - 1)); 52 if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE; 53 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT, k - 1); 54 } else { 55 a[l + k3] = shift; 56 } 57 } 58 59 /* interchange if necessary */ 60 if (l != k) { 61 stmp = a[l + k3]; 62 a[l + k3] = a[k4]; 63 a[k4] = stmp; 64 } 65 66 /* compute multipliers */ 67 stmp = -1. / a[k4]; 68 i__2 = 2 - k; 69 aa = &a[1 + k4]; 70 for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp; 71 72 /* row elimination with column indexing */ 73 ax = &a[k4 + 1]; 74 for (j = kp1; j <= 2; ++j) { 75 j3 = 2 * j; 76 stmp = a[l + j3]; 77 if (l != k) { 78 a[l + j3] = a[k + j3]; 79 a[k + j3] = stmp; 80 } 81 82 i__3 = 2 - k; 83 ay = &a[1 + k + j3]; 84 for (ll = 0; ll < i__3; ll++) ay[ll] += stmp * ax[ll]; 85 } 86 87 ipvt[1] = 2; 88 if (a[6] == 0.0) { 89 if (PetscLikely(allowzeropivot)) { 90 PetscCall(PetscInfo(NULL, "Zero pivot, row 1\n")); 91 if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE; 92 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row 1"); 93 } 94 95 /* Now form the inverse */ 96 /* compute inverse(u) */ 97 for (k = 1; k <= 2; ++k) { 98 k3 = 2 * k; 99 k4 = k3 + k; 100 a[k4] = 1.0 / a[k4]; 101 stmp = -a[k4]; 102 i__2 = k - 1; 103 aa = &a[k3 + 1]; 104 for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp; 105 kp1 = k + 1; 106 if (2 < kp1) continue; 107 ax = aa; 108 for (j = kp1; j <= 2; ++j) { 109 j3 = 2 * j; 110 stmp = a[k + j3]; 111 a[k + j3] = 0.0; 112 ay = &a[j3 + 1]; 113 for (ll = 0; ll < k; ll++) ay[ll] += stmp * ax[ll]; 114 } 115 } 116 117 /* form inverse(u)*inverse(l) */ 118 k = 1; 119 k3 = 2 * k; 120 kp1 = k + 1; 121 aa = a + k3; 122 for (i = kp1; i <= 2; ++i) { 123 work[i - 1] = aa[i]; 124 aa[i] = 0.0; 125 } 126 for (j = kp1; j <= 2; ++j) { 127 stmp = work[j - 1]; 128 ax = &a[2 * j + 1]; 129 ay = &a[k3 + 1]; 130 ay[0] += stmp * ax[0]; 131 ay[1] += stmp * ax[1]; 132 } 133 l = ipvt[k - 1]; 134 if (l != k) { 135 ax = &a[k3 + 1]; 136 ay = &a[2 * l + 1]; 137 stmp = ax[0]; 138 ax[0] = ay[0]; 139 ay[0] = stmp; 140 stmp = ax[1]; 141 ax[1] = ay[1]; 142 ay[1] = stmp; 143 } 144 PetscFunctionReturn(PETSC_SUCCESS); 145 } 146 147 /* gaussian elimination with partial pivoting */ 148 PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_9(MatScalar *a, PetscReal shift, PetscBool allowzeropivot, PetscBool *zeropivotdetected) 149 { 150 PetscInt i__2, i__3, kp1, j, k, l, ll, i, ipvt[9], kb, k3; 151 PetscInt k4, j3; 152 MatScalar *aa, *ax, *ay, work[81], stmp; 153 MatReal tmp, max; 154 155 PetscFunctionBegin; 156 if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE; 157 158 /* Parameter adjustments */ 159 a -= 10; 160 161 for (k = 1; k <= 8; ++k) { 162 kp1 = k + 1; 163 k3 = 9 * k; 164 k4 = k3 + k; 165 166 /* find l = pivot index */ 167 i__2 = 10 - k; 168 aa = &a[k4]; 169 max = PetscAbsScalar(aa[0]); 170 l = 1; 171 for (ll = 1; ll < i__2; ll++) { 172 tmp = PetscAbsScalar(aa[ll]); 173 if (tmp > max) { 174 max = tmp; 175 l = ll + 1; 176 } 177 } 178 l += k - 1; 179 ipvt[k - 1] = l; 180 181 if (a[l + k3] == 0.0) { 182 if (shift == 0.0) { 183 if (allowzeropivot) { 184 PetscCall(PetscInfo(NULL, "Zero pivot, row %" PetscInt_FMT "\n", k - 1)); 185 if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE; 186 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT, k - 1); 187 } else { 188 a[l + k3] = shift; 189 } 190 } 191 192 /* interchange if necessary */ 193 if (l != k) { 194 stmp = a[l + k3]; 195 a[l + k3] = a[k4]; 196 a[k4] = stmp; 197 } 198 199 /* compute multipliers */ 200 stmp = -1. / a[k4]; 201 i__2 = 9 - k; 202 aa = &a[1 + k4]; 203 for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp; 204 205 /* row elimination with column indexing */ 206 ax = &a[k4 + 1]; 207 for (j = kp1; j <= 9; ++j) { 208 j3 = 9 * j; 209 stmp = a[l + j3]; 210 if (l != k) { 211 a[l + j3] = a[k + j3]; 212 a[k + j3] = stmp; 213 } 214 215 i__3 = 9 - k; 216 ay = &a[1 + k + j3]; 217 for (ll = 0; ll < i__3; ll++) ay[ll] += stmp * ax[ll]; 218 } 219 } 220 ipvt[8] = 9; 221 if (a[90] == 0.0) { 222 if (PetscLikely(allowzeropivot)) { 223 PetscCall(PetscInfo(NULL, "Zero pivot, row 8\n")); 224 if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE; 225 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row 8"); 226 } 227 228 /* Now form the inverse */ 229 /* compute inverse(u) */ 230 for (k = 1; k <= 9; ++k) { 231 k3 = 9 * k; 232 k4 = k3 + k; 233 a[k4] = 1.0 / a[k4]; 234 stmp = -a[k4]; 235 i__2 = k - 1; 236 aa = &a[k3 + 1]; 237 for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp; 238 kp1 = k + 1; 239 if (9 < kp1) continue; 240 ax = aa; 241 for (j = kp1; j <= 9; ++j) { 242 j3 = 9 * j; 243 stmp = a[k + j3]; 244 a[k + j3] = 0.0; 245 ay = &a[j3 + 1]; 246 for (ll = 0; ll < k; ll++) ay[ll] += stmp * ax[ll]; 247 } 248 } 249 250 /* form inverse(u)*inverse(l) */ 251 for (kb = 1; kb <= 8; ++kb) { 252 k = 9 - kb; 253 k3 = 9 * k; 254 kp1 = k + 1; 255 aa = a + k3; 256 for (i = kp1; i <= 9; ++i) { 257 work[i - 1] = aa[i]; 258 aa[i] = 0.0; 259 } 260 for (j = kp1; j <= 9; ++j) { 261 stmp = work[j - 1]; 262 ax = &a[9 * j + 1]; 263 ay = &a[k3 + 1]; 264 ay[0] += stmp * ax[0]; 265 ay[1] += stmp * ax[1]; 266 ay[2] += stmp * ax[2]; 267 ay[3] += stmp * ax[3]; 268 ay[4] += stmp * ax[4]; 269 ay[5] += stmp * ax[5]; 270 ay[6] += stmp * ax[6]; 271 ay[7] += stmp * ax[7]; 272 ay[8] += stmp * ax[8]; 273 } 274 l = ipvt[k - 1]; 275 if (l != k) { 276 ax = &a[k3 + 1]; 277 ay = &a[9 * l + 1]; 278 stmp = ax[0]; 279 ax[0] = ay[0]; 280 ay[0] = stmp; 281 stmp = ax[1]; 282 ax[1] = ay[1]; 283 ay[1] = stmp; 284 stmp = ax[2]; 285 ax[2] = ay[2]; 286 ay[2] = stmp; 287 stmp = ax[3]; 288 ax[3] = ay[3]; 289 ay[3] = stmp; 290 stmp = ax[4]; 291 ax[4] = ay[4]; 292 ay[4] = stmp; 293 stmp = ax[5]; 294 ax[5] = ay[5]; 295 ay[5] = stmp; 296 stmp = ax[6]; 297 ax[6] = ay[6]; 298 ay[6] = stmp; 299 stmp = ax[7]; 300 ax[7] = ay[7]; 301 ay[7] = stmp; 302 stmp = ax[8]; 303 ax[8] = ay[8]; 304 ay[8] = stmp; 305 } 306 } 307 PetscFunctionReturn(PETSC_SUCCESS); 308 } 309 310 /* 311 Inverts 15 by 15 matrix using gaussian elimination with partial pivoting. 312 313 Used by the sparse factorization routines in 314 src/mat/impls/baij/seq 315 316 This is a combination of the Linpack routines 317 dgefa() and dgedi() specialized for a size of 15. 318 319 */ 320 321 PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_15(MatScalar *a, PetscInt *ipvt, MatScalar *work, PetscReal shift, PetscBool allowzeropivot, PetscBool *zeropivotdetected) 322 { 323 PetscInt i__2, i__3, kp1, j, k, l, ll, i, kb, k3; 324 PetscInt k4, j3; 325 MatScalar *aa, *ax, *ay, stmp; 326 MatReal tmp, max; 327 328 PetscFunctionBegin; 329 if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE; 330 331 /* Parameter adjustments */ 332 a -= 16; 333 334 for (k = 1; k <= 14; ++k) { 335 kp1 = k + 1; 336 k3 = 15 * k; 337 k4 = k3 + k; 338 339 /* find l = pivot index */ 340 i__2 = 16 - k; 341 aa = &a[k4]; 342 max = PetscAbsScalar(aa[0]); 343 l = 1; 344 for (ll = 1; ll < i__2; ll++) { 345 tmp = PetscAbsScalar(aa[ll]); 346 if (tmp > max) { 347 max = tmp; 348 l = ll + 1; 349 } 350 } 351 l += k - 1; 352 ipvt[k - 1] = l; 353 354 if (a[l + k3] == 0.0) { 355 if (shift == 0.0) { 356 if (allowzeropivot) { 357 PetscCall(PetscInfo(NULL, "Zero pivot, row %" PetscInt_FMT "\n", k - 1)); 358 if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE; 359 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT, k - 1); 360 } else { 361 a[l + k3] = shift; 362 } 363 } 364 365 /* interchange if necessary */ 366 if (l != k) { 367 stmp = a[l + k3]; 368 a[l + k3] = a[k4]; 369 a[k4] = stmp; 370 } 371 372 /* compute multipliers */ 373 stmp = -1. / a[k4]; 374 i__2 = 15 - k; 375 aa = &a[1 + k4]; 376 for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp; 377 378 /* row elimination with column indexing */ 379 ax = &a[k4 + 1]; 380 for (j = kp1; j <= 15; ++j) { 381 j3 = 15 * j; 382 stmp = a[l + j3]; 383 if (l != k) { 384 a[l + j3] = a[k + j3]; 385 a[k + j3] = stmp; 386 } 387 388 i__3 = 15 - k; 389 ay = &a[1 + k + j3]; 390 for (ll = 0; ll < i__3; ll++) ay[ll] += stmp * ax[ll]; 391 } 392 } 393 ipvt[14] = 15; 394 if (a[240] == 0.0) { 395 if (PetscLikely(allowzeropivot)) { 396 PetscCall(PetscInfo(NULL, "Zero pivot, row 14\n")); 397 if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE; 398 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row 14"); 399 } 400 401 /* Now form the inverse */ 402 /* compute inverse(u) */ 403 for (k = 1; k <= 15; ++k) { 404 k3 = 15 * k; 405 k4 = k3 + k; 406 a[k4] = 1.0 / a[k4]; 407 stmp = -a[k4]; 408 i__2 = k - 1; 409 aa = &a[k3 + 1]; 410 for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp; 411 kp1 = k + 1; 412 if (15 < kp1) continue; 413 ax = aa; 414 for (j = kp1; j <= 15; ++j) { 415 j3 = 15 * j; 416 stmp = a[k + j3]; 417 a[k + j3] = 0.0; 418 ay = &a[j3 + 1]; 419 for (ll = 0; ll < k; ll++) ay[ll] += stmp * ax[ll]; 420 } 421 } 422 423 /* form inverse(u)*inverse(l) */ 424 for (kb = 1; kb <= 14; ++kb) { 425 k = 15 - kb; 426 k3 = 15 * k; 427 kp1 = k + 1; 428 aa = a + k3; 429 for (i = kp1; i <= 15; ++i) { 430 work[i - 1] = aa[i]; 431 aa[i] = 0.0; 432 } 433 for (j = kp1; j <= 15; ++j) { 434 stmp = work[j - 1]; 435 ax = &a[15 * j + 1]; 436 ay = &a[k3 + 1]; 437 ay[0] += stmp * ax[0]; 438 ay[1] += stmp * ax[1]; 439 ay[2] += stmp * ax[2]; 440 ay[3] += stmp * ax[3]; 441 ay[4] += stmp * ax[4]; 442 ay[5] += stmp * ax[5]; 443 ay[6] += stmp * ax[6]; 444 ay[7] += stmp * ax[7]; 445 ay[8] += stmp * ax[8]; 446 ay[9] += stmp * ax[9]; 447 ay[10] += stmp * ax[10]; 448 ay[11] += stmp * ax[11]; 449 ay[12] += stmp * ax[12]; 450 ay[13] += stmp * ax[13]; 451 ay[14] += stmp * ax[14]; 452 } 453 l = ipvt[k - 1]; 454 if (l != k) { 455 ax = &a[k3 + 1]; 456 ay = &a[15 * l + 1]; 457 stmp = ax[0]; 458 ax[0] = ay[0]; 459 ay[0] = stmp; 460 stmp = ax[1]; 461 ax[1] = ay[1]; 462 ay[1] = stmp; 463 stmp = ax[2]; 464 ax[2] = ay[2]; 465 ay[2] = stmp; 466 stmp = ax[3]; 467 ax[3] = ay[3]; 468 ay[3] = stmp; 469 stmp = ax[4]; 470 ax[4] = ay[4]; 471 ay[4] = stmp; 472 stmp = ax[5]; 473 ax[5] = ay[5]; 474 ay[5] = stmp; 475 stmp = ax[6]; 476 ax[6] = ay[6]; 477 ay[6] = stmp; 478 stmp = ax[7]; 479 ax[7] = ay[7]; 480 ay[7] = stmp; 481 stmp = ax[8]; 482 ax[8] = ay[8]; 483 ay[8] = stmp; 484 stmp = ax[9]; 485 ax[9] = ay[9]; 486 ay[9] = stmp; 487 stmp = ax[10]; 488 ax[10] = ay[10]; 489 ay[10] = stmp; 490 stmp = ax[11]; 491 ax[11] = ay[11]; 492 ay[11] = stmp; 493 stmp = ax[12]; 494 ax[12] = ay[12]; 495 ay[12] = stmp; 496 stmp = ax[13]; 497 ax[13] = ay[13]; 498 ay[13] = stmp; 499 stmp = ax[14]; 500 ax[14] = ay[14]; 501 ay[14] = stmp; 502 } 503 } 504 PetscFunctionReturn(PETSC_SUCCESS); 505 } 506