1 2 /* 3 Inverts 2 by 2 matrix using partial pivoting. 4 5 Used by the sparse factorization routines in 6 src/mat/impls/baij/seq 7 8 9 This is a combination of the Linpack routines 10 dgefa() and dgedi() specialized for a size of 2. 11 12 */ 13 #include <petscsys.h> 14 15 #undef __FUNCT__ 16 #define __FUNCT__ "PetscKernel_A_gets_inverse_A_2" 17 PetscErrorCode PetscKernel_A_gets_inverse_A_2(MatScalar *a,PetscReal shift) 18 { 19 PetscInt i__2,i__3,kp1,j,k,l,ll,i,ipvt[2],k3; 20 PetscInt k4,j3; 21 MatScalar *aa,*ax,*ay,work[4],stmp; 22 MatReal tmp,max; 23 24 /* gaussian elimination with partial pivoting */ 25 26 PetscFunctionBegin; 27 shift = .25*shift*(1.e-12 + PetscAbsScalar(a[0]) + PetscAbsScalar(a[3])); 28 /* Parameter adjustments */ 29 a -= 3; 30 31 /*for (k = 1; k <= 1; ++k) {*/ 32 k = 1; 33 kp1 = k + 1; 34 k3 = 2*k; 35 k4 = k3 + k; 36 /* find l = pivot index */ 37 38 i__2 = 3 - k; 39 aa = &a[k4]; 40 max = PetscAbsScalar(aa[0]); 41 l = 1; 42 for (ll=1; ll<i__2; ll++) { 43 tmp = PetscAbsScalar(aa[ll]); 44 if (tmp > max) { max = tmp; l = ll+1;} 45 } 46 l += k - 1; 47 ipvt[k-1] = l; 48 49 if (a[l + k3] == 0.0) { 50 if (shift == 0.0) { 51 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1); 52 } else { 53 a[l + k3] = shift; 54 } 55 } 56 57 /* interchange if necessary */ 58 59 if (l != k) { 60 stmp = a[l + k3]; 61 a[l + k3] = a[k4]; 62 a[k4] = stmp; 63 } 64 65 /* compute multipliers */ 66 67 stmp = -1. / a[k4]; 68 i__2 = 2 - k; 69 aa = &a[1 + k4]; 70 for (ll=0; ll<i__2; ll++) { 71 aa[ll] *= stmp; 72 } 73 74 /* row elimination with column indexing */ 75 76 ax = &a[k4+1]; 77 for (j = kp1; j <= 2; ++j) { 78 j3 = 2*j; 79 stmp = a[l + j3]; 80 if (l != k) { 81 a[l + j3] = a[k + j3]; 82 a[k + j3] = stmp; 83 } 84 85 i__3 = 2 - k; 86 ay = &a[1+k+j3]; 87 for (ll=0; ll<i__3; ll++) { 88 ay[ll] += stmp*ax[ll]; 89 } 90 } 91 /*}*/ 92 ipvt[1] = 2; 93 if (a[6] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",1); 94 95 /* 96 Now form the inverse 97 */ 98 99 /* compute inverse(u) */ 100 101 for (k = 1; k <= 2; ++k) { 102 k3 = 2*k; 103 k4 = k3 + k; 104 a[k4] = 1.0 / a[k4]; 105 stmp = -a[k4]; 106 i__2 = k - 1; 107 aa = &a[k3 + 1]; 108 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 109 kp1 = k + 1; 110 if (2 < kp1) continue; 111 ax = aa; 112 for (j = kp1; j <= 2; ++j) { 113 j3 = 2*j; 114 stmp = a[k + j3]; 115 a[k + j3] = 0.0; 116 ay = &a[j3 + 1]; 117 for (ll=0; ll<k; ll++) { 118 ay[ll] += stmp*ax[ll]; 119 } 120 } 121 } 122 123 /* form inverse(u)*inverse(l) */ 124 125 /*for (kb = 1; kb <= 1; ++kb) {*/ 126 127 k = 1; 128 k3 = 2*k; 129 kp1 = k + 1; 130 aa = a + k3; 131 for (i = kp1; i <= 2; ++i) { 132 work[i-1] = aa[i]; 133 aa[i] = 0.0; 134 } 135 for (j = kp1; j <= 2; ++j) { 136 stmp = work[j-1]; 137 ax = &a[2*j + 1]; 138 ay = &a[k3 + 1]; 139 ay[0] += stmp*ax[0]; 140 ay[1] += stmp*ax[1]; 141 } 142 l = ipvt[k-1]; 143 if (l != k) { 144 ax = &a[k3 + 1]; 145 ay = &a[2*l + 1]; 146 stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp; 147 stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp; 148 } 149 PetscFunctionReturn(0); 150 } 151 152 #undef __FUNCT__ 153 #define __FUNCT__ "PetscKernel_A_gets_inverse_A_9" 154 PetscErrorCode PetscKernel_A_gets_inverse_A_9(MatScalar *a,PetscReal shift) 155 { 156 PetscInt i__2,i__3,kp1,j,k,l,ll,i,ipvt[9],kb,k3; 157 PetscInt k4,j3; 158 MatScalar *aa,*ax,*ay,work[81],stmp; 159 MatReal tmp,max; 160 161 /* gaussian elimination with partial pivoting */ 162 163 PetscFunctionBegin; 164 /* Parameter adjustments */ 165 a -= 10; 166 167 for (k = 1; k <= 8; ++k) { 168 kp1 = k + 1; 169 k3 = 9*k; 170 k4 = k3 + k; 171 /* find l = pivot index */ 172 173 i__2 = 10 - k; 174 aa = &a[k4]; 175 max = PetscAbsScalar(aa[0]); 176 l = 1; 177 for (ll=1; ll<i__2; ll++) { 178 tmp = PetscAbsScalar(aa[ll]); 179 if (tmp > max) { max = tmp; l = ll+1;} 180 } 181 l += k - 1; 182 ipvt[k-1] = l; 183 184 if (a[l + k3] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1); 185 186 /* interchange if necessary */ 187 188 if (l != k) { 189 stmp = a[l + k3]; 190 a[l + k3] = a[k4]; 191 a[k4] = stmp; 192 } 193 194 /* compute multipliers */ 195 196 stmp = -1. / a[k4]; 197 i__2 = 9 - k; 198 aa = &a[1 + k4]; 199 for (ll=0; ll<i__2; ll++) { 200 aa[ll] *= stmp; 201 } 202 203 /* row elimination with column indexing */ 204 205 ax = &a[k4+1]; 206 for (j = kp1; j <= 9; ++j) { 207 j3 = 9*j; 208 stmp = a[l + j3]; 209 if (l != k) { 210 a[l + j3] = a[k + j3]; 211 a[k + j3] = stmp; 212 } 213 214 i__3 = 9 - k; 215 ay = &a[1+k+j3]; 216 for (ll=0; ll<i__3; ll++) { 217 ay[ll] += stmp*ax[ll]; 218 } 219 } 220 } 221 ipvt[8] = 9; 222 if (a[90] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",6); 223 224 /* 225 Now form the inverse 226 */ 227 228 /* compute inverse(u) */ 229 230 for (k = 1; k <= 9; ++k) { 231 k3 = 9*k; 232 k4 = k3 + k; 233 a[k4] = 1.0 / a[k4]; 234 stmp = -a[k4]; 235 i__2 = k - 1; 236 aa = &a[k3 + 1]; 237 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 238 kp1 = k + 1; 239 if (9 < kp1) continue; 240 ax = aa; 241 for (j = kp1; j <= 9; ++j) { 242 j3 = 9*j; 243 stmp = a[k + j3]; 244 a[k + j3] = 0.0; 245 ay = &a[j3 + 1]; 246 for (ll=0; ll<k; ll++) { 247 ay[ll] += stmp*ax[ll]; 248 } 249 } 250 } 251 252 /* form inverse(u)*inverse(l) */ 253 254 for (kb = 1; kb <= 8; ++kb) { 255 k = 9 - kb; 256 k3 = 9*k; 257 kp1 = k + 1; 258 aa = a + k3; 259 for (i = kp1; i <= 9; ++i) { 260 work[i-1] = aa[i]; 261 aa[i] = 0.0; 262 } 263 for (j = kp1; j <= 9; ++j) { 264 stmp = work[j-1]; 265 ax = &a[9*j + 1]; 266 ay = &a[k3 + 1]; 267 ay[0] += stmp*ax[0]; 268 ay[1] += stmp*ax[1]; 269 ay[2] += stmp*ax[2]; 270 ay[3] += stmp*ax[3]; 271 ay[4] += stmp*ax[4]; 272 ay[5] += stmp*ax[5]; 273 ay[6] += stmp*ax[6]; 274 ay[7] += stmp*ax[7]; 275 ay[8] += stmp*ax[8]; 276 } 277 l = ipvt[k-1]; 278 if (l != k) { 279 ax = &a[k3 + 1]; 280 ay = &a[9*l + 1]; 281 stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp; 282 stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp; 283 stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp; 284 stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp; 285 stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp; 286 stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp; 287 stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp; 288 stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp; 289 stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp; 290 } 291 } 292 PetscFunctionReturn(0); 293 } 294 295 /* 296 Inverts 15 by 15 matrix using partial pivoting. 297 298 Used by the sparse factorization routines in 299 src/mat/impls/baij/seq 300 301 This is a combination of the Linpack routines 302 dgefa() and dgedi() specialized for a size of 15. 303 304 */ 305 306 #undef __FUNCT__ 307 #define __FUNCT__ "PetscKernel_A_gets_inverse_A_15" 308 PetscErrorCode PetscKernel_A_gets_inverse_A_15(MatScalar *a,PetscInt *ipvt,MatScalar *work,PetscReal shift) 309 { 310 PetscInt i__2,i__3,kp1,j,k,l,ll,i,kb,k3; 311 PetscInt k4,j3; 312 MatScalar *aa,*ax,*ay,stmp; 313 MatReal tmp,max; 314 315 /* gaussian elimination with partial pivoting */ 316 317 PetscFunctionBegin; 318 /* Parameter adjustments */ 319 a -= 16; 320 321 for (k = 1; k <= 14; ++k) { 322 kp1 = k + 1; 323 k3 = 15*k; 324 k4 = k3 + k; 325 /* find l = pivot index */ 326 327 i__2 = 16 - k; 328 aa = &a[k4]; 329 max = PetscAbsScalar(aa[0]); 330 l = 1; 331 for (ll=1; ll<i__2; ll++) { 332 tmp = PetscAbsScalar(aa[ll]); 333 if (tmp > max) { max = tmp; l = ll+1;} 334 } 335 l += k - 1; 336 ipvt[k-1] = l; 337 338 if (a[l + k3] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1); 339 340 /* interchange if necessary */ 341 342 if (l != k) { 343 stmp = a[l + k3]; 344 a[l + k3] = a[k4]; 345 a[k4] = stmp; 346 } 347 348 /* compute multipliers */ 349 350 stmp = -1. / a[k4]; 351 i__2 = 15 - k; 352 aa = &a[1 + k4]; 353 for (ll=0; ll<i__2; ll++) { 354 aa[ll] *= stmp; 355 } 356 357 /* row elimination with column indexing */ 358 359 ax = &a[k4+1]; 360 for (j = kp1; j <= 15; ++j) { 361 j3 = 15*j; 362 stmp = a[l + j3]; 363 if (l != k) { 364 a[l + j3] = a[k + j3]; 365 a[k + j3] = stmp; 366 } 367 368 i__3 = 15 - k; 369 ay = &a[1+k+j3]; 370 for (ll=0; ll<i__3; ll++) { 371 ay[ll] += stmp*ax[ll]; 372 } 373 } 374 } 375 ipvt[14] = 15; 376 if (a[240] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",6); 377 378 /* 379 Now form the inverse 380 */ 381 382 /* compute inverse(u) */ 383 384 for (k = 1; k <= 15; ++k) { 385 k3 = 15*k; 386 k4 = k3 + k; 387 a[k4] = 1.0 / a[k4]; 388 stmp = -a[k4]; 389 i__2 = k - 1; 390 aa = &a[k3 + 1]; 391 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 392 kp1 = k + 1; 393 if (15 < kp1) continue; 394 ax = aa; 395 for (j = kp1; j <= 15; ++j) { 396 j3 = 15*j; 397 stmp = a[k + j3]; 398 a[k + j3] = 0.0; 399 ay = &a[j3 + 1]; 400 for (ll=0; ll<k; ll++) { 401 ay[ll] += stmp*ax[ll]; 402 } 403 } 404 } 405 406 /* form inverse(u)*inverse(l) */ 407 408 for (kb = 1; kb <= 14; ++kb) { 409 k = 15 - kb; 410 k3 = 15*k; 411 kp1 = k + 1; 412 aa = a + k3; 413 for (i = kp1; i <= 15; ++i) { 414 work[i-1] = aa[i]; 415 aa[i] = 0.0; 416 } 417 for (j = kp1; j <= 15; ++j) { 418 stmp = work[j-1]; 419 ax = &a[15*j + 1]; 420 ay = &a[k3 + 1]; 421 ay[0] += stmp*ax[0]; 422 ay[1] += stmp*ax[1]; 423 ay[2] += stmp*ax[2]; 424 ay[3] += stmp*ax[3]; 425 ay[4] += stmp*ax[4]; 426 ay[5] += stmp*ax[5]; 427 ay[6] += stmp*ax[6]; 428 ay[7] += stmp*ax[7]; 429 ay[8] += stmp*ax[8]; 430 ay[9] += stmp*ax[9]; 431 ay[10] += stmp*ax[10]; 432 ay[11] += stmp*ax[11]; 433 ay[12] += stmp*ax[12]; 434 ay[13] += stmp*ax[13]; 435 ay[14] += stmp*ax[14]; 436 } 437 l = ipvt[k-1]; 438 if (l != k) { 439 ax = &a[k3 + 1]; 440 ay = &a[15*l + 1]; 441 stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp; 442 stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp; 443 stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp; 444 stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp; 445 stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp; 446 stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp; 447 stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp; 448 stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp; 449 stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp; 450 stmp = ax[9]; ax[9] = ay[9]; ay[9] = stmp; 451 stmp = ax[10]; ax[10] = ay[10]; ay[10] = stmp; 452 stmp = ax[11]; ax[11] = ay[11]; ay[11] = stmp; 453 stmp = ax[12]; ax[12] = ay[12]; ay[12] = stmp; 454 stmp = ax[13]; ax[13] = ay[13]; ay[13] = stmp; 455 stmp = ax[14]; ax[14] = ay[14]; ay[14] = stmp; 456 } 457 } 458 PetscFunctionReturn(0); 459 } 460