1 2 /* 3 Inverts 2 by 2 matrix using partial pivoting. 4 5 Used by the sparse factorization routines in 6 src/mat/impls/baij/seq 7 8 9 This is a combination of the Linpack routines 10 dgefa() and dgedi() specialized for a size of 2. 11 12 */ 13 #include <petscsys.h> 14 15 #undef __FUNCT__ 16 #define __FUNCT__ "PetscKernel_A_gets_inverse_A_2" 17 PetscErrorCode PetscKernel_A_gets_inverse_A_2(MatScalar *a,PetscReal shift) 18 { 19 PetscInt i__2,i__3,kp1,j,k,l,ll,i,ipvt[2],k3; 20 PetscInt k4,j3; 21 MatScalar *aa,*ax,*ay,work[4],stmp; 22 MatReal tmp,max; 23 24 /* gaussian elimination with partial pivoting */ 25 26 PetscFunctionBegin; 27 shift = .25*shift*(1.e-12 + PetscAbsScalar(a[0]) + PetscAbsScalar(a[3])); 28 /* Parameter adjustments */ 29 a -= 3; 30 31 /*for (k = 1; k <= 1; ++k) {*/ 32 k = 1; 33 kp1 = k + 1; 34 k3 = 2*k; 35 k4 = k3 + k; 36 /* find l = pivot index */ 37 38 i__2 = 3 - k; 39 aa = &a[k4]; 40 max = PetscAbsScalar(aa[0]); 41 l = 1; 42 for (ll=1; ll<i__2; ll++) { 43 tmp = PetscAbsScalar(aa[ll]); 44 if (tmp > max) { max = tmp; l = ll+1;} 45 } 46 l += k - 1; 47 ipvt[k-1] = l; 48 49 if (a[l + k3] == 0.0) { 50 if (shift == 0.0) { 51 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1); 52 } else { 53 a[l + k3] = shift; 54 } 55 } 56 57 /* interchange if necessary */ 58 59 if (l != k) { 60 stmp = a[l + k3]; 61 a[l + k3] = a[k4]; 62 a[k4] = stmp; 63 } 64 65 /* compute multipliers */ 66 67 stmp = -1. / a[k4]; 68 i__2 = 2 - k; 69 aa = &a[1 + k4]; 70 for (ll=0; ll<i__2; ll++) { 71 aa[ll] *= stmp; 72 } 73 74 /* row elimination with column indexing */ 75 76 ax = &a[k4+1]; 77 for (j = kp1; j <= 2; ++j) { 78 j3 = 2*j; 79 stmp = a[l + j3]; 80 if (l != k) { 81 a[l + j3] = a[k + j3]; 82 a[k + j3] = stmp; 83 } 84 85 i__3 = 2 - k; 86 ay = &a[1+k+j3]; 87 for (ll=0; ll<i__3; ll++) { 88 ay[ll] += stmp*ax[ll]; 89 } 90 } 91 /*}*/ 92 ipvt[1] = 2; 93 if (a[6] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",1); 94 95 /* 96 Now form the inverse 97 */ 98 99 /* compute inverse(u) */ 100 101 for (k = 1; k <= 2; ++k) { 102 k3 = 2*k; 103 k4 = k3 + k; 104 a[k4] = 1.0 / a[k4]; 105 stmp = -a[k4]; 106 i__2 = k - 1; 107 aa = &a[k3 + 1]; 108 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 109 kp1 = k + 1; 110 if (2 < kp1) continue; 111 ax = aa; 112 for (j = kp1; j <= 2; ++j) { 113 j3 = 2*j; 114 stmp = a[k + j3]; 115 a[k + j3] = 0.0; 116 ay = &a[j3 + 1]; 117 for (ll=0; ll<k; ll++) { 118 ay[ll] += stmp*ax[ll]; 119 } 120 } 121 } 122 123 /* form inverse(u)*inverse(l) */ 124 125 /*for (kb = 1; kb <= 1; ++kb) {*/ 126 127 k = 1; 128 k3 = 2*k; 129 kp1 = k + 1; 130 aa = a + k3; 131 for (i = kp1; i <= 2; ++i) { 132 work[i-1] = aa[i]; 133 aa[i] = 0.0; 134 } 135 for (j = kp1; j <= 2; ++j) { 136 stmp = work[j-1]; 137 ax = &a[2*j + 1]; 138 ay = &a[k3 + 1]; 139 ay[0] += stmp*ax[0]; 140 ay[1] += stmp*ax[1]; 141 } 142 l = ipvt[k-1]; 143 if (l != k) { 144 ax = &a[k3 + 1]; 145 ay = &a[2*l + 1]; 146 stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp; 147 stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp; 148 } 149 150 PetscFunctionReturn(0); 151 } 152 153 #undef __FUNCT__ 154 #define __FUNCT__ "PetscKernel_A_gets_inverse_A_9" 155 PetscErrorCode PetscKernel_A_gets_inverse_A_9(MatScalar *a,PetscReal shift) 156 { 157 PetscInt i__2,i__3,kp1,j,k,l,ll,i,ipvt[9],kb,k3; 158 PetscInt k4,j3; 159 MatScalar *aa,*ax,*ay,work[81],stmp; 160 MatReal tmp,max; 161 162 /* gaussian elimination with partial pivoting */ 163 164 PetscFunctionBegin; 165 /* Parameter adjustments */ 166 a -= 10; 167 168 for (k = 1; k <= 8; ++k) { 169 kp1 = k + 1; 170 k3 = 9*k; 171 k4 = k3 + k; 172 /* find l = pivot index */ 173 174 i__2 = 10 - k; 175 aa = &a[k4]; 176 max = PetscAbsScalar(aa[0]); 177 l = 1; 178 for (ll=1; ll<i__2; ll++) { 179 tmp = PetscAbsScalar(aa[ll]); 180 if (tmp > max) { max = tmp; l = ll+1;} 181 } 182 l += k - 1; 183 ipvt[k-1] = l; 184 185 if (a[l + k3] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1); 186 187 /* interchange if necessary */ 188 189 if (l != k) { 190 stmp = a[l + k3]; 191 a[l + k3] = a[k4]; 192 a[k4] = stmp; 193 } 194 195 /* compute multipliers */ 196 197 stmp = -1. / a[k4]; 198 i__2 = 9 - k; 199 aa = &a[1 + k4]; 200 for (ll=0; ll<i__2; ll++) { 201 aa[ll] *= stmp; 202 } 203 204 /* row elimination with column indexing */ 205 206 ax = &a[k4+1]; 207 for (j = kp1; j <= 9; ++j) { 208 j3 = 9*j; 209 stmp = a[l + j3]; 210 if (l != k) { 211 a[l + j3] = a[k + j3]; 212 a[k + j3] = stmp; 213 } 214 215 i__3 = 9 - k; 216 ay = &a[1+k+j3]; 217 for (ll=0; ll<i__3; ll++) { 218 ay[ll] += stmp*ax[ll]; 219 } 220 } 221 } 222 ipvt[8] = 9; 223 if (a[90] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",6); 224 225 /* 226 Now form the inverse 227 */ 228 229 /* compute inverse(u) */ 230 231 for (k = 1; k <= 9; ++k) { 232 k3 = 9*k; 233 k4 = k3 + k; 234 a[k4] = 1.0 / a[k4]; 235 stmp = -a[k4]; 236 i__2 = k - 1; 237 aa = &a[k3 + 1]; 238 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 239 kp1 = k + 1; 240 if (9 < kp1) continue; 241 ax = aa; 242 for (j = kp1; j <= 9; ++j) { 243 j3 = 9*j; 244 stmp = a[k + j3]; 245 a[k + j3] = 0.0; 246 ay = &a[j3 + 1]; 247 for (ll=0; ll<k; ll++) { 248 ay[ll] += stmp*ax[ll]; 249 } 250 } 251 } 252 253 /* form inverse(u)*inverse(l) */ 254 255 for (kb = 1; kb <= 8; ++kb) { 256 k = 9 - kb; 257 k3 = 9*k; 258 kp1 = k + 1; 259 aa = a + k3; 260 for (i = kp1; i <= 9; ++i) { 261 work[i-1] = aa[i]; 262 aa[i] = 0.0; 263 } 264 for (j = kp1; j <= 9; ++j) { 265 stmp = work[j-1]; 266 ax = &a[9*j + 1]; 267 ay = &a[k3 + 1]; 268 ay[0] += stmp*ax[0]; 269 ay[1] += stmp*ax[1]; 270 ay[2] += stmp*ax[2]; 271 ay[3] += stmp*ax[3]; 272 ay[4] += stmp*ax[4]; 273 ay[5] += stmp*ax[5]; 274 ay[6] += stmp*ax[6]; 275 ay[7] += stmp*ax[7]; 276 ay[8] += stmp*ax[8]; 277 } 278 l = ipvt[k-1]; 279 if (l != k) { 280 ax = &a[k3 + 1]; 281 ay = &a[9*l + 1]; 282 stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp; 283 stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp; 284 stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp; 285 stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp; 286 stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp; 287 stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp; 288 stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp; 289 stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp; 290 stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp; 291 } 292 } 293 PetscFunctionReturn(0); 294 } 295 296 /* 297 Inverts 15 by 15 matrix using partial pivoting. 298 299 Used by the sparse factorization routines in 300 src/mat/impls/baij/seq 301 302 This is a combination of the Linpack routines 303 dgefa() and dgedi() specialized for a size of 15. 304 305 */ 306 307 #undef __FUNCT__ 308 #define __FUNCT__ "PetscKernel_A_gets_inverse_A_15" 309 PetscErrorCode PetscKernel_A_gets_inverse_A_15(MatScalar *a,PetscInt *ipvt,MatScalar *work,PetscReal shift) 310 { 311 PetscInt i__2,i__3,kp1,j,k,l,ll,i,kb,k3; 312 PetscInt k4,j3; 313 MatScalar *aa,*ax,*ay,stmp; 314 MatReal tmp,max; 315 316 /* gaussian elimination with partial pivoting */ 317 318 PetscFunctionBegin; 319 /* Parameter adjustments */ 320 a -= 16; 321 322 for (k = 1; k <= 14; ++k) { 323 kp1 = k + 1; 324 k3 = 15*k; 325 k4 = k3 + k; 326 /* find l = pivot index */ 327 328 i__2 = 16 - k; 329 aa = &a[k4]; 330 max = PetscAbsScalar(aa[0]); 331 l = 1; 332 for (ll=1; ll<i__2; ll++) { 333 tmp = PetscAbsScalar(aa[ll]); 334 if (tmp > max) { max = tmp; l = ll+1;} 335 } 336 l += k - 1; 337 ipvt[k-1] = l; 338 339 if (a[l + k3] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1); 340 341 /* interchange if necessary */ 342 343 if (l != k) { 344 stmp = a[l + k3]; 345 a[l + k3] = a[k4]; 346 a[k4] = stmp; 347 } 348 349 /* compute multipliers */ 350 351 stmp = -1. / a[k4]; 352 i__2 = 15 - k; 353 aa = &a[1 + k4]; 354 for (ll=0; ll<i__2; ll++) { 355 aa[ll] *= stmp; 356 } 357 358 /* row elimination with column indexing */ 359 360 ax = &a[k4+1]; 361 for (j = kp1; j <= 15; ++j) { 362 j3 = 15*j; 363 stmp = a[l + j3]; 364 if (l != k) { 365 a[l + j3] = a[k + j3]; 366 a[k + j3] = stmp; 367 } 368 369 i__3 = 15 - k; 370 ay = &a[1+k+j3]; 371 for (ll=0; ll<i__3; ll++) { 372 ay[ll] += stmp*ax[ll]; 373 } 374 } 375 } 376 ipvt[14] = 15; 377 if (a[240] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",6); 378 379 /* 380 Now form the inverse 381 */ 382 383 /* compute inverse(u) */ 384 385 for (k = 1; k <= 15; ++k) { 386 k3 = 15*k; 387 k4 = k3 + k; 388 a[k4] = 1.0 / a[k4]; 389 stmp = -a[k4]; 390 i__2 = k - 1; 391 aa = &a[k3 + 1]; 392 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 393 kp1 = k + 1; 394 if (15 < kp1) continue; 395 ax = aa; 396 for (j = kp1; j <= 15; ++j) { 397 j3 = 15*j; 398 stmp = a[k + j3]; 399 a[k + j3] = 0.0; 400 ay = &a[j3 + 1]; 401 for (ll=0; ll<k; ll++) { 402 ay[ll] += stmp*ax[ll]; 403 } 404 } 405 } 406 407 /* form inverse(u)*inverse(l) */ 408 409 for (kb = 1; kb <= 14; ++kb) { 410 k = 15 - kb; 411 k3 = 15*k; 412 kp1 = k + 1; 413 aa = a + k3; 414 for (i = kp1; i <= 15; ++i) { 415 work[i-1] = aa[i]; 416 aa[i] = 0.0; 417 } 418 for (j = kp1; j <= 15; ++j) { 419 stmp = work[j-1]; 420 ax = &a[15*j + 1]; 421 ay = &a[k3 + 1]; 422 ay[0] += stmp*ax[0]; 423 ay[1] += stmp*ax[1]; 424 ay[2] += stmp*ax[2]; 425 ay[3] += stmp*ax[3]; 426 ay[4] += stmp*ax[4]; 427 ay[5] += stmp*ax[5]; 428 ay[6] += stmp*ax[6]; 429 ay[7] += stmp*ax[7]; 430 ay[8] += stmp*ax[8]; 431 ay[9] += stmp*ax[9]; 432 ay[10] += stmp*ax[10]; 433 ay[11] += stmp*ax[11]; 434 ay[12] += stmp*ax[12]; 435 ay[13] += stmp*ax[13]; 436 ay[14] += stmp*ax[14]; 437 } 438 l = ipvt[k-1]; 439 if (l != k) { 440 ax = &a[k3 + 1]; 441 ay = &a[15*l + 1]; 442 stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp; 443 stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp; 444 stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp; 445 stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp; 446 stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp; 447 stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp; 448 stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp; 449 stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp; 450 stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp; 451 stmp = ax[9]; ax[9] = ay[9]; ay[9] = stmp; 452 stmp = ax[10]; ax[10] = ay[10]; ay[10] = stmp; 453 stmp = ax[11]; ax[11] = ay[11]; ay[11] = stmp; 454 stmp = ax[12]; ax[12] = ay[12]; ay[12] = stmp; 455 stmp = ax[13]; ax[13] = ay[13]; ay[13] = stmp; 456 stmp = ax[14]; ax[14] = ay[14]; ay[14] = stmp; 457 } 458 } 459 PetscFunctionReturn(0); 460 } 461