1 #define PETSCMAT_DLL 2 3 /* 4 Inverts 2 by 2 matrix using partial pivoting. 5 6 Used by the sparse factorization routines in 7 src/mat/impls/baij/seq 8 9 10 This is a combination of the Linpack routines 11 dgefa() and dgedi() specialized for a size of 2. 12 13 */ 14 #include "petscsys.h" 15 16 #undef __FUNCT__ 17 #define __FUNCT__ "Kernel_A_gets_inverse_A_2" 18 PetscErrorCode Kernel_A_gets_inverse_A_2(MatScalar *a,PetscReal shift) 19 { 20 PetscInt i__2,i__3,kp1,j,k,l,ll,i,ipvt[2],k3; 21 PetscInt k4,j3; 22 MatScalar *aa,*ax,*ay,work[4],stmp; 23 MatReal tmp,max; 24 25 /* gaussian elimination with partial pivoting */ 26 27 PetscFunctionBegin; 28 shift = .25*shift*(1.e-12 + PetscAbsScalar(a[0]) + PetscAbsScalar(a[3])); 29 /* Parameter adjustments */ 30 a -= 3; 31 32 /*for (k = 1; k <= 1; ++k) {*/ 33 k = 1; 34 kp1 = k + 1; 35 k3 = 2*k; 36 k4 = k3 + k; 37 /* find l = pivot index */ 38 39 i__2 = 3 - k; 40 aa = &a[k4]; 41 max = PetscAbsScalar(aa[0]); 42 l = 1; 43 for (ll=1; ll<i__2; ll++) { 44 tmp = PetscAbsScalar(aa[ll]); 45 if (tmp > max) { max = tmp; l = ll+1;} 46 } 47 l += k - 1; 48 ipvt[k-1] = l; 49 50 if (a[l + k3] == 0.0) { 51 if (shift == 0.0) { 52 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1); 53 } else { 54 a[l + k3] = shift; 55 } 56 } 57 58 /* interchange if necessary */ 59 60 if (l != k) { 61 stmp = a[l + k3]; 62 a[l + k3] = a[k4]; 63 a[k4] = stmp; 64 } 65 66 /* compute multipliers */ 67 68 stmp = -1. / a[k4]; 69 i__2 = 2 - k; 70 aa = &a[1 + k4]; 71 for (ll=0; ll<i__2; ll++) { 72 aa[ll] *= stmp; 73 } 74 75 /* row elimination with column indexing */ 76 77 ax = &a[k4+1]; 78 for (j = kp1; j <= 2; ++j) { 79 j3 = 2*j; 80 stmp = a[l + j3]; 81 if (l != k) { 82 a[l + j3] = a[k + j3]; 83 a[k + j3] = stmp; 84 } 85 86 i__3 = 2 - k; 87 ay = &a[1+k+j3]; 88 for (ll=0; ll<i__3; ll++) { 89 ay[ll] += stmp*ax[ll]; 90 } 91 } 92 /*}*/ 93 ipvt[1] = 2; 94 if (a[6] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",1); 95 96 /* 97 Now form the inverse 98 */ 99 100 /* compute inverse(u) */ 101 102 for (k = 1; k <= 2; ++k) { 103 k3 = 2*k; 104 k4 = k3 + k; 105 a[k4] = 1.0 / a[k4]; 106 stmp = -a[k4]; 107 i__2 = k - 1; 108 aa = &a[k3 + 1]; 109 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 110 kp1 = k + 1; 111 if (2 < kp1) continue; 112 ax = aa; 113 for (j = kp1; j <= 2; ++j) { 114 j3 = 2*j; 115 stmp = a[k + j3]; 116 a[k + j3] = 0.0; 117 ay = &a[j3 + 1]; 118 for (ll=0; ll<k; ll++) { 119 ay[ll] += stmp*ax[ll]; 120 } 121 } 122 } 123 124 /* form inverse(u)*inverse(l) */ 125 126 /*for (kb = 1; kb <= 1; ++kb) {*/ 127 128 k = 1; 129 k3 = 2*k; 130 kp1 = k + 1; 131 aa = a + k3; 132 for (i = kp1; i <= 2; ++i) { 133 work[i-1] = aa[i]; 134 aa[i] = 0.0; 135 } 136 for (j = kp1; j <= 2; ++j) { 137 stmp = work[j-1]; 138 ax = &a[2*j + 1]; 139 ay = &a[k3 + 1]; 140 ay[0] += stmp*ax[0]; 141 ay[1] += stmp*ax[1]; 142 } 143 l = ipvt[k-1]; 144 if (l != k) { 145 ax = &a[k3 + 1]; 146 ay = &a[2*l + 1]; 147 stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp; 148 stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp; 149 } 150 151 PetscFunctionReturn(0); 152 } 153 154 #undef __FUNCT__ 155 #define __FUNCT__ "Kernel_A_gets_inverse_A_9" 156 PetscErrorCode Kernel_A_gets_inverse_A_9(MatScalar *a,PetscReal shift) 157 { 158 PetscInt i__2,i__3,kp1,j,k,l,ll,i,ipvt[9],kb,k3; 159 PetscInt k4,j3; 160 MatScalar *aa,*ax,*ay,work[81],stmp; 161 MatReal tmp,max; 162 163 /* gaussian elimination with partial pivoting */ 164 165 PetscFunctionBegin; 166 /* Parameter adjustments */ 167 a -= 10; 168 169 for (k = 1; k <= 8; ++k) { 170 kp1 = k + 1; 171 k3 = 9*k; 172 k4 = k3 + k; 173 /* find l = pivot index */ 174 175 i__2 = 10 - k; 176 aa = &a[k4]; 177 max = PetscAbsScalar(aa[0]); 178 l = 1; 179 for (ll=1; ll<i__2; ll++) { 180 tmp = PetscAbsScalar(aa[ll]); 181 if (tmp > max) { max = tmp; l = ll+1;} 182 } 183 l += k - 1; 184 ipvt[k-1] = l; 185 186 if (a[l + k3] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1); 187 188 /* interchange if necessary */ 189 190 if (l != k) { 191 stmp = a[l + k3]; 192 a[l + k3] = a[k4]; 193 a[k4] = stmp; 194 } 195 196 /* compute multipliers */ 197 198 stmp = -1. / a[k4]; 199 i__2 = 9 - k; 200 aa = &a[1 + k4]; 201 for (ll=0; ll<i__2; ll++) { 202 aa[ll] *= stmp; 203 } 204 205 /* row elimination with column indexing */ 206 207 ax = &a[k4+1]; 208 for (j = kp1; j <= 9; ++j) { 209 j3 = 9*j; 210 stmp = a[l + j3]; 211 if (l != k) { 212 a[l + j3] = a[k + j3]; 213 a[k + j3] = stmp; 214 } 215 216 i__3 = 9 - k; 217 ay = &a[1+k+j3]; 218 for (ll=0; ll<i__3; ll++) { 219 ay[ll] += stmp*ax[ll]; 220 } 221 } 222 } 223 ipvt[8] = 9; 224 if (a[90] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",6); 225 226 /* 227 Now form the inverse 228 */ 229 230 /* compute inverse(u) */ 231 232 for (k = 1; k <= 9; ++k) { 233 k3 = 9*k; 234 k4 = k3 + k; 235 a[k4] = 1.0 / a[k4]; 236 stmp = -a[k4]; 237 i__2 = k - 1; 238 aa = &a[k3 + 1]; 239 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 240 kp1 = k + 1; 241 if (9 < kp1) continue; 242 ax = aa; 243 for (j = kp1; j <= 9; ++j) { 244 j3 = 9*j; 245 stmp = a[k + j3]; 246 a[k + j3] = 0.0; 247 ay = &a[j3 + 1]; 248 for (ll=0; ll<k; ll++) { 249 ay[ll] += stmp*ax[ll]; 250 } 251 } 252 } 253 254 /* form inverse(u)*inverse(l) */ 255 256 for (kb = 1; kb <= 8; ++kb) { 257 k = 9 - kb; 258 k3 = 9*k; 259 kp1 = k + 1; 260 aa = a + k3; 261 for (i = kp1; i <= 9; ++i) { 262 work[i-1] = aa[i]; 263 aa[i] = 0.0; 264 } 265 for (j = kp1; j <= 9; ++j) { 266 stmp = work[j-1]; 267 ax = &a[9*j + 1]; 268 ay = &a[k3 + 1]; 269 ay[0] += stmp*ax[0]; 270 ay[1] += stmp*ax[1]; 271 ay[2] += stmp*ax[2]; 272 ay[3] += stmp*ax[3]; 273 ay[4] += stmp*ax[4]; 274 ay[5] += stmp*ax[5]; 275 ay[6] += stmp*ax[6]; 276 ay[7] += stmp*ax[7]; 277 ay[8] += stmp*ax[8]; 278 } 279 l = ipvt[k-1]; 280 if (l != k) { 281 ax = &a[k3 + 1]; 282 ay = &a[9*l + 1]; 283 stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp; 284 stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp; 285 stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp; 286 stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp; 287 stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp; 288 stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp; 289 stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp; 290 stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp; 291 stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp; 292 } 293 } 294 PetscFunctionReturn(0); 295 } 296 297 /* 298 Inverts 15 by 15 matrix using partial pivoting. 299 300 Used by the sparse factorization routines in 301 src/mat/impls/baij/seq 302 303 This is a combination of the Linpack routines 304 dgefa() and dgedi() specialized for a size of 15. 305 306 */ 307 308 #undef __FUNCT__ 309 #define __FUNCT__ "Kernel_A_gets_inverse_A_15" 310 PetscErrorCode Kernel_A_gets_inverse_A_15(MatScalar *a,PetscInt *ipvt,MatScalar *work,PetscReal shift) 311 { 312 PetscInt i__2,i__3,kp1,j,k,l,ll,i,kb,k3; 313 PetscInt k4,j3; 314 MatScalar *aa,*ax,*ay,stmp; 315 MatReal tmp,max; 316 317 /* gaussian elimination with partial pivoting */ 318 319 PetscFunctionBegin; 320 /* Parameter adjustments */ 321 a -= 16; 322 323 for (k = 1; k <= 14; ++k) { 324 kp1 = k + 1; 325 k3 = 15*k; 326 k4 = k3 + k; 327 /* find l = pivot index */ 328 329 i__2 = 16 - k; 330 aa = &a[k4]; 331 max = PetscAbsScalar(aa[0]); 332 l = 1; 333 for (ll=1; ll<i__2; ll++) { 334 tmp = PetscAbsScalar(aa[ll]); 335 if (tmp > max) { max = tmp; l = ll+1;} 336 } 337 l += k - 1; 338 ipvt[k-1] = l; 339 340 if (a[l + k3] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1); 341 342 /* interchange if necessary */ 343 344 if (l != k) { 345 stmp = a[l + k3]; 346 a[l + k3] = a[k4]; 347 a[k4] = stmp; 348 } 349 350 /* compute multipliers */ 351 352 stmp = -1. / a[k4]; 353 i__2 = 15 - k; 354 aa = &a[1 + k4]; 355 for (ll=0; ll<i__2; ll++) { 356 aa[ll] *= stmp; 357 } 358 359 /* row elimination with column indexing */ 360 361 ax = &a[k4+1]; 362 for (j = kp1; j <= 15; ++j) { 363 j3 = 15*j; 364 stmp = a[l + j3]; 365 if (l != k) { 366 a[l + j3] = a[k + j3]; 367 a[k + j3] = stmp; 368 } 369 370 i__3 = 15 - k; 371 ay = &a[1+k+j3]; 372 for (ll=0; ll<i__3; ll++) { 373 ay[ll] += stmp*ax[ll]; 374 } 375 } 376 } 377 ipvt[14] = 15; 378 if (a[240] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",6); 379 380 /* 381 Now form the inverse 382 */ 383 384 /* compute inverse(u) */ 385 386 for (k = 1; k <= 15; ++k) { 387 k3 = 15*k; 388 k4 = k3 + k; 389 a[k4] = 1.0 / a[k4]; 390 stmp = -a[k4]; 391 i__2 = k - 1; 392 aa = &a[k3 + 1]; 393 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 394 kp1 = k + 1; 395 if (15 < kp1) continue; 396 ax = aa; 397 for (j = kp1; j <= 15; ++j) { 398 j3 = 15*j; 399 stmp = a[k + j3]; 400 a[k + j3] = 0.0; 401 ay = &a[j3 + 1]; 402 for (ll=0; ll<k; ll++) { 403 ay[ll] += stmp*ax[ll]; 404 } 405 } 406 } 407 408 /* form inverse(u)*inverse(l) */ 409 410 for (kb = 1; kb <= 14; ++kb) { 411 k = 15 - kb; 412 k3 = 15*k; 413 kp1 = k + 1; 414 aa = a + k3; 415 for (i = kp1; i <= 15; ++i) { 416 work[i-1] = aa[i]; 417 aa[i] = 0.0; 418 } 419 for (j = kp1; j <= 15; ++j) { 420 stmp = work[j-1]; 421 ax = &a[15*j + 1]; 422 ay = &a[k3 + 1]; 423 ay[0] += stmp*ax[0]; 424 ay[1] += stmp*ax[1]; 425 ay[2] += stmp*ax[2]; 426 ay[3] += stmp*ax[3]; 427 ay[4] += stmp*ax[4]; 428 ay[5] += stmp*ax[5]; 429 ay[6] += stmp*ax[6]; 430 ay[7] += stmp*ax[7]; 431 ay[8] += stmp*ax[8]; 432 ay[9] += stmp*ax[9]; 433 ay[10] += stmp*ax[10]; 434 ay[11] += stmp*ax[11]; 435 ay[12] += stmp*ax[12]; 436 ay[13] += stmp*ax[13]; 437 ay[14] += stmp*ax[14]; 438 } 439 l = ipvt[k-1]; 440 if (l != k) { 441 ax = &a[k3 + 1]; 442 ay = &a[15*l + 1]; 443 stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp; 444 stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp; 445 stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp; 446 stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp; 447 stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp; 448 stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp; 449 stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp; 450 stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp; 451 stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp; 452 stmp = ax[9]; ax[9] = ay[9]; ay[9] = stmp; 453 stmp = ax[10]; ax[10] = ay[10]; ay[10] = stmp; 454 stmp = ax[11]; ax[11] = ay[11]; ay[11] = stmp; 455 stmp = ax[12]; ax[12] = ay[12]; ay[12] = stmp; 456 stmp = ax[13]; ax[13] = ay[13]; ay[13] = stmp; 457 stmp = ax[14]; ax[14] = ay[14]; ay[14] = stmp; 458 } 459 } 460 PetscFunctionReturn(0); 461 } 462