1 #define PETSCMAT_DLL 2 3 /* 4 Inverts 2 by 2 matrix using partial pivoting. 5 6 Used by the sparse factorization routines in 7 src/mat/impls/baij/seq 8 9 10 This is a combination of the Linpack routines 11 dgefa() and dgedi() specialized for a size of 2. 12 13 */ 14 #include "petscsys.h" 15 16 #undef __FUNCT__ 17 #define __FUNCT__ "Kernel_A_gets_inverse_A_2" 18 PetscErrorCode Kernel_A_gets_inverse_A_2(MatScalar *a,PetscReal shift) 19 { 20 PetscInt i__2,i__3,kp1,j,k,l,ll,i,ipvt[2],k3; 21 PetscInt k4,j3; 22 MatScalar *aa,*ax,*ay,work[4],stmp; 23 MatReal tmp,max; 24 25 /* gaussian elimination with partial pivoting */ 26 27 PetscFunctionBegin; 28 /* Parameter adjustments */ 29 a -= 3; 30 31 /*for (k = 1; k <= 1; ++k) {*/ 32 k = 1; 33 kp1 = k + 1; 34 k3 = 2*k; 35 k4 = k3 + k; 36 /* find l = pivot index */ 37 38 i__2 = 3 - k; 39 aa = &a[k4]; 40 max = PetscAbsScalar(aa[0]); 41 l = 1; 42 for (ll=1; ll<i__2; ll++) { 43 tmp = PetscAbsScalar(aa[ll]); 44 if (tmp > max) { max = tmp; l = ll+1;} 45 } 46 l += k - 1; 47 ipvt[k-1] = l; 48 49 if (a[l + k3] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1); 50 51 /* interchange if necessary */ 52 53 if (l != k) { 54 stmp = a[l + k3]; 55 a[l + k3] = a[k4]; 56 a[k4] = stmp; 57 } 58 59 /* compute multipliers */ 60 61 stmp = -1. / a[k4]; 62 i__2 = 2 - k; 63 aa = &a[1 + k4]; 64 for (ll=0; ll<i__2; ll++) { 65 aa[ll] *= stmp; 66 } 67 68 /* row elimination with column indexing */ 69 70 ax = &a[k4+1]; 71 for (j = kp1; j <= 2; ++j) { 72 j3 = 2*j; 73 stmp = a[l + j3]; 74 if (l != k) { 75 a[l + j3] = a[k + j3]; 76 a[k + j3] = stmp; 77 } 78 79 i__3 = 2 - k; 80 ay = &a[1+k+j3]; 81 for (ll=0; ll<i__3; ll++) { 82 ay[ll] += stmp*ax[ll]; 83 } 84 } 85 /*}*/ 86 ipvt[1] = 2; 87 if (a[6] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",1); 88 89 /* 90 Now form the inverse 91 */ 92 93 /* compute inverse(u) */ 94 95 for (k = 1; k <= 2; ++k) { 96 k3 = 2*k; 97 k4 = k3 + k; 98 a[k4] = 1.0 / a[k4]; 99 stmp = -a[k4]; 100 i__2 = k - 1; 101 aa = &a[k3 + 1]; 102 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 103 kp1 = k + 1; 104 if (2 < kp1) continue; 105 ax = aa; 106 for (j = kp1; j <= 2; ++j) { 107 j3 = 2*j; 108 stmp = a[k + j3]; 109 a[k + j3] = 0.0; 110 ay = &a[j3 + 1]; 111 for (ll=0; ll<k; ll++) { 112 ay[ll] += stmp*ax[ll]; 113 } 114 } 115 } 116 117 /* form inverse(u)*inverse(l) */ 118 119 /*for (kb = 1; kb <= 1; ++kb) {*/ 120 121 k = 1; 122 k3 = 2*k; 123 kp1 = k + 1; 124 aa = a + k3; 125 for (i = kp1; i <= 2; ++i) { 126 work[i-1] = aa[i]; 127 aa[i] = 0.0; 128 } 129 for (j = kp1; j <= 2; ++j) { 130 stmp = work[j-1]; 131 ax = &a[2*j + 1]; 132 ay = &a[k3 + 1]; 133 ay[0] += stmp*ax[0]; 134 ay[1] += stmp*ax[1]; 135 } 136 l = ipvt[k-1]; 137 if (l != k) { 138 ax = &a[k3 + 1]; 139 ay = &a[2*l + 1]; 140 stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp; 141 stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp; 142 } 143 144 PetscFunctionReturn(0); 145 } 146 147 #undef __FUNCT__ 148 #define __FUNCT__ "Kernel_A_gets_inverse_A_9" 149 PetscErrorCode Kernel_A_gets_inverse_A_9(MatScalar *a,PetscReal shift) 150 { 151 PetscInt i__2,i__3,kp1,j,k,l,ll,i,ipvt[9],kb,k3; 152 PetscInt k4,j3; 153 MatScalar *aa,*ax,*ay,work[81],stmp; 154 MatReal tmp,max; 155 156 /* gaussian elimination with partial pivoting */ 157 158 PetscFunctionBegin; 159 /* Parameter adjustments */ 160 a -= 10; 161 162 for (k = 1; k <= 8; ++k) { 163 kp1 = k + 1; 164 k3 = 9*k; 165 k4 = k3 + k; 166 /* find l = pivot index */ 167 168 i__2 = 10 - k; 169 aa = &a[k4]; 170 max = PetscAbsScalar(aa[0]); 171 l = 1; 172 for (ll=1; ll<i__2; ll++) { 173 tmp = PetscAbsScalar(aa[ll]); 174 if (tmp > max) { max = tmp; l = ll+1;} 175 } 176 l += k - 1; 177 ipvt[k-1] = l; 178 179 if (a[l + k3] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1); 180 181 /* interchange if necessary */ 182 183 if (l != k) { 184 stmp = a[l + k3]; 185 a[l + k3] = a[k4]; 186 a[k4] = stmp; 187 } 188 189 /* compute multipliers */ 190 191 stmp = -1. / a[k4]; 192 i__2 = 9 - k; 193 aa = &a[1 + k4]; 194 for (ll=0; ll<i__2; ll++) { 195 aa[ll] *= stmp; 196 } 197 198 /* row elimination with column indexing */ 199 200 ax = &a[k4+1]; 201 for (j = kp1; j <= 9; ++j) { 202 j3 = 9*j; 203 stmp = a[l + j3]; 204 if (l != k) { 205 a[l + j3] = a[k + j3]; 206 a[k + j3] = stmp; 207 } 208 209 i__3 = 9 - k; 210 ay = &a[1+k+j3]; 211 for (ll=0; ll<i__3; ll++) { 212 ay[ll] += stmp*ax[ll]; 213 } 214 } 215 } 216 ipvt[8] = 9; 217 if (a[90] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",6); 218 219 /* 220 Now form the inverse 221 */ 222 223 /* compute inverse(u) */ 224 225 for (k = 1; k <= 9; ++k) { 226 k3 = 9*k; 227 k4 = k3 + k; 228 a[k4] = 1.0 / a[k4]; 229 stmp = -a[k4]; 230 i__2 = k - 1; 231 aa = &a[k3 + 1]; 232 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 233 kp1 = k + 1; 234 if (9 < kp1) continue; 235 ax = aa; 236 for (j = kp1; j <= 9; ++j) { 237 j3 = 9*j; 238 stmp = a[k + j3]; 239 a[k + j3] = 0.0; 240 ay = &a[j3 + 1]; 241 for (ll=0; ll<k; ll++) { 242 ay[ll] += stmp*ax[ll]; 243 } 244 } 245 } 246 247 /* form inverse(u)*inverse(l) */ 248 249 for (kb = 1; kb <= 8; ++kb) { 250 k = 9 - kb; 251 k3 = 9*k; 252 kp1 = k + 1; 253 aa = a + k3; 254 for (i = kp1; i <= 9; ++i) { 255 work[i-1] = aa[i]; 256 aa[i] = 0.0; 257 } 258 for (j = kp1; j <= 9; ++j) { 259 stmp = work[j-1]; 260 ax = &a[9*j + 1]; 261 ay = &a[k3 + 1]; 262 ay[0] += stmp*ax[0]; 263 ay[1] += stmp*ax[1]; 264 ay[2] += stmp*ax[2]; 265 ay[3] += stmp*ax[3]; 266 ay[4] += stmp*ax[4]; 267 ay[5] += stmp*ax[5]; 268 ay[6] += stmp*ax[6]; 269 ay[7] += stmp*ax[7]; 270 ay[8] += stmp*ax[8]; 271 } 272 l = ipvt[k-1]; 273 if (l != k) { 274 ax = &a[k3 + 1]; 275 ay = &a[9*l + 1]; 276 stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp; 277 stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp; 278 stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp; 279 stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp; 280 stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp; 281 stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp; 282 stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp; 283 stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp; 284 stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp; 285 } 286 } 287 PetscFunctionReturn(0); 288 } 289 290 /* 291 Inverts 15 by 15 matrix using partial pivoting. 292 293 Used by the sparse factorization routines in 294 src/mat/impls/baij/seq 295 296 This is a combination of the Linpack routines 297 dgefa() and dgedi() specialized for a size of 15. 298 299 */ 300 301 #undef __FUNCT__ 302 #define __FUNCT__ "Kernel_A_gets_inverse_A_15" 303 PetscErrorCode Kernel_A_gets_inverse_A_15(MatScalar *a,PetscInt *ipvt,MatScalar *work,PetscReal shift) 304 { 305 PetscInt i__2,i__3,kp1,j,k,l,ll,i,kb,k3; 306 PetscInt k4,j3; 307 MatScalar *aa,*ax,*ay,stmp; 308 MatReal tmp,max; 309 310 /* gaussian elimination with partial pivoting */ 311 312 PetscFunctionBegin; 313 /* Parameter adjustments */ 314 a -= 16; 315 316 for (k = 1; k <= 14; ++k) { 317 kp1 = k + 1; 318 k3 = 15*k; 319 k4 = k3 + k; 320 /* find l = pivot index */ 321 322 i__2 = 16 - k; 323 aa = &a[k4]; 324 max = PetscAbsScalar(aa[0]); 325 l = 1; 326 for (ll=1; ll<i__2; ll++) { 327 tmp = PetscAbsScalar(aa[ll]); 328 if (tmp > max) { max = tmp; l = ll+1;} 329 } 330 l += k - 1; 331 ipvt[k-1] = l; 332 333 if (a[l + k3] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1); 334 335 /* interchange if necessary */ 336 337 if (l != k) { 338 stmp = a[l + k3]; 339 a[l + k3] = a[k4]; 340 a[k4] = stmp; 341 } 342 343 /* compute multipliers */ 344 345 stmp = -1. / a[k4]; 346 i__2 = 15 - k; 347 aa = &a[1 + k4]; 348 for (ll=0; ll<i__2; ll++) { 349 aa[ll] *= stmp; 350 } 351 352 /* row elimination with column indexing */ 353 354 ax = &a[k4+1]; 355 for (j = kp1; j <= 15; ++j) { 356 j3 = 15*j; 357 stmp = a[l + j3]; 358 if (l != k) { 359 a[l + j3] = a[k + j3]; 360 a[k + j3] = stmp; 361 } 362 363 i__3 = 15 - k; 364 ay = &a[1+k+j3]; 365 for (ll=0; ll<i__3; ll++) { 366 ay[ll] += stmp*ax[ll]; 367 } 368 } 369 } 370 ipvt[14] = 15; 371 if (a[240] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",6); 372 373 /* 374 Now form the inverse 375 */ 376 377 /* compute inverse(u) */ 378 379 for (k = 1; k <= 15; ++k) { 380 k3 = 15*k; 381 k4 = k3 + k; 382 a[k4] = 1.0 / a[k4]; 383 stmp = -a[k4]; 384 i__2 = k - 1; 385 aa = &a[k3 + 1]; 386 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 387 kp1 = k + 1; 388 if (15 < kp1) continue; 389 ax = aa; 390 for (j = kp1; j <= 15; ++j) { 391 j3 = 15*j; 392 stmp = a[k + j3]; 393 a[k + j3] = 0.0; 394 ay = &a[j3 + 1]; 395 for (ll=0; ll<k; ll++) { 396 ay[ll] += stmp*ax[ll]; 397 } 398 } 399 } 400 401 /* form inverse(u)*inverse(l) */ 402 403 for (kb = 1; kb <= 14; ++kb) { 404 k = 15 - kb; 405 k3 = 15*k; 406 kp1 = k + 1; 407 aa = a + k3; 408 for (i = kp1; i <= 15; ++i) { 409 work[i-1] = aa[i]; 410 aa[i] = 0.0; 411 } 412 for (j = kp1; j <= 15; ++j) { 413 stmp = work[j-1]; 414 ax = &a[15*j + 1]; 415 ay = &a[k3 + 1]; 416 ay[0] += stmp*ax[0]; 417 ay[1] += stmp*ax[1]; 418 ay[2] += stmp*ax[2]; 419 ay[3] += stmp*ax[3]; 420 ay[4] += stmp*ax[4]; 421 ay[5] += stmp*ax[5]; 422 ay[6] += stmp*ax[6]; 423 ay[7] += stmp*ax[7]; 424 ay[8] += stmp*ax[8]; 425 ay[9] += stmp*ax[9]; 426 ay[10] += stmp*ax[10]; 427 ay[11] += stmp*ax[11]; 428 ay[12] += stmp*ax[12]; 429 ay[13] += stmp*ax[13]; 430 ay[14] += stmp*ax[14]; 431 } 432 l = ipvt[k-1]; 433 if (l != k) { 434 ax = &a[k3 + 1]; 435 ay = &a[15*l + 1]; 436 stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp; 437 stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp; 438 stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp; 439 stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp; 440 stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp; 441 stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp; 442 stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp; 443 stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp; 444 stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp; 445 stmp = ax[9]; ax[9] = ay[9]; ay[9] = stmp; 446 stmp = ax[10]; ax[10] = ay[10]; ay[10] = stmp; 447 stmp = ax[11]; ax[11] = ay[11]; ay[11] = stmp; 448 stmp = ax[12]; ax[12] = ay[12]; ay[12] = stmp; 449 stmp = ax[13]; ax[13] = ay[13]; ay[13] = stmp; 450 stmp = ax[14]; ax[14] = ay[14]; ay[14] = stmp; 451 } 452 } 453 PetscFunctionReturn(0); 454 } 455