1 2 /* 3 Inverts 2 by 2 matrix using partial pivoting. 4 5 Used by the sparse factorization routines in 6 src/mat/impls/baij/seq 7 8 9 This is a combination of the Linpack routines 10 dgefa() and dgedi() specialized for a size of 2. 11 12 */ 13 #include <petscsys.h> 14 15 #undef __FUNCT__ 16 #define __FUNCT__ "PetscKernel_A_gets_inverse_A_2" 17 PetscErrorCode PetscKernel_A_gets_inverse_A_2(MatScalar *a,PetscReal shift) 18 { 19 PetscInt i__2,i__3,kp1,j,k,l,ll,i,ipvt[2],k3; 20 PetscInt k4,j3; 21 MatScalar *aa,*ax,*ay,work[4],stmp; 22 MatReal tmp,max; 23 24 /* gaussian elimination with partial pivoting */ 25 26 PetscFunctionBegin; 27 shift = .25*shift*(1.e-12 + PetscAbsScalar(a[0]) + PetscAbsScalar(a[3])); 28 /* Parameter adjustments */ 29 a -= 3; 30 31 /*for (k = 1; k <= 1; ++k) {*/ 32 k = 1; 33 kp1 = k + 1; 34 k3 = 2*k; 35 k4 = k3 + k; 36 /* find l = pivot index */ 37 38 i__2 = 3 - k; 39 aa = &a[k4]; 40 max = PetscAbsScalar(aa[0]); 41 l = 1; 42 for (ll=1; ll<i__2; ll++) { 43 tmp = PetscAbsScalar(aa[ll]); 44 if (tmp > max) { max = tmp; l = ll+1;} 45 } 46 l += k - 1; 47 ipvt[k-1] = l; 48 49 if (a[l + k3] == 0.0) { 50 if (shift == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1); 51 else { 52 a[l + k3] = shift; 53 } 54 } 55 56 /* interchange if necessary */ 57 58 if (l != k) { 59 stmp = a[l + k3]; 60 a[l + k3] = a[k4]; 61 a[k4] = stmp; 62 } 63 64 /* compute multipliers */ 65 66 stmp = -1. / a[k4]; 67 i__2 = 2 - k; 68 aa = &a[1 + k4]; 69 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 70 71 /* row elimination with column indexing */ 72 73 ax = &a[k4+1]; 74 for (j = kp1; j <= 2; ++j) { 75 j3 = 2*j; 76 stmp = a[l + j3]; 77 if (l != k) { 78 a[l + j3] = a[k + j3]; 79 a[k + j3] = stmp; 80 } 81 82 i__3 = 2 - k; 83 ay = &a[1+k+j3]; 84 for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll]; 85 } 86 87 /*}*/ 88 ipvt[1] = 2; 89 if (a[6] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",1); 90 91 /* 92 Now form the inverse 93 */ 94 95 /* compute inverse(u) */ 96 97 for (k = 1; k <= 2; ++k) { 98 k3 = 2*k; 99 k4 = k3 + k; 100 a[k4] = 1.0 / a[k4]; 101 stmp = -a[k4]; 102 i__2 = k - 1; 103 aa = &a[k3 + 1]; 104 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 105 kp1 = k + 1; 106 if (2 < kp1) continue; 107 ax = aa; 108 for (j = kp1; j <= 2; ++j) { 109 j3 = 2*j; 110 stmp = a[k + j3]; 111 a[k + j3] = 0.0; 112 ay = &a[j3 + 1]; 113 for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll]; 114 } 115 } 116 117 /* form inverse(u)*inverse(l) */ 118 119 /*for (kb = 1; kb <= 1; ++kb) {*/ 120 121 k = 1; 122 k3 = 2*k; 123 kp1 = k + 1; 124 aa = a + k3; 125 for (i = kp1; i <= 2; ++i) { 126 work[i-1] = aa[i]; 127 aa[i] = 0.0; 128 } 129 for (j = kp1; j <= 2; ++j) { 130 stmp = work[j-1]; 131 ax = &a[2*j + 1]; 132 ay = &a[k3 + 1]; 133 ay[0] += stmp*ax[0]; 134 ay[1] += stmp*ax[1]; 135 } 136 l = ipvt[k-1]; 137 if (l != k) { 138 ax = &a[k3 + 1]; 139 ay = &a[2*l + 1]; 140 stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp; 141 stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp; 142 } 143 PetscFunctionReturn(0); 144 } 145 146 #undef __FUNCT__ 147 #define __FUNCT__ "PetscKernel_A_gets_inverse_A_9" 148 PetscErrorCode PetscKernel_A_gets_inverse_A_9(MatScalar *a,PetscReal shift) 149 { 150 PetscInt i__2,i__3,kp1,j,k,l,ll,i,ipvt[9],kb,k3; 151 PetscInt k4,j3; 152 MatScalar *aa,*ax,*ay,work[81],stmp; 153 MatReal tmp,max; 154 155 /* gaussian elimination with partial pivoting */ 156 157 PetscFunctionBegin; 158 /* Parameter adjustments */ 159 a -= 10; 160 161 for (k = 1; k <= 8; ++k) { 162 kp1 = k + 1; 163 k3 = 9*k; 164 k4 = k3 + k; 165 /* find l = pivot index */ 166 167 i__2 = 10 - k; 168 aa = &a[k4]; 169 max = PetscAbsScalar(aa[0]); 170 l = 1; 171 for (ll=1; ll<i__2; ll++) { 172 tmp = PetscAbsScalar(aa[ll]); 173 if (tmp > max) { max = tmp; l = ll+1;} 174 } 175 l += k - 1; 176 ipvt[k-1] = l; 177 178 if (a[l + k3] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1); 179 180 /* interchange if necessary */ 181 182 if (l != k) { 183 stmp = a[l + k3]; 184 a[l + k3] = a[k4]; 185 a[k4] = stmp; 186 } 187 188 /* compute multipliers */ 189 190 stmp = -1. / a[k4]; 191 i__2 = 9 - k; 192 aa = &a[1 + k4]; 193 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 194 195 /* row elimination with column indexing */ 196 197 ax = &a[k4+1]; 198 for (j = kp1; j <= 9; ++j) { 199 j3 = 9*j; 200 stmp = a[l + j3]; 201 if (l != k) { 202 a[l + j3] = a[k + j3]; 203 a[k + j3] = stmp; 204 } 205 206 i__3 = 9 - k; 207 ay = &a[1+k+j3]; 208 for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll]; 209 } 210 } 211 ipvt[8] = 9; 212 if (a[90] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",6); 213 214 /* 215 Now form the inverse 216 */ 217 218 /* compute inverse(u) */ 219 220 for (k = 1; k <= 9; ++k) { 221 k3 = 9*k; 222 k4 = k3 + k; 223 a[k4] = 1.0 / a[k4]; 224 stmp = -a[k4]; 225 i__2 = k - 1; 226 aa = &a[k3 + 1]; 227 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 228 kp1 = k + 1; 229 if (9 < kp1) continue; 230 ax = aa; 231 for (j = kp1; j <= 9; ++j) { 232 j3 = 9*j; 233 stmp = a[k + j3]; 234 a[k + j3] = 0.0; 235 ay = &a[j3 + 1]; 236 for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll]; 237 } 238 } 239 240 /* form inverse(u)*inverse(l) */ 241 242 for (kb = 1; kb <= 8; ++kb) { 243 k = 9 - kb; 244 k3 = 9*k; 245 kp1 = k + 1; 246 aa = a + k3; 247 for (i = kp1; i <= 9; ++i) { 248 work[i-1] = aa[i]; 249 aa[i] = 0.0; 250 } 251 for (j = kp1; j <= 9; ++j) { 252 stmp = work[j-1]; 253 ax = &a[9*j + 1]; 254 ay = &a[k3 + 1]; 255 ay[0] += stmp*ax[0]; 256 ay[1] += stmp*ax[1]; 257 ay[2] += stmp*ax[2]; 258 ay[3] += stmp*ax[3]; 259 ay[4] += stmp*ax[4]; 260 ay[5] += stmp*ax[5]; 261 ay[6] += stmp*ax[6]; 262 ay[7] += stmp*ax[7]; 263 ay[8] += stmp*ax[8]; 264 } 265 l = ipvt[k-1]; 266 if (l != k) { 267 ax = &a[k3 + 1]; 268 ay = &a[9*l + 1]; 269 stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp; 270 stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp; 271 stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp; 272 stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp; 273 stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp; 274 stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp; 275 stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp; 276 stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp; 277 stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp; 278 } 279 } 280 PetscFunctionReturn(0); 281 } 282 283 /* 284 Inverts 15 by 15 matrix using partial pivoting. 285 286 Used by the sparse factorization routines in 287 src/mat/impls/baij/seq 288 289 This is a combination of the Linpack routines 290 dgefa() and dgedi() specialized for a size of 15. 291 292 */ 293 294 #undef __FUNCT__ 295 #define __FUNCT__ "PetscKernel_A_gets_inverse_A_15" 296 PetscErrorCode PetscKernel_A_gets_inverse_A_15(MatScalar *a,PetscInt *ipvt,MatScalar *work,PetscReal shift) 297 { 298 PetscInt i__2,i__3,kp1,j,k,l,ll,i,kb,k3; 299 PetscInt k4,j3; 300 MatScalar *aa,*ax,*ay,stmp; 301 MatReal tmp,max; 302 303 /* gaussian elimination with partial pivoting */ 304 305 PetscFunctionBegin; 306 /* Parameter adjustments */ 307 a -= 16; 308 309 for (k = 1; k <= 14; ++k) { 310 kp1 = k + 1; 311 k3 = 15*k; 312 k4 = k3 + k; 313 /* find l = pivot index */ 314 315 i__2 = 16 - k; 316 aa = &a[k4]; 317 max = PetscAbsScalar(aa[0]); 318 l = 1; 319 for (ll=1; ll<i__2; ll++) { 320 tmp = PetscAbsScalar(aa[ll]); 321 if (tmp > max) { max = tmp; l = ll+1;} 322 } 323 l += k - 1; 324 ipvt[k-1] = l; 325 326 if (a[l + k3] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1); 327 328 /* interchange if necessary */ 329 330 if (l != k) { 331 stmp = a[l + k3]; 332 a[l + k3] = a[k4]; 333 a[k4] = stmp; 334 } 335 336 /* compute multipliers */ 337 338 stmp = -1. / a[k4]; 339 i__2 = 15 - k; 340 aa = &a[1 + k4]; 341 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 342 343 /* row elimination with column indexing */ 344 345 ax = &a[k4+1]; 346 for (j = kp1; j <= 15; ++j) { 347 j3 = 15*j; 348 stmp = a[l + j3]; 349 if (l != k) { 350 a[l + j3] = a[k + j3]; 351 a[k + j3] = stmp; 352 } 353 354 i__3 = 15 - k; 355 ay = &a[1+k+j3]; 356 for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll]; 357 } 358 } 359 ipvt[14] = 15; 360 if (a[240] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",6); 361 362 /* 363 Now form the inverse 364 */ 365 366 /* compute inverse(u) */ 367 368 for (k = 1; k <= 15; ++k) { 369 k3 = 15*k; 370 k4 = k3 + k; 371 a[k4] = 1.0 / a[k4]; 372 stmp = -a[k4]; 373 i__2 = k - 1; 374 aa = &a[k3 + 1]; 375 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 376 kp1 = k + 1; 377 if (15 < kp1) continue; 378 ax = aa; 379 for (j = kp1; j <= 15; ++j) { 380 j3 = 15*j; 381 stmp = a[k + j3]; 382 a[k + j3] = 0.0; 383 ay = &a[j3 + 1]; 384 for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll]; 385 } 386 } 387 388 /* form inverse(u)*inverse(l) */ 389 390 for (kb = 1; kb <= 14; ++kb) { 391 k = 15 - kb; 392 k3 = 15*k; 393 kp1 = k + 1; 394 aa = a + k3; 395 for (i = kp1; i <= 15; ++i) { 396 work[i-1] = aa[i]; 397 aa[i] = 0.0; 398 } 399 for (j = kp1; j <= 15; ++j) { 400 stmp = work[j-1]; 401 ax = &a[15*j + 1]; 402 ay = &a[k3 + 1]; 403 ay[0] += stmp*ax[0]; 404 ay[1] += stmp*ax[1]; 405 ay[2] += stmp*ax[2]; 406 ay[3] += stmp*ax[3]; 407 ay[4] += stmp*ax[4]; 408 ay[5] += stmp*ax[5]; 409 ay[6] += stmp*ax[6]; 410 ay[7] += stmp*ax[7]; 411 ay[8] += stmp*ax[8]; 412 ay[9] += stmp*ax[9]; 413 ay[10] += stmp*ax[10]; 414 ay[11] += stmp*ax[11]; 415 ay[12] += stmp*ax[12]; 416 ay[13] += stmp*ax[13]; 417 ay[14] += stmp*ax[14]; 418 } 419 l = ipvt[k-1]; 420 if (l != k) { 421 ax = &a[k3 + 1]; 422 ay = &a[15*l + 1]; 423 stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp; 424 stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp; 425 stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp; 426 stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp; 427 stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp; 428 stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp; 429 stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp; 430 stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp; 431 stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp; 432 stmp = ax[9]; ax[9] = ay[9]; ay[9] = stmp; 433 stmp = ax[10]; ax[10] = ay[10]; ay[10] = stmp; 434 stmp = ax[11]; ax[11] = ay[11]; ay[11] = stmp; 435 stmp = ax[12]; ax[12] = ay[12]; ay[12] = stmp; 436 stmp = ax[13]; ax[13] = ay[13]; ay[13] = stmp; 437 stmp = ax[14]; ax[14] = ay[14]; ay[14] = stmp; 438 } 439 } 440 PetscFunctionReturn(0); 441 } 442