1 2 /* 3 Inverts 2 by 2 matrix using gaussian elimination with partial pivoting. 4 5 Used by the sparse factorization routines in 6 src/mat/impls/baij/seq 7 8 9 This is a combination of the Linpack routines 10 dgefa() and dgedi() specialized for a size of 2. 11 12 */ 13 #include <petscsys.h> 14 15 #undef __FUNCT__ 16 #define __FUNCT__ "PetscKernel_A_gets_inverse_A_2" 17 PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_2(MatScalar *a,PetscReal shift,PetscBool allowzeropivot,PetscBool *zeropivotdetected) 18 { 19 PetscInt i__2,i__3,kp1,j,k,l,ll,i,ipvt[2],k3; 20 PetscInt k4,j3; 21 MatScalar *aa,*ax,*ay,work[4],stmp; 22 MatReal tmp,max; 23 24 PetscFunctionBegin; 25 if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE; 26 shift = .25*shift*(1.e-12 + PetscAbsScalar(a[0]) + PetscAbsScalar(a[3])); 27 28 /* Parameter adjustments */ 29 a -= 3; 30 31 k = 1; 32 kp1 = k + 1; 33 k3 = 2*k; 34 k4 = k3 + k; 35 36 /* find l = pivot index */ 37 i__2 = 3 - k; 38 aa = &a[k4]; 39 max = PetscAbsScalar(aa[0]); 40 l = 1; 41 for (ll=1; ll<i__2; ll++) { 42 tmp = PetscAbsScalar(aa[ll]); 43 if (tmp > max) { max = tmp; l = ll+1;} 44 } 45 l += k - 1; 46 ipvt[k-1] = l; 47 48 if (a[l + k3] == 0.0) { 49 if (shift == 0.0) { 50 if (allowzeropivot) { 51 PetscErrorCode ierr; 52 ierr = PetscInfo1(NULL,"Zero pivot, row %D\n",k-1);CHKERRQ(ierr); 53 *zeropivotdetected = PETSC_TRUE; 54 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1); 55 } else { 56 a[l + k3] = shift; 57 } 58 } 59 60 /* interchange if necessary */ 61 if (l != k) { 62 stmp = a[l + k3]; 63 a[l + k3] = a[k4]; 64 a[k4] = stmp; 65 } 66 67 /* compute multipliers */ 68 stmp = -1. / a[k4]; 69 i__2 = 2 - k; 70 aa = &a[1 + k4]; 71 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 72 73 /* row elimination with column indexing */ 74 ax = &a[k4+1]; 75 for (j = kp1; j <= 2; ++j) { 76 j3 = 2*j; 77 stmp = a[l + j3]; 78 if (l != k) { 79 a[l + j3] = a[k + j3]; 80 a[k + j3] = stmp; 81 } 82 83 i__3 = 2 - k; 84 ay = &a[1+k+j3]; 85 for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll]; 86 } 87 88 ipvt[1] = 2; 89 if (a[6] == 0.0) { 90 if (allowzeropivot) { 91 PetscErrorCode ierr; 92 ierr = PetscInfo1(NULL,"Zero pivot, row %D\n",1);CHKERRQ(ierr); 93 *zeropivotdetected = PETSC_TRUE; 94 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",1); 95 } 96 97 /* Now form the inverse */ 98 /* compute inverse(u) */ 99 for (k = 1; k <= 2; ++k) { 100 k3 = 2*k; 101 k4 = k3 + k; 102 a[k4] = 1.0 / a[k4]; 103 stmp = -a[k4]; 104 i__2 = k - 1; 105 aa = &a[k3 + 1]; 106 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 107 kp1 = k + 1; 108 if (2 < kp1) continue; 109 ax = aa; 110 for (j = kp1; j <= 2; ++j) { 111 j3 = 2*j; 112 stmp = a[k + j3]; 113 a[k + j3] = 0.0; 114 ay = &a[j3 + 1]; 115 for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll]; 116 } 117 } 118 119 /* form inverse(u)*inverse(l) */ 120 k = 1; 121 k3 = 2*k; 122 kp1 = k + 1; 123 aa = a + k3; 124 for (i = kp1; i <= 2; ++i) { 125 work[i-1] = aa[i]; 126 aa[i] = 0.0; 127 } 128 for (j = kp1; j <= 2; ++j) { 129 stmp = work[j-1]; 130 ax = &a[2*j + 1]; 131 ay = &a[k3 + 1]; 132 ay[0] += stmp*ax[0]; 133 ay[1] += stmp*ax[1]; 134 } 135 l = ipvt[k-1]; 136 if (l != k) { 137 ax = &a[k3 + 1]; 138 ay = &a[2*l + 1]; 139 stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp; 140 stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp; 141 } 142 PetscFunctionReturn(0); 143 } 144 145 /* gaussian elimination with partial pivoting */ 146 #undef __FUNCT__ 147 #define __FUNCT__ "PetscKernel_A_gets_inverse_A_9" 148 PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_9(MatScalar *a,PetscReal shift,PetscBool allowzeropivot,PetscBool *zeropivotdetected) 149 { 150 PetscInt i__2,i__3,kp1,j,k,l,ll,i,ipvt[9],kb,k3; 151 PetscInt k4,j3; 152 MatScalar *aa,*ax,*ay,work[81],stmp; 153 MatReal tmp,max; 154 155 PetscFunctionBegin; 156 if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE; 157 158 /* Parameter adjustments */ 159 a -= 10; 160 161 for (k = 1; k <= 8; ++k) { 162 kp1 = k + 1; 163 k3 = 9*k; 164 k4 = k3 + k; 165 166 /* find l = pivot index */ 167 i__2 = 10 - k; 168 aa = &a[k4]; 169 max = PetscAbsScalar(aa[0]); 170 l = 1; 171 for (ll=1; ll<i__2; ll++) { 172 tmp = PetscAbsScalar(aa[ll]); 173 if (tmp > max) { max = tmp; l = ll+1;} 174 } 175 l += k - 1; 176 ipvt[k-1] = l; 177 178 if (a[l + k3] == 0.0) { 179 if (shift == 0.0) { 180 if (allowzeropivot) { 181 PetscErrorCode ierr; 182 ierr = PetscInfo1(NULL,"Zero pivot, row %D\n",k-1);CHKERRQ(ierr); 183 *zeropivotdetected = PETSC_TRUE; 184 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1); 185 } else { 186 a[l + k3] = shift; 187 } 188 } 189 190 /* interchange if necessary */ 191 if (l != k) { 192 stmp = a[l + k3]; 193 a[l + k3] = a[k4]; 194 a[k4] = stmp; 195 } 196 197 /* compute multipliers */ 198 stmp = -1. / a[k4]; 199 i__2 = 9 - k; 200 aa = &a[1 + k4]; 201 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 202 203 /* row elimination with column indexing */ 204 ax = &a[k4+1]; 205 for (j = kp1; j <= 9; ++j) { 206 j3 = 9*j; 207 stmp = a[l + j3]; 208 if (l != k) { 209 a[l + j3] = a[k + j3]; 210 a[k + j3] = stmp; 211 } 212 213 i__3 = 9 - k; 214 ay = &a[1+k+j3]; 215 for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll]; 216 } 217 } 218 ipvt[8] = 9; 219 if (a[90] == 0.0) { 220 if (allowzeropivot) { 221 PetscErrorCode ierr; 222 ierr = PetscInfo1(NULL,"Zero pivot, row %D\n",6);CHKERRQ(ierr); 223 *zeropivotdetected = PETSC_TRUE; 224 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",6); 225 } 226 227 /* Now form the inverse */ 228 /* compute inverse(u) */ 229 for (k = 1; k <= 9; ++k) { 230 k3 = 9*k; 231 k4 = k3 + k; 232 a[k4] = 1.0 / a[k4]; 233 stmp = -a[k4]; 234 i__2 = k - 1; 235 aa = &a[k3 + 1]; 236 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 237 kp1 = k + 1; 238 if (9 < kp1) continue; 239 ax = aa; 240 for (j = kp1; j <= 9; ++j) { 241 j3 = 9*j; 242 stmp = a[k + j3]; 243 a[k + j3] = 0.0; 244 ay = &a[j3 + 1]; 245 for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll]; 246 } 247 } 248 249 /* form inverse(u)*inverse(l) */ 250 for (kb = 1; kb <= 8; ++kb) { 251 k = 9 - kb; 252 k3 = 9*k; 253 kp1 = k + 1; 254 aa = a + k3; 255 for (i = kp1; i <= 9; ++i) { 256 work[i-1] = aa[i]; 257 aa[i] = 0.0; 258 } 259 for (j = kp1; j <= 9; ++j) { 260 stmp = work[j-1]; 261 ax = &a[9*j + 1]; 262 ay = &a[k3 + 1]; 263 ay[0] += stmp*ax[0]; 264 ay[1] += stmp*ax[1]; 265 ay[2] += stmp*ax[2]; 266 ay[3] += stmp*ax[3]; 267 ay[4] += stmp*ax[4]; 268 ay[5] += stmp*ax[5]; 269 ay[6] += stmp*ax[6]; 270 ay[7] += stmp*ax[7]; 271 ay[8] += stmp*ax[8]; 272 } 273 l = ipvt[k-1]; 274 if (l != k) { 275 ax = &a[k3 + 1]; 276 ay = &a[9*l + 1]; 277 stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp; 278 stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp; 279 stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp; 280 stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp; 281 stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp; 282 stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp; 283 stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp; 284 stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp; 285 stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp; 286 } 287 } 288 PetscFunctionReturn(0); 289 } 290 291 /* 292 Inverts 15 by 15 matrix using gaussian elimination with partial pivoting. 293 294 Used by the sparse factorization routines in 295 src/mat/impls/baij/seq 296 297 This is a combination of the Linpack routines 298 dgefa() and dgedi() specialized for a size of 15. 299 300 */ 301 302 #undef __FUNCT__ 303 #define __FUNCT__ "PetscKernel_A_gets_inverse_A_15" 304 PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_15(MatScalar *a,PetscInt *ipvt,MatScalar *work,PetscReal shift,PetscBool allowzeropivot,PetscBool *zeropivotdetected) 305 { 306 PetscInt i__2,i__3,kp1,j,k,l,ll,i,kb,k3; 307 PetscInt k4,j3; 308 MatScalar *aa,*ax,*ay,stmp; 309 MatReal tmp,max; 310 311 PetscFunctionBegin; 312 if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE; 313 314 /* Parameter adjustments */ 315 a -= 16; 316 317 for (k = 1; k <= 14; ++k) { 318 kp1 = k + 1; 319 k3 = 15*k; 320 k4 = k3 + k; 321 322 /* find l = pivot index */ 323 i__2 = 16 - k; 324 aa = &a[k4]; 325 max = PetscAbsScalar(aa[0]); 326 l = 1; 327 for (ll=1; ll<i__2; ll++) { 328 tmp = PetscAbsScalar(aa[ll]); 329 if (tmp > max) { max = tmp; l = ll+1;} 330 } 331 l += k - 1; 332 ipvt[k-1] = l; 333 334 if (a[l + k3] == 0.0) { 335 if (shift == 0.0) { 336 if (allowzeropivot) { 337 PetscErrorCode ierr; 338 ierr = PetscInfo1(NULL,"Zero pivot, row %D\n",k-1);CHKERRQ(ierr); 339 *zeropivotdetected = PETSC_TRUE; 340 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1); 341 } else { 342 a[l + k3] = shift; 343 } 344 } 345 346 /* interchange if necessary */ 347 if (l != k) { 348 stmp = a[l + k3]; 349 a[l + k3] = a[k4]; 350 a[k4] = stmp; 351 } 352 353 /* compute multipliers */ 354 stmp = -1. / a[k4]; 355 i__2 = 15 - k; 356 aa = &a[1 + k4]; 357 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 358 359 /* row elimination with column indexing */ 360 ax = &a[k4+1]; 361 for (j = kp1; j <= 15; ++j) { 362 j3 = 15*j; 363 stmp = a[l + j3]; 364 if (l != k) { 365 a[l + j3] = a[k + j3]; 366 a[k + j3] = stmp; 367 } 368 369 i__3 = 15 - k; 370 ay = &a[1+k+j3]; 371 for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll]; 372 } 373 } 374 ipvt[14] = 15; 375 if (a[240] == 0.0) { 376 if (allowzeropivot) { 377 PetscErrorCode ierr; 378 ierr = PetscInfo1(NULL,"Zero pivot, row %D\n",6);CHKERRQ(ierr); 379 *zeropivotdetected = PETSC_TRUE; 380 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",6); 381 } 382 383 /* Now form the inverse */ 384 /* compute inverse(u) */ 385 for (k = 1; k <= 15; ++k) { 386 k3 = 15*k; 387 k4 = k3 + k; 388 a[k4] = 1.0 / a[k4]; 389 stmp = -a[k4]; 390 i__2 = k - 1; 391 aa = &a[k3 + 1]; 392 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 393 kp1 = k + 1; 394 if (15 < kp1) continue; 395 ax = aa; 396 for (j = kp1; j <= 15; ++j) { 397 j3 = 15*j; 398 stmp = a[k + j3]; 399 a[k + j3] = 0.0; 400 ay = &a[j3 + 1]; 401 for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll]; 402 } 403 } 404 405 /* form inverse(u)*inverse(l) */ 406 for (kb = 1; kb <= 14; ++kb) { 407 k = 15 - kb; 408 k3 = 15*k; 409 kp1 = k + 1; 410 aa = a + k3; 411 for (i = kp1; i <= 15; ++i) { 412 work[i-1] = aa[i]; 413 aa[i] = 0.0; 414 } 415 for (j = kp1; j <= 15; ++j) { 416 stmp = work[j-1]; 417 ax = &a[15*j + 1]; 418 ay = &a[k3 + 1]; 419 ay[0] += stmp*ax[0]; 420 ay[1] += stmp*ax[1]; 421 ay[2] += stmp*ax[2]; 422 ay[3] += stmp*ax[3]; 423 ay[4] += stmp*ax[4]; 424 ay[5] += stmp*ax[5]; 425 ay[6] += stmp*ax[6]; 426 ay[7] += stmp*ax[7]; 427 ay[8] += stmp*ax[8]; 428 ay[9] += stmp*ax[9]; 429 ay[10] += stmp*ax[10]; 430 ay[11] += stmp*ax[11]; 431 ay[12] += stmp*ax[12]; 432 ay[13] += stmp*ax[13]; 433 ay[14] += stmp*ax[14]; 434 } 435 l = ipvt[k-1]; 436 if (l != k) { 437 ax = &a[k3 + 1]; 438 ay = &a[15*l + 1]; 439 stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp; 440 stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp; 441 stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp; 442 stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp; 443 stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp; 444 stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp; 445 stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp; 446 stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp; 447 stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp; 448 stmp = ax[9]; ax[9] = ay[9]; ay[9] = stmp; 449 stmp = ax[10]; ax[10] = ay[10]; ay[10] = stmp; 450 stmp = ax[11]; ax[11] = ay[11]; ay[11] = stmp; 451 stmp = ax[12]; ax[12] = ay[12]; ay[12] = stmp; 452 stmp = ax[13]; ax[13] = ay[13]; ay[13] = stmp; 453 stmp = ax[14]; ax[14] = ay[14]; ay[14] = stmp; 454 } 455 } 456 PetscFunctionReturn(0); 457 } 458