/*
     In-place inversion kernels for small dense blocks of fixed size
   (2 by 2, 9 by 9 and 15 by 15) using gaussian elimination with
   partial pivoting.

       Used by the sparse factorization routines in
     src/mat/impls/baij/seq

       Each kernel is a combination of the Linpack routines
    dgefa() and dgedi() specialized for one block size.

     Indexing convention shared by all kernels in this file: the pointer
   a is moved back by n+1 entries so that a[i + n*j], with i and j running
   from 1 to n, addresses entry i of the j-th group of n consecutive values
   (a "column" in the Linpack formulation).  The diagonal entry of group k
   is therefore a[n*k + k].
*/
#include <petscsys.h>

/*
     Inverts a 2 by 2 block in place.

   Input Parameters:
+  a                 - the 4 entries of the block; overwritten by the result
.  shift             - if nonzero, an exactly zero first pivot is replaced by
                       .25*shift*(1.e-12 + |a[0]| + |a[3]|) instead of being
                       reported
-  allowzeropivot    - if true a zero pivot is only logged and flagged,
                       otherwise it raises PETSC_ERR_MAT_LU_ZRPVT

   Output Parameter:
.  zeropivotdetected - may be NULL; otherwise set to PETSC_FALSE on entry and
                       to PETSC_TRUE when a zero pivot was tolerated

   Notes:
     The shift is only applied to the first pivot; the check of the last
   pivot a[6] ignores it.

     When a zero pivot is tolerated the routine keeps going and divides by
   that pivot, so the values left in a are presumably not a usable inverse;
   callers are expected to look at zeropivotdetected (TODO confirm against
   the callers).
*/
PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_2(MatScalar *a,PetscReal shift,PetscBool allowzeropivot,PetscBool *zeropivotdetected)
{
  PetscInt  i__2,i__3,kp1,j,k,l,ll,i,ipvt[2],k3;
  PetscInt  k4,j3;
  MatScalar *aa,*ax,*ay,work[4],stmp;
  MatReal   tmp,max;

  PetscFunctionBegin;
  if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE;
  /* scale the shift by the magnitude of the two diagonal entries; a zero shift stays zero */
  shift = .25*shift*(1.e-12 + PetscAbsScalar(a[0]) + PetscAbsScalar(a[3]));

  /* Parameter adjustments */
  /* switch to 1-based indexing: a[i + 2*j] is entry i of group j */
  a -= 3;

  /* for a 2 by 2 block there is a single elimination step, k = 1 */
  k   = 1;
  kp1 = k + 1;
  k3  = 2*k;      /* offset of group k */
  k4  = k3 + k;   /* index of the diagonal entry of group k */

  /* find l = pivot index */
  /* largest magnitude among entries k..2 of group k; ties keep the first one */
  i__2 = 3 - k;
  aa   = &a[k4];
  max  = PetscAbsScalar(aa[0]);
  l    = 1;
  for (ll=1; ll<i__2; ll++) {
    tmp = PetscAbsScalar(aa[ll]);
    if (tmp > max) { max = tmp; l = ll+1;}
  }
  l        += k - 1;
  ipvt[k-1] = l;

  /* the selected (largest magnitude) candidate is exactly zero */
  if (a[l + k3] == 0.0) {
    if (shift == 0.0) {
      if (allowzeropivot) {
        PetscErrorCode ierr;
        /* log and flag only; the elimination below still runs with the zero pivot */
        ierr = PetscInfo1(NULL,"Zero pivot, row %D\n",k-1);CHKERRQ(ierr);
        if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
      } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1);
    } else {
      /* replace the zero pivot by the scaled shift */
      a[l + k3] = shift;
    }
  }

  /* interchange if necessary */
  if (l != k) {
    stmp      = a[l + k3];
    a[l + k3] = a[k4];
    a[k4]     = stmp;
  }

  /* compute multipliers */
  /* entries below the diagonal of group k are scaled by -1/pivot */
  stmp = -1. / a[k4];
  i__2 = 2 - k;
  aa   = &a[1 + k4];
  for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;

  /* row elimination with column indexing */
  ax = &a[k4+1];   /* the multipliers just computed */
  for (j = kp1; j <= 2; ++j) {
    j3   = 2*j;
    stmp = a[l + j3];
    /* apply the same interchange to group j */
    if (l != k) {
      a[l + j3] = a[k + j3];
      a[k + j3] = stmp;
    }

    /* entries k+1..2 of group j += stmp * multipliers */
    i__3 = 2 - k;
    ay   = &a[1+k+j3];
    for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll];
  }

  ipvt[1] = 2;
  /* last pivot (a[6] is the second diagonal entry); note that shift is not used here */
  if (a[6] == 0.0) {
    if (allowzeropivot) {
      PetscErrorCode ierr;
      ierr = PetscInfo1(NULL,"Zero pivot, row %D\n",1);CHKERRQ(ierr);
      if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
    } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",1);
  }

  /* Now form the inverse */
  /* compute inverse(u) */
  for (k = 1; k <= 2; ++k) {
    k3    = 2*k;
    k4    = k3 + k;
    a[k4] = 1.0 / a[k4];   /* invert the diagonal entry */
    stmp  = -a[k4];
    i__2  = k - 1;
    aa    = &a[k3 + 1];
    /* scale entries 1..k-1 of group k by -1/diagonal */
    for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
    kp1 = k + 1;
    if (2 < kp1) continue;   /* no later group left to update */
    ax = aa;
    for (j = kp1; j <= 2; ++j) {
      j3        = 2*j;
      stmp      = a[k + j3];
      a[k + j3] = 0.0;   /* must be cleared before the update below, which also writes this entry */
      ay        = &a[j3 + 1];
      /* entries 1..k of group j += stmp * entries 1..k of group k */
      for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll];
    }
  }

  /* form inverse(u)*inverse(l) */
  /* single step k = 1 for a 2 by 2 block */
  k   = 1;
  k3  = 2*k;
  kp1 = k + 1;
  aa  = a + k3;
  /* save the multipliers of group k in work and clear them in a */
  for (i = kp1; i <= 2; ++i) {
    work[i-1] = aa[i];
    aa[i]     = 0.0;
  }
  /* group k += work[j-1] * group j, over the full length of the group */
  for (j = kp1; j <= 2; ++j) {
    stmp   = work[j-1];
    ax     = &a[2*j + 1];
    ay     = &a[k3 + 1];
    ay[0] += stmp*ax[0];
    ay[1] += stmp*ax[1];
  }
  /* swap whole groups k and l using the pivot index recorded during elimination */
  l = ipvt[k-1];
  if (l != k) {
    ax   = &a[k3 + 1];
    ay   = &a[2*l + 1];
    stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp;
    stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp;
  }
  PetscFunctionReturn(0);
}

/*
     Inverts a 9 by 9 block in place using gaussian elimination with partial
   pivoting; same structure as the 2 by 2 kernel, with loops over the
   elimination steps.

   Input Parameters:
+  a                 - the 81 entries of the block; overwritten by the result
.  shift             - if nonzero, an exactly zero pivot found in steps 1..8
                       is replaced by this value (used as passed in, it is
                       not rescaled here)
-  allowzeropivot    - if true a zero pivot is only logged and flagged,
                       otherwise it raises PETSC_ERR_MAT_LU_ZRPVT

   Output Parameter:
.  zeropivotdetected - may be NULL; otherwise set to PETSC_FALSE on entry and
                       to PETSC_TRUE when a zero pivot was tolerated

   Notes:
     The check of the last pivot a[90] does not use shift.

     When a zero pivot is tolerated the routine keeps going and divides by
   that pivot; callers are expected to look at zeropivotdetected (TODO
   confirm against the callers).
*/
PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_9(MatScalar *a,PetscReal shift,PetscBool allowzeropivot,PetscBool *zeropivotdetected)
{
  PetscInt  i__2,i__3,kp1,j,k,l,ll,i,ipvt[9],kb,k3;
  PetscInt  k4,j3;
  MatScalar *aa,*ax,*ay,work[81],stmp;
  MatReal   tmp,max;

  PetscFunctionBegin;
  if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE;

  /* Parameter adjustments */
  /* switch to 1-based indexing: a[i + 9*j] is entry i of group j */
  a -= 10;

  /* elimination steps 1..8 */
  for (k = 1; k <= 8; ++k) {
    kp1 = k + 1;
    k3  = 9*k;      /* offset of group k */
    k4  = k3 + k;   /* index of the diagonal entry of group k */

    /* find l = pivot index */
    /* largest magnitude among entries k..9 of group k; ties keep the first one */
    i__2 = 10 - k;
    aa   = &a[k4];
    max  = PetscAbsScalar(aa[0]);
    l    = 1;
    for (ll=1; ll<i__2; ll++) {
      tmp = PetscAbsScalar(aa[ll]);
      if (tmp > max) { max = tmp; l = ll+1;}
    }
    l        += k - 1;
    ipvt[k-1] = l;

    /* the selected (largest magnitude) candidate is exactly zero */
    if (a[l + k3] == 0.0) {
      if (shift == 0.0) {
        if (allowzeropivot) {
          PetscErrorCode ierr;
          /* log and flag only; the elimination below still runs with the zero pivot */
          ierr = PetscInfo1(NULL,"Zero pivot, row %D\n",k-1);CHKERRQ(ierr);
          if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
        } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1);
      } else {
        /* replace the zero pivot by the shift */
        a[l + k3] = shift;
      }
    }

    /* interchange if necessary */
    if (l != k) {
      stmp      = a[l + k3];
      a[l + k3] = a[k4];
      a[k4]     = stmp;
    }

    /* compute multipliers */
    /* entries below the diagonal of group k are scaled by -1/pivot */
    stmp = -1. / a[k4];
    i__2 = 9 - k;
    aa   = &a[1 + k4];
    for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;

    /* row elimination with column indexing */
    ax = &a[k4+1];   /* the multipliers just computed */
    for (j = kp1; j <= 9; ++j) {
      j3   = 9*j;
      stmp = a[l + j3];
      /* apply the same interchange to group j */
      if (l != k) {
        a[l + j3] = a[k + j3];
        a[k + j3] = stmp;
      }

      /* entries k+1..9 of group j += stmp * multipliers */
      i__3 = 9 - k;
      ay   = &a[1+k+j3];
      for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll];
    }
  }
  ipvt[8] = 9;
  /* last pivot (a[90] is the ninth diagonal entry); note that shift is not used here */
  if (a[90] == 0.0) {
    if (allowzeropivot) {
      PetscErrorCode ierr;
      ierr = PetscInfo1(NULL,"Zero pivot, row %D\n",8);CHKERRQ(ierr);
      if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
    } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",8);
  }

  /* Now form the inverse */
  /* compute inverse(u) */
  for (k = 1; k <= 9; ++k) {
    k3    = 9*k;
    k4    = k3 + k;
    a[k4] = 1.0 / a[k4];   /* invert the diagonal entry */
    stmp  = -a[k4];
    i__2  = k - 1;
    aa    = &a[k3 + 1];
    /* scale entries 1..k-1 of group k by -1/diagonal */
    for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
    kp1 = k + 1;
    if (9 < kp1) continue;   /* no later group left to update */
    ax = aa;
    for (j = kp1; j <= 9; ++j) {
      j3        = 9*j;
      stmp      = a[k + j3];
      a[k + j3] = 0.0;   /* must be cleared before the update below, which also writes this entry */
      ay        = &a[j3 + 1];
      /* entries 1..k of group j += stmp * entries 1..k of group k */
      for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll];
    }
  }

  /* form inverse(u)*inverse(l) */
  /* groups are processed from k = 8 down to k = 1 */
  for (kb = 1; kb <= 8; ++kb) {
    k   = 9 - kb;
    k3  = 9*k;
    kp1 = k + 1;
    aa  = a + k3;
    /* save the multipliers of group k in work and clear them in a */
    for (i = kp1; i <= 9; ++i) {
      work[i-1] = aa[i];
      aa[i]     = 0.0;
    }
    /* group k += work[j-1] * group j, over the full length of the group (unrolled) */
    for (j = kp1; j <= 9; ++j) {
      stmp   = work[j-1];
      ax     = &a[9*j + 1];
      ay     = &a[k3 + 1];
      ay[0] += stmp*ax[0];
      ay[1] += stmp*ax[1];
      ay[2] += stmp*ax[2];
      ay[3] += stmp*ax[3];
      ay[4] += stmp*ax[4];
      ay[5] += stmp*ax[5];
      ay[6] += stmp*ax[6];
      ay[7] += stmp*ax[7];
      ay[8] += stmp*ax[8];
    }
    /* swap whole groups k and l using the pivot index recorded during elimination */
    l = ipvt[k-1];
    if (l != k) {
      ax   = &a[k3 + 1];
      ay   = &a[9*l + 1];
      stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp;
      stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp;
      stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp;
      stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp;
      stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp;
      stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp;
      stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp;
      stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp;
      stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp;
    }
  }
  PetscFunctionReturn(0);
}

/*
      Inverts 15 by 15 matrix using gaussian elimination with partial pivoting.

       Used by the sparse factorization routines in
     src/mat/impls/baij/seq

       This is a combination of the Linpack routines
    dgefa() and dgedi() specialized for a size of 15.

   Input Parameters:
+  a                 - the 225 entries of the block; overwritten by the result
.  ipvt              - caller supplied integer scratch; entries 0..14 are
                       written with the pivot indices
.  work              - caller supplied scalar scratch; entries 1..14 are used
.  shift             - if nonzero, an exactly zero pivot found in steps 1..14
                       is replaced by this value (used as passed in, it is
                       not rescaled here)
-  allowzeropivot    - if true a zero pivot is only logged and flagged,
                       otherwise it raises PETSC_ERR_MAT_LU_ZRPVT

   Output Parameter:
.  zeropivotdetected - may be NULL; otherwise set to PETSC_FALSE on entry and
                       to PETSC_TRUE when a zero pivot was tolerated

   Notes:
     The check of the last pivot a[240] does not use shift.

     When a zero pivot is tolerated the routine keeps going and divides by
   that pivot; callers are expected to look at zeropivotdetected (TODO
   confirm against the callers).
*/

PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_15(MatScalar *a,PetscInt *ipvt,MatScalar *work,PetscReal shift,PetscBool allowzeropivot,PetscBool *zeropivotdetected)
{
  PetscInt  i__2,i__3,kp1,j,k,l,ll,i,kb,k3;
  PetscInt  k4,j3;
  MatScalar *aa,*ax,*ay,stmp;
  MatReal   tmp,max;

  PetscFunctionBegin;
  if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE;

  /* Parameter adjustments */
  /* switch to 1-based indexing: a[i + 15*j] is entry i of group j */
  a -= 16;

  /* elimination steps 1..14 */
  for (k = 1; k <= 14; ++k) {
    kp1 = k + 1;
    k3  = 15*k;     /* offset of group k */
    k4  = k3 + k;   /* index of the diagonal entry of group k */

    /* find l = pivot index */
    /* largest magnitude among entries k..15 of group k; ties keep the first one */
    i__2 = 16 - k;
    aa   = &a[k4];
    max  = PetscAbsScalar(aa[0]);
    l    = 1;
    for (ll=1; ll<i__2; ll++) {
      tmp = PetscAbsScalar(aa[ll]);
      if (tmp > max) { max = tmp; l = ll+1;}
    }
    l        += k - 1;
    ipvt[k-1] = l;

    /* the selected (largest magnitude) candidate is exactly zero */
    if (a[l + k3] == 0.0) {
      if (shift == 0.0) {
        if (allowzeropivot) {
          PetscErrorCode ierr;
          /* log and flag only; the elimination below still runs with the zero pivot */
          ierr = PetscInfo1(NULL,"Zero pivot, row %D\n",k-1);CHKERRQ(ierr);
          if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
        } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1);
      } else {
        /* replace the zero pivot by the shift */
        a[l + k3] = shift;
      }
    }

    /* interchange if necessary */
    if (l != k) {
      stmp      = a[l + k3];
      a[l + k3] = a[k4];
      a[k4]     = stmp;
    }

    /* compute multipliers */
    /* entries below the diagonal of group k are scaled by -1/pivot */
    stmp = -1. / a[k4];
    i__2 = 15 - k;
    aa   = &a[1 + k4];
    for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;

    /* row elimination with column indexing */
    ax = &a[k4+1];   /* the multipliers just computed */
    for (j = kp1; j <= 15; ++j) {
      j3   = 15*j;
      stmp = a[l + j3];
      /* apply the same interchange to group j */
      if (l != k) {
        a[l + j3] = a[k + j3];
        a[k + j3] = stmp;
      }

      /* entries k+1..15 of group j += stmp * multipliers */
      i__3 = 15 - k;
      ay   = &a[1+k+j3];
      for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll];
    }
  }
  ipvt[14] = 15;
  /* last pivot (a[240] is the fifteenth diagonal entry); note that shift is not used here */
  if (a[240] == 0.0) {
    if (allowzeropivot) {
      PetscErrorCode ierr;
      ierr = PetscInfo1(NULL,"Zero pivot, row %D\n",14);CHKERRQ(ierr);
      if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
    } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",14);
  }

  /* Now form the inverse */
  /* compute inverse(u) */
  for (k = 1; k <= 15; ++k) {
    k3    = 15*k;
    k4    = k3 + k;
    a[k4] = 1.0 / a[k4];   /* invert the diagonal entry */
    stmp  = -a[k4];
    i__2  = k - 1;
    aa    = &a[k3 + 1];
    /* scale entries 1..k-1 of group k by -1/diagonal */
    for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
    kp1 = k + 1;
    if (15 < kp1) continue;   /* no later group left to update */
    ax = aa;
    for (j = kp1; j <= 15; ++j) {
      j3        = 15*j;
      stmp      = a[k + j3];
      a[k + j3] = 0.0;   /* must be cleared before the update below, which also writes this entry */
      ay        = &a[j3 + 1];
      /* entries 1..k of group j += stmp * entries 1..k of group k */
      for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll];
    }
  }

  /* form inverse(u)*inverse(l) */
  /* groups are processed from k = 14 down to k = 1 */
  for (kb = 1; kb <= 14; ++kb) {
    k   = 15 - kb;
    k3  = 15*k;
    kp1 = k + 1;
    aa  = a + k3;
    /* save the multipliers of group k in work and clear them in a */
    for (i = kp1; i <= 15; ++i) {
      work[i-1] = aa[i];
      aa[i]     = 0.0;
    }
    /* group k += work[j-1] * group j, over the full length of the group (unrolled) */
    for (j = kp1; j <= 15; ++j) {
      stmp    = work[j-1];
      ax      = &a[15*j + 1];
      ay      = &a[k3 + 1];
      ay[0]  += stmp*ax[0];
      ay[1]  += stmp*ax[1];
      ay[2]  += stmp*ax[2];
      ay[3]  += stmp*ax[3];
      ay[4]  += stmp*ax[4];
      ay[5]  += stmp*ax[5];
      ay[6]  += stmp*ax[6];
      ay[7]  += stmp*ax[7];
      ay[8]  += stmp*ax[8];
      ay[9]  += stmp*ax[9];
      ay[10] += stmp*ax[10];
      ay[11] += stmp*ax[11];
      ay[12] += stmp*ax[12];
      ay[13] += stmp*ax[13];
      ay[14] += stmp*ax[14];
    }
    /* swap whole groups k and l using the pivot index recorded during elimination */
    l = ipvt[k-1];
    if (l != k) {
      ax   = &a[k3 + 1];
      ay   = &a[15*l + 1];
      stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp;
      stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp;
      stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp;
      stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp;
      stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp;
      stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp;
      stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp;
      stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp;
      stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp;
      stmp = ax[9]; ax[9] = ay[9]; ay[9] = stmp;
      stmp = ax[10]; ax[10] = ay[10]; ay[10] = stmp;
      stmp = ax[11]; ax[11] = ay[11]; ay[11] = stmp;
      stmp = ax[12]; ax[12] = ay[12]; ay[12] = stmp;
      stmp = ax[13]; ax[13] = ay[13]; ay[13] = stmp;
      stmp = ax[14]; ax[14] = ay[14]; ay[14] = stmp;
    }
  }
  PetscFunctionReturn(0);
}