1 2 /* 3 Inverts 2 by 2 matrix using gaussian elimination with partial pivoting. 4 5 Used by the sparse factorization routines in 6 src/mat/impls/baij/seq 7 8 This is a combination of the Linpack routines 9 dgefa() and dgedi() specialized for a size of 2. 10 11 */ 12 #include <petscsys.h> 13 14 PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_2(MatScalar *a,PetscReal shift,PetscBool allowzeropivot,PetscBool *zeropivotdetected) 15 { 16 PetscInt i__2,i__3,kp1,j,k,l,ll,i,ipvt[2],k3; 17 PetscInt k4,j3; 18 MatScalar *aa,*ax,*ay,work[4],stmp; 19 MatReal tmp,max; 20 21 PetscFunctionBegin; 22 if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE; 23 shift = .25*shift*(1.e-12 + PetscAbsScalar(a[0]) + PetscAbsScalar(a[3])); 24 25 /* Parameter adjustments */ 26 a -= 3; 27 28 k = 1; 29 kp1 = k + 1; 30 k3 = 2*k; 31 k4 = k3 + k; 32 33 /* find l = pivot index */ 34 i__2 = 3 - k; 35 aa = &a[k4]; 36 max = PetscAbsScalar(aa[0]); 37 l = 1; 38 for (ll=1; ll<i__2; ll++) { 39 tmp = PetscAbsScalar(aa[ll]); 40 if (tmp > max) { max = tmp; l = ll+1;} 41 } 42 l += k - 1; 43 ipvt[k-1] = l; 44 45 if (a[l + k3] == 0.0) { 46 if (shift == 0.0) { 47 if (allowzeropivot) { 48 PetscErrorCode ierr; 49 ierr = PetscInfo1(NULL,"Zero pivot, row %D\n",k-1);CHKERRQ(ierr); 50 if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE; 51 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1); 52 } else { 53 a[l + k3] = shift; 54 } 55 } 56 57 /* interchange if necessary */ 58 if (l != k) { 59 stmp = a[l + k3]; 60 a[l + k3] = a[k4]; 61 a[k4] = stmp; 62 } 63 64 /* compute multipliers */ 65 stmp = -1. / a[k4]; 66 i__2 = 2 - k; 67 aa = &a[1 + k4]; 68 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 69 70 /* row elimination with column indexing */ 71 ax = &a[k4+1]; 72 for (j = kp1; j <= 2; ++j) { 73 j3 = 2*j; 74 stmp = a[l + j3]; 75 if (l != k) { 76 a[l + j3] = a[k + j3]; 77 a[k + j3] = stmp; 78 } 79 80 i__3 = 2 - k; 81 ay = &a[1+k+j3]; 82 for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll]; 83 } 84 85 ipvt[1] = 2; 86 if (a[6] == 0.0) { 87 if (allowzeropivot) { 88 PetscErrorCode ierr; 89 ierr = PetscInfo1(NULL,"Zero pivot, row %D\n",1);CHKERRQ(ierr); 90 if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE; 91 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",1); 92 } 93 94 /* Now form the inverse */ 95 /* compute inverse(u) */ 96 for (k = 1; k <= 2; ++k) { 97 k3 = 2*k; 98 k4 = k3 + k; 99 a[k4] = 1.0 / a[k4]; 100 stmp = -a[k4]; 101 i__2 = k - 1; 102 aa = &a[k3 + 1]; 103 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 104 kp1 = k + 1; 105 if (2 < kp1) continue; 106 ax = aa; 107 for (j = kp1; j <= 2; ++j) { 108 j3 = 2*j; 109 stmp = a[k + j3]; 110 a[k + j3] = 0.0; 111 ay = &a[j3 + 1]; 112 for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll]; 113 } 114 } 115 116 /* form inverse(u)*inverse(l) */ 117 k = 1; 118 k3 = 2*k; 119 kp1 = k + 1; 120 aa = a + k3; 121 for (i = kp1; i <= 2; ++i) { 122 work[i-1] = aa[i]; 123 aa[i] = 0.0; 124 } 125 for (j = kp1; j <= 2; ++j) { 126 stmp = work[j-1]; 127 ax = &a[2*j + 1]; 128 ay = &a[k3 + 1]; 129 ay[0] += stmp*ax[0]; 130 ay[1] += stmp*ax[1]; 131 } 132 l = ipvt[k-1]; 133 if (l != k) { 134 ax = &a[k3 + 1]; 135 ay = &a[2*l + 1]; 136 stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp; 137 stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp; 138 } 139 PetscFunctionReturn(0); 140 } 141 142 /* gaussian elimination with partial pivoting */ 143 PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_9(MatScalar *a,PetscReal shift,PetscBool allowzeropivot,PetscBool *zeropivotdetected) 144 { 145 PetscInt i__2,i__3,kp1,j,k,l,ll,i,ipvt[9],kb,k3; 146 PetscInt k4,j3; 147 MatScalar *aa,*ax,*ay,work[81],stmp; 148 MatReal tmp,max; 149 150 PetscFunctionBegin; 151 if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE; 152 153 /* Parameter adjustments */ 154 a -= 10; 155 156 for (k = 1; k <= 8; ++k) { 157 kp1 = k + 1; 158 k3 = 9*k; 159 k4 = k3 + k; 160 161 /* find l = pivot index */ 162 i__2 = 10 - k; 163 aa = &a[k4]; 164 max = PetscAbsScalar(aa[0]); 165 l = 1; 166 for (ll=1; ll<i__2; ll++) { 167 tmp = PetscAbsScalar(aa[ll]); 168 if (tmp > max) { max = tmp; l = ll+1;} 169 } 170 l += k - 1; 171 ipvt[k-1] = l; 172 173 if (a[l + k3] == 0.0) { 174 if (shift == 0.0) { 175 if (allowzeropivot) { 176 PetscErrorCode ierr; 177 ierr = PetscInfo1(NULL,"Zero pivot, row %D\n",k-1);CHKERRQ(ierr); 178 if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE; 179 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1); 180 } else { 181 a[l + k3] = shift; 182 } 183 } 184 185 /* interchange if necessary */ 186 if (l != k) { 187 stmp = a[l + k3]; 188 a[l + k3] = a[k4]; 189 a[k4] = stmp; 190 } 191 192 /* compute multipliers */ 193 stmp = -1. / a[k4]; 194 i__2 = 9 - k; 195 aa = &a[1 + k4]; 196 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 197 198 /* row elimination with column indexing */ 199 ax = &a[k4+1]; 200 for (j = kp1; j <= 9; ++j) { 201 j3 = 9*j; 202 stmp = a[l + j3]; 203 if (l != k) { 204 a[l + j3] = a[k + j3]; 205 a[k + j3] = stmp; 206 } 207 208 i__3 = 9 - k; 209 ay = &a[1+k+j3]; 210 for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll]; 211 } 212 } 213 ipvt[8] = 9; 214 if (a[90] == 0.0) { 215 if (allowzeropivot) { 216 PetscErrorCode ierr; 217 ierr = PetscInfo1(NULL,"Zero pivot, row %D\n",8);CHKERRQ(ierr); 218 if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE; 219 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",8); 220 } 221 222 /* Now form the inverse */ 223 /* compute inverse(u) */ 224 for (k = 1; k <= 9; ++k) { 225 k3 = 9*k; 226 k4 = k3 + k; 227 a[k4] = 1.0 / a[k4]; 228 stmp = -a[k4]; 229 i__2 = k - 1; 230 aa = &a[k3 + 1]; 231 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 232 kp1 = k + 1; 233 if (9 < kp1) continue; 234 ax = aa; 235 for (j = kp1; j <= 9; ++j) { 236 j3 = 9*j; 237 stmp = a[k + j3]; 238 a[k + j3] = 0.0; 239 ay = &a[j3 + 1]; 240 for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll]; 241 } 242 } 243 244 /* form inverse(u)*inverse(l) */ 245 for (kb = 1; kb <= 8; ++kb) { 246 k = 9 - kb; 247 k3 = 9*k; 248 kp1 = k + 1; 249 aa = a + k3; 250 for (i = kp1; i <= 9; ++i) { 251 work[i-1] = aa[i]; 252 aa[i] = 0.0; 253 } 254 for (j = kp1; j <= 9; ++j) { 255 stmp = work[j-1]; 256 ax = &a[9*j + 1]; 257 ay = &a[k3 + 1]; 258 ay[0] += stmp*ax[0]; 259 ay[1] += stmp*ax[1]; 260 ay[2] += stmp*ax[2]; 261 ay[3] += stmp*ax[3]; 262 ay[4] += stmp*ax[4]; 263 ay[5] += stmp*ax[5]; 264 ay[6] += stmp*ax[6]; 265 ay[7] += stmp*ax[7]; 266 ay[8] += stmp*ax[8]; 267 } 268 l = ipvt[k-1]; 269 if (l != k) { 270 ax = &a[k3 + 1]; 271 ay = &a[9*l + 1]; 272 stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp; 273 stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp; 274 stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp; 275 stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp; 276 stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp; 277 stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp; 278 stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp; 279 stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp; 280 stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp; 281 } 282 } 283 PetscFunctionReturn(0); 284 } 285 286 /* 287 Inverts 15 by 15 matrix using gaussian elimination with partial pivoting. 288 289 Used by the sparse factorization routines in 290 src/mat/impls/baij/seq 291 292 This is a combination of the Linpack routines 293 dgefa() and dgedi() specialized for a size of 15. 294 295 */ 296 297 PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_15(MatScalar *a,PetscInt *ipvt,MatScalar *work,PetscReal shift,PetscBool allowzeropivot,PetscBool *zeropivotdetected) 298 { 299 PetscInt i__2,i__3,kp1,j,k,l,ll,i,kb,k3; 300 PetscInt k4,j3; 301 MatScalar *aa,*ax,*ay,stmp; 302 MatReal tmp,max; 303 304 PetscFunctionBegin; 305 if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE; 306 307 /* Parameter adjustments */ 308 a -= 16; 309 310 for (k = 1; k <= 14; ++k) { 311 kp1 = k + 1; 312 k3 = 15*k; 313 k4 = k3 + k; 314 315 /* find l = pivot index */ 316 i__2 = 16 - k; 317 aa = &a[k4]; 318 max = PetscAbsScalar(aa[0]); 319 l = 1; 320 for (ll=1; ll<i__2; ll++) { 321 tmp = PetscAbsScalar(aa[ll]); 322 if (tmp > max) { max = tmp; l = ll+1;} 323 } 324 l += k - 1; 325 ipvt[k-1] = l; 326 327 if (a[l + k3] == 0.0) { 328 if (shift == 0.0) { 329 if (allowzeropivot) { 330 PetscErrorCode ierr; 331 ierr = PetscInfo1(NULL,"Zero pivot, row %D\n",k-1);CHKERRQ(ierr); 332 if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE; 333 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1); 334 } else { 335 a[l + k3] = shift; 336 } 337 } 338 339 /* interchange if necessary */ 340 if (l != k) { 341 stmp = a[l + k3]; 342 a[l + k3] = a[k4]; 343 a[k4] = stmp; 344 } 345 346 /* compute multipliers */ 347 stmp = -1. / a[k4]; 348 i__2 = 15 - k; 349 aa = &a[1 + k4]; 350 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 351 352 /* row elimination with column indexing */ 353 ax = &a[k4+1]; 354 for (j = kp1; j <= 15; ++j) { 355 j3 = 15*j; 356 stmp = a[l + j3]; 357 if (l != k) { 358 a[l + j3] = a[k + j3]; 359 a[k + j3] = stmp; 360 } 361 362 i__3 = 15 - k; 363 ay = &a[1+k+j3]; 364 for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll]; 365 } 366 } 367 ipvt[14] = 15; 368 if (a[240] == 0.0) { 369 if (allowzeropivot) { 370 PetscErrorCode ierr; 371 ierr = PetscInfo1(NULL,"Zero pivot, row %D\n",14);CHKERRQ(ierr); 372 if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE; 373 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",14); 374 } 375 376 /* Now form the inverse */ 377 /* compute inverse(u) */ 378 for (k = 1; k <= 15; ++k) { 379 k3 = 15*k; 380 k4 = k3 + k; 381 a[k4] = 1.0 / a[k4]; 382 stmp = -a[k4]; 383 i__2 = k - 1; 384 aa = &a[k3 + 1]; 385 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 386 kp1 = k + 1; 387 if (15 < kp1) continue; 388 ax = aa; 389 for (j = kp1; j <= 15; ++j) { 390 j3 = 15*j; 391 stmp = a[k + j3]; 392 a[k + j3] = 0.0; 393 ay = &a[j3 + 1]; 394 for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll]; 395 } 396 } 397 398 /* form inverse(u)*inverse(l) */ 399 for (kb = 1; kb <= 14; ++kb) { 400 k = 15 - kb; 401 k3 = 15*k; 402 kp1 = k + 1; 403 aa = a + k3; 404 for (i = kp1; i <= 15; ++i) { 405 work[i-1] = aa[i]; 406 aa[i] = 0.0; 407 } 408 for (j = kp1; j <= 15; ++j) { 409 stmp = work[j-1]; 410 ax = &a[15*j + 1]; 411 ay = &a[k3 + 1]; 412 ay[0] += stmp*ax[0]; 413 ay[1] += stmp*ax[1]; 414 ay[2] += stmp*ax[2]; 415 ay[3] += stmp*ax[3]; 416 ay[4] += stmp*ax[4]; 417 ay[5] += stmp*ax[5]; 418 ay[6] += stmp*ax[6]; 419 ay[7] += stmp*ax[7]; 420 ay[8] += stmp*ax[8]; 421 ay[9] += stmp*ax[9]; 422 ay[10] += stmp*ax[10]; 423 ay[11] += stmp*ax[11]; 424 ay[12] += stmp*ax[12]; 425 ay[13] += stmp*ax[13]; 426 ay[14] += stmp*ax[14]; 427 } 428 l = ipvt[k-1]; 429 if (l != k) { 430 ax = &a[k3 + 1]; 431 ay = &a[15*l + 1]; 432 stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp; 433 stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp; 434 stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp; 435 stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp; 436 stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp; 437 stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp; 438 stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp; 439 stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp; 440 stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp; 441 stmp = ax[9]; ax[9] = ay[9]; ay[9] = stmp; 442 stmp = ax[10]; ax[10] = ay[10]; ay[10] = stmp; 443 stmp = ax[11]; ax[11] = ay[11]; ay[11] = stmp; 444 stmp = ax[12]; ax[12] = ay[12]; ay[12] = stmp; 445 stmp = ax[13]; ax[13] = ay[13]; ay[13] = stmp; 446 stmp = ax[14]; ax[14] = ay[14]; ay[14] = stmp; 447 } 448 } 449 PetscFunctionReturn(0); 450 } 451