1 2 /* 3 Inverts 2 by 2 matrix using gaussian elimination with partial pivoting. 4 5 Used by the sparse factorization routines in 6 src/mat/impls/baij/seq 7 8 This is a combination of the Linpack routines 9 dgefa() and dgedi() specialized for a size of 2. 10 11 */ 12 #include <petscsys.h> 13 14 PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_2(MatScalar *a,PetscReal shift,PetscBool allowzeropivot,PetscBool *zeropivotdetected) 15 { 16 PetscInt i__2,i__3,kp1,j,k,l,ll,i,ipvt[2],k3; 17 PetscInt k4,j3; 18 MatScalar *aa,*ax,*ay,work[4],stmp; 19 MatReal tmp,max; 20 21 PetscFunctionBegin; 22 if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE; 23 shift = .25*shift*(1.e-12 + PetscAbsScalar(a[0]) + PetscAbsScalar(a[3])); 24 25 /* Parameter adjustments */ 26 a -= 3; 27 28 k = 1; 29 kp1 = k + 1; 30 k3 = 2*k; 31 k4 = k3 + k; 32 33 /* find l = pivot index */ 34 i__2 = 3 - k; 35 aa = &a[k4]; 36 max = PetscAbsScalar(aa[0]); 37 l = 1; 38 for (ll=1; ll<i__2; ll++) { 39 tmp = PetscAbsScalar(aa[ll]); 40 if (tmp > max) { max = tmp; l = ll+1;} 41 } 42 l += k - 1; 43 ipvt[k-1] = l; 44 45 if (a[l + k3] == 0.0) { 46 if (shift == 0.0) { 47 if (allowzeropivot) { 48 PetscCall(PetscInfo(NULL,"Zero pivot, row %" PetscInt_FMT "\n",k-1)); 49 if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE; 50 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %" PetscInt_FMT,k-1); 51 } else { 52 a[l + k3] = shift; 53 } 54 } 55 56 /* interchange if necessary */ 57 if (l != k) { 58 stmp = a[l + k3]; 59 a[l + k3] = a[k4]; 60 a[k4] = stmp; 61 } 62 63 /* compute multipliers */ 64 stmp = -1. / a[k4]; 65 i__2 = 2 - k; 66 aa = &a[1 + k4]; 67 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 68 69 /* row elimination with column indexing */ 70 ax = &a[k4+1]; 71 for (j = kp1; j <= 2; ++j) { 72 j3 = 2*j; 73 stmp = a[l + j3]; 74 if (l != k) { 75 a[l + j3] = a[k + j3]; 76 a[k + j3] = stmp; 77 } 78 79 i__3 = 2 - k; 80 ay = &a[1+k+j3]; 81 for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll]; 82 } 83 84 ipvt[1] = 2; 85 if (a[6] == 0.0) { 86 if (PetscLikely(allowzeropivot)) { 87 PetscCall(PetscInfo(NULL,"Zero pivot, row 1\n")); 88 if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE; 89 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row 1"); 90 } 91 92 /* Now form the inverse */ 93 /* compute inverse(u) */ 94 for (k = 1; k <= 2; ++k) { 95 k3 = 2*k; 96 k4 = k3 + k; 97 a[k4] = 1.0 / a[k4]; 98 stmp = -a[k4]; 99 i__2 = k - 1; 100 aa = &a[k3 + 1]; 101 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 102 kp1 = k + 1; 103 if (2 < kp1) continue; 104 ax = aa; 105 for (j = kp1; j <= 2; ++j) { 106 j3 = 2*j; 107 stmp = a[k + j3]; 108 a[k + j3] = 0.0; 109 ay = &a[j3 + 1]; 110 for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll]; 111 } 112 } 113 114 /* form inverse(u)*inverse(l) */ 115 k = 1; 116 k3 = 2*k; 117 kp1 = k + 1; 118 aa = a + k3; 119 for (i = kp1; i <= 2; ++i) { 120 work[i-1] = aa[i]; 121 aa[i] = 0.0; 122 } 123 for (j = kp1; j <= 2; ++j) { 124 stmp = work[j-1]; 125 ax = &a[2*j + 1]; 126 ay = &a[k3 + 1]; 127 ay[0] += stmp*ax[0]; 128 ay[1] += stmp*ax[1]; 129 } 130 l = ipvt[k-1]; 131 if (l != k) { 132 ax = &a[k3 + 1]; 133 ay = &a[2*l + 1]; 134 stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp; 135 stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp; 136 } 137 PetscFunctionReturn(0); 138 } 139 140 /* gaussian elimination with partial pivoting */ 141 PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_9(MatScalar *a,PetscReal shift,PetscBool allowzeropivot,PetscBool *zeropivotdetected) 142 { 143 PetscInt i__2,i__3,kp1,j,k,l,ll,i,ipvt[9],kb,k3; 144 PetscInt k4,j3; 145 MatScalar *aa,*ax,*ay,work[81],stmp; 146 MatReal tmp,max; 147 148 PetscFunctionBegin; 149 if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE; 150 151 /* Parameter adjustments */ 152 a -= 10; 153 154 for (k = 1; k <= 8; ++k) { 155 kp1 = k + 1; 156 k3 = 9*k; 157 k4 = k3 + k; 158 159 /* find l = pivot index */ 160 i__2 = 10 - k; 161 aa = &a[k4]; 162 max = PetscAbsScalar(aa[0]); 163 l = 1; 164 for (ll=1; ll<i__2; ll++) { 165 tmp = PetscAbsScalar(aa[ll]); 166 if (tmp > max) { max = tmp; l = ll+1;} 167 } 168 l += k - 1; 169 ipvt[k-1] = l; 170 171 if (a[l + k3] == 0.0) { 172 if (shift == 0.0) { 173 if (allowzeropivot) { 174 PetscCall(PetscInfo(NULL,"Zero pivot, row %" PetscInt_FMT "\n",k-1)); 175 if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE; 176 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %" PetscInt_FMT,k-1); 177 } else { 178 a[l + k3] = shift; 179 } 180 } 181 182 /* interchange if necessary */ 183 if (l != k) { 184 stmp = a[l + k3]; 185 a[l + k3] = a[k4]; 186 a[k4] = stmp; 187 } 188 189 /* compute multipliers */ 190 stmp = -1. / a[k4]; 191 i__2 = 9 - k; 192 aa = &a[1 + k4]; 193 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 194 195 /* row elimination with column indexing */ 196 ax = &a[k4+1]; 197 for (j = kp1; j <= 9; ++j) { 198 j3 = 9*j; 199 stmp = a[l + j3]; 200 if (l != k) { 201 a[l + j3] = a[k + j3]; 202 a[k + j3] = stmp; 203 } 204 205 i__3 = 9 - k; 206 ay = &a[1+k+j3]; 207 for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll]; 208 } 209 } 210 ipvt[8] = 9; 211 if (a[90] == 0.0) { 212 if (PetscLikely(allowzeropivot)) { 213 PetscCall(PetscInfo(NULL,"Zero pivot, row 8\n")); 214 if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE; 215 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row 8"); 216 } 217 218 /* Now form the inverse */ 219 /* compute inverse(u) */ 220 for (k = 1; k <= 9; ++k) { 221 k3 = 9*k; 222 k4 = k3 + k; 223 a[k4] = 1.0 / a[k4]; 224 stmp = -a[k4]; 225 i__2 = k - 1; 226 aa = &a[k3 + 1]; 227 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 228 kp1 = k + 1; 229 if (9 < kp1) continue; 230 ax = aa; 231 for (j = kp1; j <= 9; ++j) { 232 j3 = 9*j; 233 stmp = a[k + j3]; 234 a[k + j3] = 0.0; 235 ay = &a[j3 + 1]; 236 for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll]; 237 } 238 } 239 240 /* form inverse(u)*inverse(l) */ 241 for (kb = 1; kb <= 8; ++kb) { 242 k = 9 - kb; 243 k3 = 9*k; 244 kp1 = k + 1; 245 aa = a + k3; 246 for (i = kp1; i <= 9; ++i) { 247 work[i-1] = aa[i]; 248 aa[i] = 0.0; 249 } 250 for (j = kp1; j <= 9; ++j) { 251 stmp = work[j-1]; 252 ax = &a[9*j + 1]; 253 ay = &a[k3 + 1]; 254 ay[0] += stmp*ax[0]; 255 ay[1] += stmp*ax[1]; 256 ay[2] += stmp*ax[2]; 257 ay[3] += stmp*ax[3]; 258 ay[4] += stmp*ax[4]; 259 ay[5] += stmp*ax[5]; 260 ay[6] += stmp*ax[6]; 261 ay[7] += stmp*ax[7]; 262 ay[8] += stmp*ax[8]; 263 } 264 l = ipvt[k-1]; 265 if (l != k) { 266 ax = &a[k3 + 1]; 267 ay = &a[9*l + 1]; 268 stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp; 269 stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp; 270 stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp; 271 stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp; 272 stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp; 273 stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp; 274 stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp; 275 stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp; 276 stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp; 277 } 278 } 279 PetscFunctionReturn(0); 280 } 281 282 /* 283 Inverts 15 by 15 matrix using gaussian elimination with partial pivoting. 284 285 Used by the sparse factorization routines in 286 src/mat/impls/baij/seq 287 288 This is a combination of the Linpack routines 289 dgefa() and dgedi() specialized for a size of 15. 290 291 */ 292 293 PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_15(MatScalar *a,PetscInt *ipvt,MatScalar *work,PetscReal shift,PetscBool allowzeropivot,PetscBool *zeropivotdetected) 294 { 295 PetscInt i__2,i__3,kp1,j,k,l,ll,i,kb,k3; 296 PetscInt k4,j3; 297 MatScalar *aa,*ax,*ay,stmp; 298 MatReal tmp,max; 299 300 PetscFunctionBegin; 301 if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE; 302 303 /* Parameter adjustments */ 304 a -= 16; 305 306 for (k = 1; k <= 14; ++k) { 307 kp1 = k + 1; 308 k3 = 15*k; 309 k4 = k3 + k; 310 311 /* find l = pivot index */ 312 i__2 = 16 - k; 313 aa = &a[k4]; 314 max = PetscAbsScalar(aa[0]); 315 l = 1; 316 for (ll=1; ll<i__2; ll++) { 317 tmp = PetscAbsScalar(aa[ll]); 318 if (tmp > max) { max = tmp; l = ll+1;} 319 } 320 l += k - 1; 321 ipvt[k-1] = l; 322 323 if (a[l + k3] == 0.0) { 324 if (shift == 0.0) { 325 if (allowzeropivot) { 326 PetscCall(PetscInfo(NULL,"Zero pivot, row %" PetscInt_FMT "\n",k-1)); 327 if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE; 328 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %" PetscInt_FMT,k-1); 329 } else { 330 a[l + k3] = shift; 331 } 332 } 333 334 /* interchange if necessary */ 335 if (l != k) { 336 stmp = a[l + k3]; 337 a[l + k3] = a[k4]; 338 a[k4] = stmp; 339 } 340 341 /* compute multipliers */ 342 stmp = -1. / a[k4]; 343 i__2 = 15 - k; 344 aa = &a[1 + k4]; 345 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 346 347 /* row elimination with column indexing */ 348 ax = &a[k4+1]; 349 for (j = kp1; j <= 15; ++j) { 350 j3 = 15*j; 351 stmp = a[l + j3]; 352 if (l != k) { 353 a[l + j3] = a[k + j3]; 354 a[k + j3] = stmp; 355 } 356 357 i__3 = 15 - k; 358 ay = &a[1+k+j3]; 359 for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll]; 360 } 361 } 362 ipvt[14] = 15; 363 if (a[240] == 0.0) { 364 if (PetscLikely(allowzeropivot)) { 365 PetscCall(PetscInfo(NULL,"Zero pivot, row 14\n")); 366 if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE; 367 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row 14"); 368 } 369 370 /* Now form the inverse */ 371 /* compute inverse(u) */ 372 for (k = 1; k <= 15; ++k) { 373 k3 = 15*k; 374 k4 = k3 + k; 375 a[k4] = 1.0 / a[k4]; 376 stmp = -a[k4]; 377 i__2 = k - 1; 378 aa = &a[k3 + 1]; 379 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 380 kp1 = k + 1; 381 if (15 < kp1) continue; 382 ax = aa; 383 for (j = kp1; j <= 15; ++j) { 384 j3 = 15*j; 385 stmp = a[k + j3]; 386 a[k + j3] = 0.0; 387 ay = &a[j3 + 1]; 388 for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll]; 389 } 390 } 391 392 /* form inverse(u)*inverse(l) */ 393 for (kb = 1; kb <= 14; ++kb) { 394 k = 15 - kb; 395 k3 = 15*k; 396 kp1 = k + 1; 397 aa = a + k3; 398 for (i = kp1; i <= 15; ++i) { 399 work[i-1] = aa[i]; 400 aa[i] = 0.0; 401 } 402 for (j = kp1; j <= 15; ++j) { 403 stmp = work[j-1]; 404 ax = &a[15*j + 1]; 405 ay = &a[k3 + 1]; 406 ay[0] += stmp*ax[0]; 407 ay[1] += stmp*ax[1]; 408 ay[2] += stmp*ax[2]; 409 ay[3] += stmp*ax[3]; 410 ay[4] += stmp*ax[4]; 411 ay[5] += stmp*ax[5]; 412 ay[6] += stmp*ax[6]; 413 ay[7] += stmp*ax[7]; 414 ay[8] += stmp*ax[8]; 415 ay[9] += stmp*ax[9]; 416 ay[10] += stmp*ax[10]; 417 ay[11] += stmp*ax[11]; 418 ay[12] += stmp*ax[12]; 419 ay[13] += stmp*ax[13]; 420 ay[14] += stmp*ax[14]; 421 } 422 l = ipvt[k-1]; 423 if (l != k) { 424 ax = &a[k3 + 1]; 425 ay = &a[15*l + 1]; 426 stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp; 427 stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp; 428 stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp; 429 stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp; 430 stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp; 431 stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp; 432 stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp; 433 stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp; 434 stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp; 435 stmp = ax[9]; ax[9] = ay[9]; ay[9] = stmp; 436 stmp = ax[10]; ax[10] = ay[10]; ay[10] = stmp; 437 stmp = ax[11]; ax[11] = ay[11]; ay[11] = stmp; 438 stmp = ax[12]; ax[12] = ay[12]; ay[12] = stmp; 439 stmp = ax[13]; ax[13] = ay[13]; ay[13] = stmp; 440 stmp = ax[14]; ax[14] = ay[14]; ay[14] = stmp; 441 } 442 } 443 PetscFunctionReturn(0); 444 } 445