1 /*
2 Inverts 2 by 2 matrix using gaussian elimination with partial pivoting.
3
4 Used by the sparse factorization routines in
5 src/mat/impls/baij/seq
6
7 This is a combination of the Linpack routines
8 dgefa() and dgedi() specialized for a size of 2.
9
10 */
11 #include <petscsys.h>
12 #include <petsc/private/kernels/blockinvert.h>
13
PetscKernel_A_gets_inverse_A_2(MatScalar * a,PetscReal shift,PetscBool allowzeropivot,PetscBool * zeropivotdetected)14 PetscErrorCode PetscKernel_A_gets_inverse_A_2(MatScalar *a, PetscReal shift, PetscBool allowzeropivot, PetscBool *zeropivotdetected)
15 {
16 PetscInt i__2, i__3, kp1, j, k, l, ll, i, ipvt[2], k3;
17 PetscInt k4, j3;
18 MatScalar *aa, *ax, *ay, work[4], stmp;
19 MatReal tmp, max;
20
21 PetscFunctionBegin;
22 if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE;
23 shift = .25 * shift * (1.e-12 + PetscAbsScalar(a[0]) + PetscAbsScalar(a[3]));
24
25 /* Parameter adjustments */
26 a -= 3;
27
28 k = 1;
29 kp1 = k + 1;
30 k3 = 2 * k;
31 k4 = k3 + k;
32
33 /* find l = pivot index */
34 i__2 = 3 - k;
35 aa = &a[k4];
36 max = PetscAbsScalar(aa[0]);
37 l = 1;
38 for (ll = 1; ll < i__2; ll++) {
39 tmp = PetscAbsScalar(aa[ll]);
40 if (tmp > max) {
41 max = tmp;
42 l = ll + 1;
43 }
44 }
45 l += k - 1;
46 ipvt[k - 1] = l;
47
48 if (a[l + k3] == 0.0) {
49 if (shift == 0.0) {
50 PetscCheck(allowzeropivot, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT, k - 1);
51 PetscCall(PetscInfo(NULL, "Zero pivot, row %" PetscInt_FMT "\n", k - 1));
52 if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
53 } else {
54 a[l + k3] = shift;
55 }
56 }
57
58 /* interchange if necessary */
59 if (l != k) {
60 stmp = a[l + k3];
61 a[l + k3] = a[k4];
62 a[k4] = stmp;
63 }
64
65 /* compute multipliers */
66 stmp = -1. / a[k4];
67 i__2 = 2 - k;
68 aa = &a[1 + k4];
69 for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp;
70
71 /* row elimination with column indexing */
72 ax = &a[k4 + 1];
73 for (j = kp1; j <= 2; ++j) {
74 j3 = 2 * j;
75 stmp = a[l + j3];
76 if (l != k) {
77 a[l + j3] = a[k + j3];
78 a[k + j3] = stmp;
79 }
80
81 i__3 = 2 - k;
82 ay = &a[1 + k + j3];
83 for (ll = 0; ll < i__3; ll++) ay[ll] += stmp * ax[ll];
84 }
85
86 ipvt[1] = 2;
87 if (a[6] == 0.0) {
88 PetscCheck(PetscLikely(allowzeropivot), PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row 1");
89 PetscCall(PetscInfo(NULL, "Zero pivot, row 1\n"));
90 if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
91 }
92
93 /* Now form the inverse */
94 /* compute inverse(u) */
95 for (k = 1; k <= 2; ++k) {
96 k3 = 2 * k;
97 k4 = k3 + k;
98 a[k4] = 1.0 / a[k4];
99 stmp = -a[k4];
100 i__2 = k - 1;
101 aa = &a[k3 + 1];
102 for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp;
103 kp1 = k + 1;
104 if (2 < kp1) continue;
105 ax = aa;
106 for (j = kp1; j <= 2; ++j) {
107 j3 = 2 * j;
108 stmp = a[k + j3];
109 a[k + j3] = 0.0;
110 ay = &a[j3 + 1];
111 for (ll = 0; ll < k; ll++) ay[ll] += stmp * ax[ll];
112 }
113 }
114
115 /* form inverse(u)*inverse(l) */
116 k = 1;
117 k3 = 2 * k;
118 kp1 = k + 1;
119 aa = a + k3;
120 for (i = kp1; i <= 2; ++i) {
121 work[i - 1] = aa[i];
122 aa[i] = 0.0;
123 }
124 for (j = kp1; j <= 2; ++j) {
125 stmp = work[j - 1];
126 ax = &a[2 * j + 1];
127 ay = &a[k3 + 1];
128 ay[0] += stmp * ax[0];
129 ay[1] += stmp * ax[1];
130 }
131 l = ipvt[k - 1];
132 if (l != k) {
133 ax = &a[k3 + 1];
134 ay = &a[2 * l + 1];
135 stmp = ax[0];
136 ax[0] = ay[0];
137 ay[0] = stmp;
138 stmp = ax[1];
139 ax[1] = ay[1];
140 ay[1] = stmp;
141 }
142 PetscFunctionReturn(PETSC_SUCCESS);
143 }
144
145 /* Gaussian elimination with partial pivoting */
PetscKernel_A_gets_inverse_A_9(MatScalar * a,PetscReal shift,PetscBool allowzeropivot,PetscBool * zeropivotdetected)146 PetscErrorCode PetscKernel_A_gets_inverse_A_9(MatScalar *a, PetscReal shift, PetscBool allowzeropivot, PetscBool *zeropivotdetected)
147 {
148 PetscInt i__2, i__3, kp1, j, k, l, ll, i, ipvt[9], kb, k3;
149 PetscInt k4, j3;
150 MatScalar *aa, *ax, *ay, work[81], stmp;
151 MatReal tmp, max;
152
153 PetscFunctionBegin;
154 if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE;
155
156 /* Parameter adjustments */
157 a -= 10;
158
159 for (k = 1; k <= 8; ++k) {
160 kp1 = k + 1;
161 k3 = 9 * k;
162 k4 = k3 + k;
163
164 /* find l = pivot index */
165 i__2 = 10 - k;
166 aa = &a[k4];
167 max = PetscAbsScalar(aa[0]);
168 l = 1;
169 for (ll = 1; ll < i__2; ll++) {
170 tmp = PetscAbsScalar(aa[ll]);
171 if (tmp > max) {
172 max = tmp;
173 l = ll + 1;
174 }
175 }
176 l += k - 1;
177 ipvt[k - 1] = l;
178
179 if (a[l + k3] == 0.0) {
180 if (shift == 0.0) {
181 PetscCheck(allowzeropivot, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT, k - 1);
182 PetscCall(PetscInfo(NULL, "Zero pivot, row %" PetscInt_FMT "\n", k - 1));
183 if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
184 } else {
185 a[l + k3] = shift;
186 }
187 }
188
189 /* interchange if necessary */
190 if (l != k) {
191 stmp = a[l + k3];
192 a[l + k3] = a[k4];
193 a[k4] = stmp;
194 }
195
196 /* compute multipliers */
197 stmp = -1. / a[k4];
198 i__2 = 9 - k;
199 aa = &a[1 + k4];
200 for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp;
201
202 /* row elimination with column indexing */
203 ax = &a[k4 + 1];
204 for (j = kp1; j <= 9; ++j) {
205 j3 = 9 * j;
206 stmp = a[l + j3];
207 if (l != k) {
208 a[l + j3] = a[k + j3];
209 a[k + j3] = stmp;
210 }
211
212 i__3 = 9 - k;
213 ay = &a[1 + k + j3];
214 for (ll = 0; ll < i__3; ll++) ay[ll] += stmp * ax[ll];
215 }
216 }
217 ipvt[8] = 9;
218 if (a[90] == 0.0) {
219 PetscCheck(PetscLikely(allowzeropivot), PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row 8");
220 PetscCall(PetscInfo(NULL, "Zero pivot, row 8\n"));
221 if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
222 }
223
224 /* Now form the inverse */
225 /* compute inverse(u) */
226 for (k = 1; k <= 9; ++k) {
227 k3 = 9 * k;
228 k4 = k3 + k;
229 a[k4] = 1.0 / a[k4];
230 stmp = -a[k4];
231 i__2 = k - 1;
232 aa = &a[k3 + 1];
233 for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp;
234 kp1 = k + 1;
235 if (9 < kp1) continue;
236 ax = aa;
237 for (j = kp1; j <= 9; ++j) {
238 j3 = 9 * j;
239 stmp = a[k + j3];
240 a[k + j3] = 0.0;
241 ay = &a[j3 + 1];
242 for (ll = 0; ll < k; ll++) ay[ll] += stmp * ax[ll];
243 }
244 }
245
246 /* form inverse(u)*inverse(l) */
247 for (kb = 1; kb <= 8; ++kb) {
248 k = 9 - kb;
249 k3 = 9 * k;
250 kp1 = k + 1;
251 aa = a + k3;
252 for (i = kp1; i <= 9; ++i) {
253 work[i - 1] = aa[i];
254 aa[i] = 0.0;
255 }
256 for (j = kp1; j <= 9; ++j) {
257 stmp = work[j - 1];
258 ax = &a[9 * j + 1];
259 ay = &a[k3 + 1];
260 ay[0] += stmp * ax[0];
261 ay[1] += stmp * ax[1];
262 ay[2] += stmp * ax[2];
263 ay[3] += stmp * ax[3];
264 ay[4] += stmp * ax[4];
265 ay[5] += stmp * ax[5];
266 ay[6] += stmp * ax[6];
267 ay[7] += stmp * ax[7];
268 ay[8] += stmp * ax[8];
269 }
270 l = ipvt[k - 1];
271 if (l != k) {
272 ax = &a[k3 + 1];
273 ay = &a[9 * l + 1];
274 stmp = ax[0];
275 ax[0] = ay[0];
276 ay[0] = stmp;
277 stmp = ax[1];
278 ax[1] = ay[1];
279 ay[1] = stmp;
280 stmp = ax[2];
281 ax[2] = ay[2];
282 ay[2] = stmp;
283 stmp = ax[3];
284 ax[3] = ay[3];
285 ay[3] = stmp;
286 stmp = ax[4];
287 ax[4] = ay[4];
288 ay[4] = stmp;
289 stmp = ax[5];
290 ax[5] = ay[5];
291 ay[5] = stmp;
292 stmp = ax[6];
293 ax[6] = ay[6];
294 ay[6] = stmp;
295 stmp = ax[7];
296 ax[7] = ay[7];
297 ay[7] = stmp;
298 stmp = ax[8];
299 ax[8] = ay[8];
300 ay[8] = stmp;
301 }
302 }
303 PetscFunctionReturn(PETSC_SUCCESS);
304 }
305
306 /*
307 Inverts 15 by 15 matrix using gaussian elimination with partial pivoting.
308
309 Used by the sparse factorization routines in
310 src/mat/impls/baij/seq
311
312 This is a combination of the Linpack routines
313 dgefa() and dgedi() specialized for a size of 15.
314
315 */
316
PetscKernel_A_gets_inverse_A_15(MatScalar * a,PetscInt * ipvt,MatScalar * work,PetscReal shift,PetscBool allowzeropivot,PetscBool * zeropivotdetected)317 PetscErrorCode PetscKernel_A_gets_inverse_A_15(MatScalar *a, PetscInt *ipvt, MatScalar *work, PetscReal shift, PetscBool allowzeropivot, PetscBool *zeropivotdetected)
318 {
319 PetscInt i__2, i__3, kp1, j, k, l, ll, i, kb, k3;
320 PetscInt k4, j3;
321 MatScalar *aa, *ax, *ay, stmp;
322 MatReal tmp, max;
323
324 PetscFunctionBegin;
325 if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE;
326
327 /* Parameter adjustments */
328 a -= 16;
329
330 for (k = 1; k <= 14; ++k) {
331 kp1 = k + 1;
332 k3 = 15 * k;
333 k4 = k3 + k;
334
335 /* find l = pivot index */
336 i__2 = 16 - k;
337 aa = &a[k4];
338 max = PetscAbsScalar(aa[0]);
339 l = 1;
340 for (ll = 1; ll < i__2; ll++) {
341 tmp = PetscAbsScalar(aa[ll]);
342 if (tmp > max) {
343 max = tmp;
344 l = ll + 1;
345 }
346 }
347 l += k - 1;
348 ipvt[k - 1] = l;
349
350 if (a[l + k3] == 0.0) {
351 if (shift == 0.0) {
352 PetscCheck(allowzeropivot, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT, k - 1);
353 PetscCall(PetscInfo(NULL, "Zero pivot, row %" PetscInt_FMT "\n", k - 1));
354 if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
355 } else {
356 a[l + k3] = shift;
357 }
358 }
359
360 /* interchange if necessary */
361 if (l != k) {
362 stmp = a[l + k3];
363 a[l + k3] = a[k4];
364 a[k4] = stmp;
365 }
366
367 /* compute multipliers */
368 stmp = -1. / a[k4];
369 i__2 = 15 - k;
370 aa = &a[1 + k4];
371 for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp;
372
373 /* row elimination with column indexing */
374 ax = &a[k4 + 1];
375 for (j = kp1; j <= 15; ++j) {
376 j3 = 15 * j;
377 stmp = a[l + j3];
378 if (l != k) {
379 a[l + j3] = a[k + j3];
380 a[k + j3] = stmp;
381 }
382
383 i__3 = 15 - k;
384 ay = &a[1 + k + j3];
385 for (ll = 0; ll < i__3; ll++) ay[ll] += stmp * ax[ll];
386 }
387 }
388 ipvt[14] = 15;
389 if (a[240] == 0.0) {
390 PetscCheck(PetscLikely(allowzeropivot), PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row 14");
391 PetscCall(PetscInfo(NULL, "Zero pivot, row 14\n"));
392 if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
393 }
394
395 /* Now form the inverse */
396 /* compute inverse(u) */
397 for (k = 1; k <= 15; ++k) {
398 k3 = 15 * k;
399 k4 = k3 + k;
400 a[k4] = 1.0 / a[k4];
401 stmp = -a[k4];
402 i__2 = k - 1;
403 aa = &a[k3 + 1];
404 for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp;
405 kp1 = k + 1;
406 if (15 < kp1) continue;
407 ax = aa;
408 for (j = kp1; j <= 15; ++j) {
409 j3 = 15 * j;
410 stmp = a[k + j3];
411 a[k + j3] = 0.0;
412 ay = &a[j3 + 1];
413 for (ll = 0; ll < k; ll++) ay[ll] += stmp * ax[ll];
414 }
415 }
416
417 /* form inverse(u)*inverse(l) */
418 for (kb = 1; kb <= 14; ++kb) {
419 k = 15 - kb;
420 k3 = 15 * k;
421 kp1 = k + 1;
422 aa = a + k3;
423 for (i = kp1; i <= 15; ++i) {
424 work[i - 1] = aa[i];
425 aa[i] = 0.0;
426 }
427 for (j = kp1; j <= 15; ++j) {
428 stmp = work[j - 1];
429 ax = &a[15 * j + 1];
430 ay = &a[k3 + 1];
431 ay[0] += stmp * ax[0];
432 ay[1] += stmp * ax[1];
433 ay[2] += stmp * ax[2];
434 ay[3] += stmp * ax[3];
435 ay[4] += stmp * ax[4];
436 ay[5] += stmp * ax[5];
437 ay[6] += stmp * ax[6];
438 ay[7] += stmp * ax[7];
439 ay[8] += stmp * ax[8];
440 ay[9] += stmp * ax[9];
441 ay[10] += stmp * ax[10];
442 ay[11] += stmp * ax[11];
443 ay[12] += stmp * ax[12];
444 ay[13] += stmp * ax[13];
445 ay[14] += stmp * ax[14];
446 }
447 l = ipvt[k - 1];
448 if (l != k) {
449 ax = &a[k3 + 1];
450 ay = &a[15 * l + 1];
451 stmp = ax[0];
452 ax[0] = ay[0];
453 ay[0] = stmp;
454 stmp = ax[1];
455 ax[1] = ay[1];
456 ay[1] = stmp;
457 stmp = ax[2];
458 ax[2] = ay[2];
459 ay[2] = stmp;
460 stmp = ax[3];
461 ax[3] = ay[3];
462 ay[3] = stmp;
463 stmp = ax[4];
464 ax[4] = ay[4];
465 ay[4] = stmp;
466 stmp = ax[5];
467 ax[5] = ay[5];
468 ay[5] = stmp;
469 stmp = ax[6];
470 ax[6] = ay[6];
471 ay[6] = stmp;
472 stmp = ax[7];
473 ax[7] = ay[7];
474 ay[7] = stmp;
475 stmp = ax[8];
476 ax[8] = ay[8];
477 ay[8] = stmp;
478 stmp = ax[9];
479 ax[9] = ay[9];
480 ay[9] = stmp;
481 stmp = ax[10];
482 ax[10] = ay[10];
483 ay[10] = stmp;
484 stmp = ax[11];
485 ax[11] = ay[11];
486 ay[11] = stmp;
487 stmp = ax[12];
488 ax[12] = ay[12];
489 ay[12] = stmp;
490 stmp = ax[13];
491 ax[13] = ay[13];
492 ay[13] = stmp;
493 stmp = ax[14];
494 ax[14] = ay[14];
495 ay[14] = stmp;
496 }
497 }
498 PetscFunctionReturn(PETSC_SUCCESS);
499 }
500