xref: /petsc/src/mat/impls/baij/seq/dgefa2.c (revision df4cd43f92eaa320656440c40edb1046daee8f75)
1 
2 /*
3      Inverts 2 by 2 matrix using gaussian elimination with partial pivoting.
4 
5        Used by the sparse factorization routines in
6      src/mat/impls/baij/seq
7 
8        This is a combination of the Linpack routines
9     dgefa() and dgedi() specialized for a size of 2.
10 
11 */
12 #include <petscsys.h>
13 
14 PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_2(MatScalar *a, PetscReal shift, PetscBool allowzeropivot, PetscBool *zeropivotdetected)
15 {
16   PetscInt   i__2, i__3, kp1, j, k, l, ll, i, ipvt[2], k3;
17   PetscInt   k4, j3;
18   MatScalar *aa, *ax, *ay, work[4], stmp;
19   MatReal    tmp, max;
20 
21   PetscFunctionBegin;
22   if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE;
23   shift = .25 * shift * (1.e-12 + PetscAbsScalar(a[0]) + PetscAbsScalar(a[3]));
24 
25   /* Parameter adjustments */
26   a -= 3;
27 
28   k   = 1;
29   kp1 = k + 1;
30   k3  = 2 * k;
31   k4  = k3 + k;
32 
33   /* find l = pivot index */
34   i__2 = 3 - k;
35   aa   = &a[k4];
36   max  = PetscAbsScalar(aa[0]);
37   l    = 1;
38   for (ll = 1; ll < i__2; ll++) {
39     tmp = PetscAbsScalar(aa[ll]);
40     if (tmp > max) {
41       max = tmp;
42       l   = ll + 1;
43     }
44   }
45   l += k - 1;
46   ipvt[k - 1] = l;
47 
48   if (a[l + k3] == 0.0) {
49     if (shift == 0.0) {
50       if (allowzeropivot) {
51         PetscCall(PetscInfo(NULL, "Zero pivot, row %" PetscInt_FMT "\n", k - 1));
52         if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
53       } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT, k - 1);
54     } else {
55       a[l + k3] = shift;
56     }
57   }
58 
59   /* interchange if necessary */
60   if (l != k) {
61     stmp      = a[l + k3];
62     a[l + k3] = a[k4];
63     a[k4]     = stmp;
64   }
65 
66   /* compute multipliers */
67   stmp = -1. / a[k4];
68   i__2 = 2 - k;
69   aa   = &a[1 + k4];
70   for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp;
71 
72   /* row elimination with column indexing */
73   ax = &a[k4 + 1];
74   for (j = kp1; j <= 2; ++j) {
75     j3   = 2 * j;
76     stmp = a[l + j3];
77     if (l != k) {
78       a[l + j3] = a[k + j3];
79       a[k + j3] = stmp;
80     }
81 
82     i__3 = 2 - k;
83     ay   = &a[1 + k + j3];
84     for (ll = 0; ll < i__3; ll++) ay[ll] += stmp * ax[ll];
85   }
86 
87   ipvt[1] = 2;
88   if (a[6] == 0.0) {
89     if (PetscLikely(allowzeropivot)) {
90       PetscCall(PetscInfo(NULL, "Zero pivot, row 1\n"));
91       if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
92     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row 1");
93   }
94 
95   /* Now form the inverse */
96   /* compute inverse(u) */
97   for (k = 1; k <= 2; ++k) {
98     k3    = 2 * k;
99     k4    = k3 + k;
100     a[k4] = 1.0 / a[k4];
101     stmp  = -a[k4];
102     i__2  = k - 1;
103     aa    = &a[k3 + 1];
104     for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp;
105     kp1 = k + 1;
106     if (2 < kp1) continue;
107     ax = aa;
108     for (j = kp1; j <= 2; ++j) {
109       j3        = 2 * j;
110       stmp      = a[k + j3];
111       a[k + j3] = 0.0;
112       ay        = &a[j3 + 1];
113       for (ll = 0; ll < k; ll++) ay[ll] += stmp * ax[ll];
114     }
115   }
116 
117   /* form inverse(u)*inverse(l) */
118   k   = 1;
119   k3  = 2 * k;
120   kp1 = k + 1;
121   aa  = a + k3;
122   for (i = kp1; i <= 2; ++i) {
123     work[i - 1] = aa[i];
124     aa[i]       = 0.0;
125   }
126   for (j = kp1; j <= 2; ++j) {
127     stmp = work[j - 1];
128     ax   = &a[2 * j + 1];
129     ay   = &a[k3 + 1];
130     ay[0] += stmp * ax[0];
131     ay[1] += stmp * ax[1];
132   }
133   l = ipvt[k - 1];
134   if (l != k) {
135     ax    = &a[k3 + 1];
136     ay    = &a[2 * l + 1];
137     stmp  = ax[0];
138     ax[0] = ay[0];
139     ay[0] = stmp;
140     stmp  = ax[1];
141     ax[1] = ay[1];
142     ay[1] = stmp;
143   }
144   PetscFunctionReturn(PETSC_SUCCESS);
145 }
146 
147 /* gaussian elimination with partial pivoting */
148 PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_9(MatScalar *a, PetscReal shift, PetscBool allowzeropivot, PetscBool *zeropivotdetected)
149 {
150   PetscInt   i__2, i__3, kp1, j, k, l, ll, i, ipvt[9], kb, k3;
151   PetscInt   k4, j3;
152   MatScalar *aa, *ax, *ay, work[81], stmp;
153   MatReal    tmp, max;
154 
155   PetscFunctionBegin;
156   if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE;
157 
158   /* Parameter adjustments */
159   a -= 10;
160 
161   for (k = 1; k <= 8; ++k) {
162     kp1 = k + 1;
163     k3  = 9 * k;
164     k4  = k3 + k;
165 
166     /* find l = pivot index */
167     i__2 = 10 - k;
168     aa   = &a[k4];
169     max  = PetscAbsScalar(aa[0]);
170     l    = 1;
171     for (ll = 1; ll < i__2; ll++) {
172       tmp = PetscAbsScalar(aa[ll]);
173       if (tmp > max) {
174         max = tmp;
175         l   = ll + 1;
176       }
177     }
178     l += k - 1;
179     ipvt[k - 1] = l;
180 
181     if (a[l + k3] == 0.0) {
182       if (shift == 0.0) {
183         if (allowzeropivot) {
184           PetscCall(PetscInfo(NULL, "Zero pivot, row %" PetscInt_FMT "\n", k - 1));
185           if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
186         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT, k - 1);
187       } else {
188         a[l + k3] = shift;
189       }
190     }
191 
192     /* interchange if necessary */
193     if (l != k) {
194       stmp      = a[l + k3];
195       a[l + k3] = a[k4];
196       a[k4]     = stmp;
197     }
198 
199     /* compute multipliers */
200     stmp = -1. / a[k4];
201     i__2 = 9 - k;
202     aa   = &a[1 + k4];
203     for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp;
204 
205     /* row elimination with column indexing */
206     ax = &a[k4 + 1];
207     for (j = kp1; j <= 9; ++j) {
208       j3   = 9 * j;
209       stmp = a[l + j3];
210       if (l != k) {
211         a[l + j3] = a[k + j3];
212         a[k + j3] = stmp;
213       }
214 
215       i__3 = 9 - k;
216       ay   = &a[1 + k + j3];
217       for (ll = 0; ll < i__3; ll++) ay[ll] += stmp * ax[ll];
218     }
219   }
220   ipvt[8] = 9;
221   if (a[90] == 0.0) {
222     if (PetscLikely(allowzeropivot)) {
223       PetscCall(PetscInfo(NULL, "Zero pivot, row 8\n"));
224       if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
225     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row 8");
226   }
227 
228   /* Now form the inverse */
229   /* compute inverse(u) */
230   for (k = 1; k <= 9; ++k) {
231     k3    = 9 * k;
232     k4    = k3 + k;
233     a[k4] = 1.0 / a[k4];
234     stmp  = -a[k4];
235     i__2  = k - 1;
236     aa    = &a[k3 + 1];
237     for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp;
238     kp1 = k + 1;
239     if (9 < kp1) continue;
240     ax = aa;
241     for (j = kp1; j <= 9; ++j) {
242       j3        = 9 * j;
243       stmp      = a[k + j3];
244       a[k + j3] = 0.0;
245       ay        = &a[j3 + 1];
246       for (ll = 0; ll < k; ll++) ay[ll] += stmp * ax[ll];
247     }
248   }
249 
250   /* form inverse(u)*inverse(l) */
251   for (kb = 1; kb <= 8; ++kb) {
252     k   = 9 - kb;
253     k3  = 9 * k;
254     kp1 = k + 1;
255     aa  = a + k3;
256     for (i = kp1; i <= 9; ++i) {
257       work[i - 1] = aa[i];
258       aa[i]       = 0.0;
259     }
260     for (j = kp1; j <= 9; ++j) {
261       stmp = work[j - 1];
262       ax   = &a[9 * j + 1];
263       ay   = &a[k3 + 1];
264       ay[0] += stmp * ax[0];
265       ay[1] += stmp * ax[1];
266       ay[2] += stmp * ax[2];
267       ay[3] += stmp * ax[3];
268       ay[4] += stmp * ax[4];
269       ay[5] += stmp * ax[5];
270       ay[6] += stmp * ax[6];
271       ay[7] += stmp * ax[7];
272       ay[8] += stmp * ax[8];
273     }
274     l = ipvt[k - 1];
275     if (l != k) {
276       ax    = &a[k3 + 1];
277       ay    = &a[9 * l + 1];
278       stmp  = ax[0];
279       ax[0] = ay[0];
280       ay[0] = stmp;
281       stmp  = ax[1];
282       ax[1] = ay[1];
283       ay[1] = stmp;
284       stmp  = ax[2];
285       ax[2] = ay[2];
286       ay[2] = stmp;
287       stmp  = ax[3];
288       ax[3] = ay[3];
289       ay[3] = stmp;
290       stmp  = ax[4];
291       ax[4] = ay[4];
292       ay[4] = stmp;
293       stmp  = ax[5];
294       ax[5] = ay[5];
295       ay[5] = stmp;
296       stmp  = ax[6];
297       ax[6] = ay[6];
298       ay[6] = stmp;
299       stmp  = ax[7];
300       ax[7] = ay[7];
301       ay[7] = stmp;
302       stmp  = ax[8];
303       ax[8] = ay[8];
304       ay[8] = stmp;
305     }
306   }
307   PetscFunctionReturn(PETSC_SUCCESS);
308 }
309 
310 /*
311       Inverts 15 by 15 matrix using gaussian elimination with partial pivoting.
312 
313        Used by the sparse factorization routines in
314      src/mat/impls/baij/seq
315 
316        This is a combination of the Linpack routines
317     dgefa() and dgedi() specialized for a size of 15.
318 
319 */
320 
321 PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_15(MatScalar *a, PetscInt *ipvt, MatScalar *work, PetscReal shift, PetscBool allowzeropivot, PetscBool *zeropivotdetected)
322 {
323   PetscInt   i__2, i__3, kp1, j, k, l, ll, i, kb, k3;
324   PetscInt   k4, j3;
325   MatScalar *aa, *ax, *ay, stmp;
326   MatReal    tmp, max;
327 
328   PetscFunctionBegin;
329   if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE;
330 
331   /* Parameter adjustments */
332   a -= 16;
333 
334   for (k = 1; k <= 14; ++k) {
335     kp1 = k + 1;
336     k3  = 15 * k;
337     k4  = k3 + k;
338 
339     /* find l = pivot index */
340     i__2 = 16 - k;
341     aa   = &a[k4];
342     max  = PetscAbsScalar(aa[0]);
343     l    = 1;
344     for (ll = 1; ll < i__2; ll++) {
345       tmp = PetscAbsScalar(aa[ll]);
346       if (tmp > max) {
347         max = tmp;
348         l   = ll + 1;
349       }
350     }
351     l += k - 1;
352     ipvt[k - 1] = l;
353 
354     if (a[l + k3] == 0.0) {
355       if (shift == 0.0) {
356         if (allowzeropivot) {
357           PetscCall(PetscInfo(NULL, "Zero pivot, row %" PetscInt_FMT "\n", k - 1));
358           if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
359         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT, k - 1);
360       } else {
361         a[l + k3] = shift;
362       }
363     }
364 
365     /* interchange if necessary */
366     if (l != k) {
367       stmp      = a[l + k3];
368       a[l + k3] = a[k4];
369       a[k4]     = stmp;
370     }
371 
372     /* compute multipliers */
373     stmp = -1. / a[k4];
374     i__2 = 15 - k;
375     aa   = &a[1 + k4];
376     for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp;
377 
378     /* row elimination with column indexing */
379     ax = &a[k4 + 1];
380     for (j = kp1; j <= 15; ++j) {
381       j3   = 15 * j;
382       stmp = a[l + j3];
383       if (l != k) {
384         a[l + j3] = a[k + j3];
385         a[k + j3] = stmp;
386       }
387 
388       i__3 = 15 - k;
389       ay   = &a[1 + k + j3];
390       for (ll = 0; ll < i__3; ll++) ay[ll] += stmp * ax[ll];
391     }
392   }
393   ipvt[14] = 15;
394   if (a[240] == 0.0) {
395     if (PetscLikely(allowzeropivot)) {
396       PetscCall(PetscInfo(NULL, "Zero pivot, row 14\n"));
397       if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
398     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row 14");
399   }
400 
401   /* Now form the inverse */
402   /* compute inverse(u) */
403   for (k = 1; k <= 15; ++k) {
404     k3    = 15 * k;
405     k4    = k3 + k;
406     a[k4] = 1.0 / a[k4];
407     stmp  = -a[k4];
408     i__2  = k - 1;
409     aa    = &a[k3 + 1];
410     for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp;
411     kp1 = k + 1;
412     if (15 < kp1) continue;
413     ax = aa;
414     for (j = kp1; j <= 15; ++j) {
415       j3        = 15 * j;
416       stmp      = a[k + j3];
417       a[k + j3] = 0.0;
418       ay        = &a[j3 + 1];
419       for (ll = 0; ll < k; ll++) ay[ll] += stmp * ax[ll];
420     }
421   }
422 
423   /* form inverse(u)*inverse(l) */
424   for (kb = 1; kb <= 14; ++kb) {
425     k   = 15 - kb;
426     k3  = 15 * k;
427     kp1 = k + 1;
428     aa  = a + k3;
429     for (i = kp1; i <= 15; ++i) {
430       work[i - 1] = aa[i];
431       aa[i]       = 0.0;
432     }
433     for (j = kp1; j <= 15; ++j) {
434       stmp = work[j - 1];
435       ax   = &a[15 * j + 1];
436       ay   = &a[k3 + 1];
437       ay[0] += stmp * ax[0];
438       ay[1] += stmp * ax[1];
439       ay[2] += stmp * ax[2];
440       ay[3] += stmp * ax[3];
441       ay[4] += stmp * ax[4];
442       ay[5] += stmp * ax[5];
443       ay[6] += stmp * ax[6];
444       ay[7] += stmp * ax[7];
445       ay[8] += stmp * ax[8];
446       ay[9] += stmp * ax[9];
447       ay[10] += stmp * ax[10];
448       ay[11] += stmp * ax[11];
449       ay[12] += stmp * ax[12];
450       ay[13] += stmp * ax[13];
451       ay[14] += stmp * ax[14];
452     }
453     l = ipvt[k - 1];
454     if (l != k) {
455       ax     = &a[k3 + 1];
456       ay     = &a[15 * l + 1];
457       stmp   = ax[0];
458       ax[0]  = ay[0];
459       ay[0]  = stmp;
460       stmp   = ax[1];
461       ax[1]  = ay[1];
462       ay[1]  = stmp;
463       stmp   = ax[2];
464       ax[2]  = ay[2];
465       ay[2]  = stmp;
466       stmp   = ax[3];
467       ax[3]  = ay[3];
468       ay[3]  = stmp;
469       stmp   = ax[4];
470       ax[4]  = ay[4];
471       ay[4]  = stmp;
472       stmp   = ax[5];
473       ax[5]  = ay[5];
474       ay[5]  = stmp;
475       stmp   = ax[6];
476       ax[6]  = ay[6];
477       ay[6]  = stmp;
478       stmp   = ax[7];
479       ax[7]  = ay[7];
480       ay[7]  = stmp;
481       stmp   = ax[8];
482       ax[8]  = ay[8];
483       ay[8]  = stmp;
484       stmp   = ax[9];
485       ax[9]  = ay[9];
486       ay[9]  = stmp;
487       stmp   = ax[10];
488       ax[10] = ay[10];
489       ay[10] = stmp;
490       stmp   = ax[11];
491       ax[11] = ay[11];
492       ay[11] = stmp;
493       stmp   = ax[12];
494       ax[12] = ay[12];
495       ay[12] = stmp;
496       stmp   = ax[13];
497       ax[13] = ay[13];
498       ay[13] = stmp;
499       stmp   = ax[14];
500       ax[14] = ay[14];
501       ay[14] = stmp;
502     }
503   }
504   PetscFunctionReturn(PETSC_SUCCESS);
505 }
506