xref: /petsc/src/mat/impls/baij/seq/dgefa2.c (revision 5c8f6a953e7ed1c81f507d64aebddb11080b60e9)
1 
2 /*
3      Inverts 2 by 2 matrix using partial pivoting.
4 
5        Used by the sparse factorization routines in
6      src/mat/impls/baij/seq
7 
8 
9        This is a combination of the Linpack routines
10     dgefa() and dgedi() specialized for a size of 2.
11 
12 */
13 #include <petscsys.h>
14 
15 #undef __FUNCT__
16 #define __FUNCT__ "PetscKernel_A_gets_inverse_A_2"
17 PetscErrorCode PetscKernel_A_gets_inverse_A_2(MatScalar *a,PetscReal shift)
18 {
19     PetscInt   i__2,i__3,kp1,j,k,l,ll,i,ipvt[2],k3;
20     PetscInt   k4,j3;
21     MatScalar  *aa,*ax,*ay,work[4],stmp;
22     MatReal    tmp,max;
23 
24 /*     gaussian elimination with partial pivoting */
25 
26     PetscFunctionBegin;
27     shift = .25*shift*(1.e-12 + PetscAbsScalar(a[0]) + PetscAbsScalar(a[3]));
28     /* Parameter adjustments */
29     a       -= 3;
30 
31     /*for (k = 1; k <= 1; ++k) {*/
32         k   = 1;
33         kp1 = k + 1;
34         k3  = 2*k;
35         k4  = k3 + k;
36 /*        find l = pivot index */
37 
38         i__2 = 3 - k;
39         aa = &a[k4];
40         max = PetscAbsScalar(aa[0]);
41         l = 1;
42         for (ll=1; ll<i__2; ll++) {
43           tmp = PetscAbsScalar(aa[ll]);
44           if (tmp > max) { max = tmp; l = ll+1;}
45         }
46         l       += k - 1;
47         ipvt[k-1] = l;
48 
49         if (a[l + k3] == 0.0) {
50           if (shift == 0.0) {
51             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1);
52           } else {
53             a[l + k3] = shift;
54           }
55         }
56 
57 /*           interchange if necessary */
58 
59         if (l != k) {
60           stmp      = a[l + k3];
61           a[l + k3] = a[k4];
62           a[k4]     = stmp;
63         }
64 
65 /*           compute multipliers */
66 
67         stmp = -1. / a[k4];
68         i__2 = 2 - k;
69         aa = &a[1 + k4];
70         for (ll=0; ll<i__2; ll++) {
71           aa[ll] *= stmp;
72         }
73 
74 /*           row elimination with column indexing */
75 
76         ax = &a[k4+1];
77         for (j = kp1; j <= 2; ++j) {
78             j3   = 2*j;
79             stmp = a[l + j3];
80             if (l != k) {
81               a[l + j3] = a[k + j3];
82               a[k + j3] = stmp;
83             }
84 
85             i__3 = 2 - k;
86             ay = &a[1+k+j3];
87             for (ll=0; ll<i__3; ll++) {
88               ay[ll] += stmp*ax[ll];
89             }
90         }
91     /*}*/
92     ipvt[1] = 2;
93     if (a[6] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",1);
94 
95     /*
96          Now form the inverse
97     */
98 
99    /*     compute inverse(u) */
100 
101     for (k = 1; k <= 2; ++k) {
102         k3    = 2*k;
103         k4    = k3 + k;
104         a[k4] = 1.0 / a[k4];
105         stmp  = -a[k4];
106         i__2  = k - 1;
107         aa    = &a[k3 + 1];
108         for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
109         kp1 = k + 1;
110         if (2 < kp1) continue;
111         ax = aa;
112         for (j = kp1; j <= 2; ++j) {
113             j3        = 2*j;
114             stmp      = a[k + j3];
115             a[k + j3] = 0.0;
116             ay        = &a[j3 + 1];
117             for (ll=0; ll<k; ll++) {
118               ay[ll] += stmp*ax[ll];
119             }
120         }
121     }
122 
123    /*    form inverse(u)*inverse(l) */
124 
125     /*for (kb = 1; kb <= 1; ++kb) {*/
126 
127         k   = 1;
128         k3  = 2*k;
129         kp1 = k + 1;
130         aa  = a + k3;
131         for (i = kp1; i <= 2; ++i) {
132             work[i-1] = aa[i];
133             aa[i]   = 0.0;
134         }
135         for (j = kp1; j <= 2; ++j) {
136             stmp  = work[j-1];
137             ax    = &a[2*j + 1];
138             ay    = &a[k3 + 1];
139             ay[0] += stmp*ax[0];
140             ay[1] += stmp*ax[1];
141         }
142         l = ipvt[k-1];
143         if (l != k) {
144             ax = &a[k3 + 1];
145             ay = &a[2*l + 1];
146             stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp;
147             stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp;
148         }
149 
150     PetscFunctionReturn(0);
151 }
152 
153 #undef __FUNCT__
154 #define __FUNCT__ "PetscKernel_A_gets_inverse_A_9"
155 PetscErrorCode PetscKernel_A_gets_inverse_A_9(MatScalar *a,PetscReal shift)
156 {
157     PetscInt   i__2,i__3,kp1,j,k,l,ll,i,ipvt[9],kb,k3;
158     PetscInt   k4,j3;
159     MatScalar  *aa,*ax,*ay,work[81],stmp;
160     MatReal    tmp,max;
161 
162 /*     gaussian elimination with partial pivoting */
163 
164     PetscFunctionBegin;
165     /* Parameter adjustments */
166     a       -= 10;
167 
168     for (k = 1; k <= 8; ++k) {
169         kp1 = k + 1;
170         k3  = 9*k;
171         k4  = k3 + k;
172 /*        find l = pivot index */
173 
174         i__2 = 10 - k;
175         aa = &a[k4];
176         max = PetscAbsScalar(aa[0]);
177         l = 1;
178         for (ll=1; ll<i__2; ll++) {
179           tmp = PetscAbsScalar(aa[ll]);
180           if (tmp > max) { max = tmp; l = ll+1;}
181         }
182         l       += k - 1;
183         ipvt[k-1] = l;
184 
185         if (a[l + k3] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1);
186 
187 /*           interchange if necessary */
188 
189         if (l != k) {
190           stmp      = a[l + k3];
191           a[l + k3] = a[k4];
192           a[k4]     = stmp;
193         }
194 
195 /*           compute multipliers */
196 
197         stmp = -1. / a[k4];
198         i__2 = 9 - k;
199         aa = &a[1 + k4];
200         for (ll=0; ll<i__2; ll++) {
201           aa[ll] *= stmp;
202         }
203 
204 /*           row elimination with column indexing */
205 
206         ax = &a[k4+1];
207         for (j = kp1; j <= 9; ++j) {
208             j3   = 9*j;
209             stmp = a[l + j3];
210             if (l != k) {
211               a[l + j3] = a[k + j3];
212               a[k + j3] = stmp;
213             }
214 
215             i__3 = 9 - k;
216             ay = &a[1+k+j3];
217             for (ll=0; ll<i__3; ll++) {
218               ay[ll] += stmp*ax[ll];
219             }
220         }
221     }
222     ipvt[8] = 9;
223     if (a[90] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",6);
224 
225     /*
226          Now form the inverse
227     */
228 
229    /*     compute inverse(u) */
230 
231     for (k = 1; k <= 9; ++k) {
232         k3    = 9*k;
233         k4    = k3 + k;
234         a[k4] = 1.0 / a[k4];
235         stmp  = -a[k4];
236         i__2  = k - 1;
237         aa    = &a[k3 + 1];
238         for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
239         kp1 = k + 1;
240         if (9 < kp1) continue;
241         ax = aa;
242         for (j = kp1; j <= 9; ++j) {
243             j3        = 9*j;
244             stmp      = a[k + j3];
245             a[k + j3] = 0.0;
246             ay        = &a[j3 + 1];
247             for (ll=0; ll<k; ll++) {
248               ay[ll] += stmp*ax[ll];
249             }
250         }
251     }
252 
253    /*    form inverse(u)*inverse(l) */
254 
255     for (kb = 1; kb <= 8; ++kb) {
256         k   = 9 - kb;
257         k3  = 9*k;
258         kp1 = k + 1;
259         aa  = a + k3;
260         for (i = kp1; i <= 9; ++i) {
261             work[i-1] = aa[i];
262             aa[i]   = 0.0;
263         }
264         for (j = kp1; j <= 9; ++j) {
265             stmp  = work[j-1];
266             ax    = &a[9*j + 1];
267             ay    = &a[k3 + 1];
268             ay[0] += stmp*ax[0];
269             ay[1] += stmp*ax[1];
270             ay[2] += stmp*ax[2];
271             ay[3] += stmp*ax[3];
272             ay[4] += stmp*ax[4];
273             ay[5] += stmp*ax[5];
274             ay[6] += stmp*ax[6];
275             ay[7] += stmp*ax[7];
276             ay[8] += stmp*ax[8];
277         }
278         l = ipvt[k-1];
279         if (l != k) {
280             ax = &a[k3 + 1];
281             ay = &a[9*l + 1];
282             stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp;
283             stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp;
284             stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp;
285             stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp;
286             stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp;
287             stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp;
288             stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp;
289             stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp;
290             stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp;
291         }
292     }
293     PetscFunctionReturn(0);
294 }
295 
296 /*
297       Inverts 15 by 15 matrix using partial pivoting.
298 
299        Used by the sparse factorization routines in
300      src/mat/impls/baij/seq
301 
302        This is a combination of the Linpack routines
303     dgefa() and dgedi() specialized for a size of 15.
304 
305 */
306 
307 #undef __FUNCT__
308 #define __FUNCT__ "PetscKernel_A_gets_inverse_A_15"
309 PetscErrorCode PetscKernel_A_gets_inverse_A_15(MatScalar *a,PetscInt *ipvt,MatScalar *work,PetscReal shift)
310 {
311     PetscInt         i__2,i__3,kp1,j,k,l,ll,i,kb,k3;
312     PetscInt         k4,j3;
313     MatScalar        *aa,*ax,*ay,stmp;
314     MatReal          tmp,max;
315 
316 /*     gaussian elimination with partial pivoting */
317 
318     PetscFunctionBegin;
319     /* Parameter adjustments */
320     a       -= 16;
321 
322     for (k = 1; k <= 14; ++k) {
323         kp1 = k + 1;
324         k3  = 15*k;
325         k4  = k3 + k;
326 /*        find l = pivot index */
327 
328         i__2 = 16 - k;
329         aa = &a[k4];
330         max = PetscAbsScalar(aa[0]);
331         l = 1;
332         for (ll=1; ll<i__2; ll++) {
333           tmp = PetscAbsScalar(aa[ll]);
334           if (tmp > max) { max = tmp; l = ll+1;}
335         }
336         l       += k - 1;
337         ipvt[k-1] = l;
338 
339         if (a[l + k3] == 0.0)  SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1);
340 
341 /*           interchange if necessary */
342 
343         if (l != k) {
344           stmp      = a[l + k3];
345           a[l + k3] = a[k4];
346           a[k4]     = stmp;
347         }
348 
349 /*           compute multipliers */
350 
351         stmp = -1. / a[k4];
352         i__2 = 15 - k;
353         aa = &a[1 + k4];
354         for (ll=0; ll<i__2; ll++) {
355           aa[ll] *= stmp;
356         }
357 
358 /*           row elimination with column indexing */
359 
360         ax = &a[k4+1];
361         for (j = kp1; j <= 15; ++j) {
362             j3   = 15*j;
363             stmp = a[l + j3];
364             if (l != k) {
365               a[l + j3] = a[k + j3];
366               a[k + j3] = stmp;
367             }
368 
369             i__3 = 15 - k;
370             ay = &a[1+k+j3];
371             for (ll=0; ll<i__3; ll++) {
372               ay[ll] += stmp*ax[ll];
373             }
374         }
375     }
376     ipvt[14] = 15;
377     if (a[240] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",6);
378 
379     /*
380          Now form the inverse
381     */
382 
383    /*     compute inverse(u) */
384 
385     for (k = 1; k <= 15; ++k) {
386         k3    = 15*k;
387         k4    = k3 + k;
388         a[k4] = 1.0 / a[k4];
389         stmp  = -a[k4];
390         i__2  = k - 1;
391         aa    = &a[k3 + 1];
392         for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
393         kp1 = k + 1;
394         if (15 < kp1) continue;
395         ax = aa;
396         for (j = kp1; j <= 15; ++j) {
397             j3        = 15*j;
398             stmp      = a[k + j3];
399             a[k + j3] = 0.0;
400             ay        = &a[j3 + 1];
401             for (ll=0; ll<k; ll++) {
402               ay[ll] += stmp*ax[ll];
403             }
404         }
405     }
406 
407    /*    form inverse(u)*inverse(l) */
408 
409     for (kb = 1; kb <= 14; ++kb) {
410         k   = 15 - kb;
411         k3  = 15*k;
412         kp1 = k + 1;
413         aa  = a + k3;
414         for (i = kp1; i <= 15; ++i) {
415             work[i-1] = aa[i];
416             aa[i]   = 0.0;
417         }
418         for (j = kp1; j <= 15; ++j) {
419             stmp  = work[j-1];
420             ax    = &a[15*j + 1];
421             ay    = &a[k3 + 1];
422             ay[0]  += stmp*ax[0];
423             ay[1]  += stmp*ax[1];
424             ay[2]  += stmp*ax[2];
425             ay[3]  += stmp*ax[3];
426             ay[4]  += stmp*ax[4];
427             ay[5]  += stmp*ax[5];
428             ay[6]  += stmp*ax[6];
429             ay[7]  += stmp*ax[7];
430             ay[8]  += stmp*ax[8];
431             ay[9]  += stmp*ax[9];
432             ay[10] += stmp*ax[10];
433             ay[11] += stmp*ax[11];
434             ay[12] += stmp*ax[12];
435             ay[13] += stmp*ax[13];
436             ay[14] += stmp*ax[14];
437         }
438         l = ipvt[k-1];
439         if (l != k) {
440             ax = &a[k3 + 1];
441             ay = &a[15*l + 1];
442             stmp = ax[0];  ax[0]  = ay[0];  ay[0]  = stmp;
443             stmp = ax[1];  ax[1]  = ay[1];  ay[1]  = stmp;
444             stmp = ax[2];  ax[2]  = ay[2];  ay[2]  = stmp;
445             stmp = ax[3];  ax[3]  = ay[3];  ay[3]  = stmp;
446             stmp = ax[4];  ax[4]  = ay[4];  ay[4]  = stmp;
447             stmp = ax[5];  ax[5]  = ay[5];  ay[5]  = stmp;
448             stmp = ax[6];  ax[6]  = ay[6];  ay[6]  = stmp;
449             stmp = ax[7];  ax[7]  = ay[7];  ay[7]  = stmp;
450             stmp = ax[8];  ax[8]  = ay[8];  ay[8]  = stmp;
451             stmp = ax[9];  ax[9]  = ay[9];  ay[9]  = stmp;
452             stmp = ax[10]; ax[10] = ay[10]; ay[10] = stmp;
453             stmp = ax[11]; ax[11] = ay[11]; ay[11] = stmp;
454             stmp = ax[12]; ax[12] = ay[12]; ay[12] = stmp;
455             stmp = ax[13]; ax[13] = ay[13]; ay[13] = stmp;
456             stmp = ax[14]; ax[14] = ay[14]; ay[14] = stmp;
457         }
458     }
459     PetscFunctionReturn(0);
460 }
461