xref: /petsc/src/mat/impls/baij/seq/dgefa2.c (revision 2205254efee3a00a594e5e2a3a70f74dcb40bc03)
1 
2 /*
3      Inverts 2 by 2 matrix using partial pivoting.
4 
5        Used by the sparse factorization routines in
6      src/mat/impls/baij/seq
7 
8 
9        This is a combination of the Linpack routines
10     dgefa() and dgedi() specialized for a size of 2.
11 
12 */
13 #include <petscsys.h>
14 
15 #undef __FUNCT__
16 #define __FUNCT__ "PetscKernel_A_gets_inverse_A_2"
17 PetscErrorCode PetscKernel_A_gets_inverse_A_2(MatScalar *a,PetscReal shift)
18 {
19   PetscInt   i__2,i__3,kp1,j,k,l,ll,i,ipvt[2],k3;
20   PetscInt   k4,j3;
21   MatScalar  *aa,*ax,*ay,work[4],stmp;
22   MatReal    tmp,max;
23 
24 /*     gaussian elimination with partial pivoting */
25 
26   PetscFunctionBegin;
27   shift = .25*shift*(1.e-12 + PetscAbsScalar(a[0]) + PetscAbsScalar(a[3]));
28   /* Parameter adjustments */
29   a       -= 3;
30 
31   /*for (k = 1; k <= 1; ++k) {*/
32   k   = 1;
33   kp1 = k + 1;
34   k3  = 2*k;
35   k4  = k3 + k;
36 /*        find l = pivot index */
37 
38   i__2 = 3 - k;
39   aa = &a[k4];
40   max = PetscAbsScalar(aa[0]);
41   l = 1;
42   for (ll=1; ll<i__2; ll++) {
43     tmp = PetscAbsScalar(aa[ll]);
44     if (tmp > max) { max = tmp; l = ll+1;}
45   }
46   l       += k - 1;
47   ipvt[k-1] = l;
48 
49   if (a[l + k3] == 0.0) {
50     if (shift == 0.0) {
51       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1);
52     } else {
53       a[l + k3] = shift;
54     }
55   }
56 
57 /*           interchange if necessary */
58 
59   if (l != k) {
60     stmp      = a[l + k3];
61     a[l + k3] = a[k4];
62     a[k4]     = stmp;
63   }
64 
65 /*           compute multipliers */
66 
67   stmp = -1. / a[k4];
68   i__2 = 2 - k;
69   aa = &a[1 + k4];
70   for (ll=0; ll<i__2; ll++) {
71     aa[ll] *= stmp;
72   }
73 
74 /*           row elimination with column indexing */
75 
76   ax = &a[k4+1];
77   for (j = kp1; j <= 2; ++j) {
78       j3   = 2*j;
79       stmp = a[l + j3];
80       if (l != k) {
81         a[l + j3] = a[k + j3];
82         a[k + j3] = stmp;
83       }
84 
85       i__3 = 2 - k;
86       ay = &a[1+k+j3];
87       for (ll=0; ll<i__3; ll++) {
88         ay[ll] += stmp*ax[ll];
89       }
90   }
91   /*}*/
92   ipvt[1] = 2;
93   if (a[6] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",1);
94 
95   /*
96         Now form the inverse
97   */
98 
99   /*     compute inverse(u) */
100 
101   for (k = 1; k <= 2; ++k) {
102     k3    = 2*k;
103     k4    = k3 + k;
104     a[k4] = 1.0 / a[k4];
105     stmp  = -a[k4];
106     i__2  = k - 1;
107     aa    = &a[k3 + 1];
108     for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
109     kp1 = k + 1;
110     if (2 < kp1) continue;
111     ax = aa;
112     for (j = kp1; j <= 2; ++j) {
113       j3        = 2*j;
114       stmp      = a[k + j3];
115       a[k + j3] = 0.0;
116       ay        = &a[j3 + 1];
117       for (ll=0; ll<k; ll++) {
118         ay[ll] += stmp*ax[ll];
119       }
120     }
121   }
122 
123 /*    form inverse(u)*inverse(l) */
124 
125 /*for (kb = 1; kb <= 1; ++kb) {*/
126 
127   k   = 1;
128   k3  = 2*k;
129   kp1 = k + 1;
130   aa  = a + k3;
131   for (i = kp1; i <= 2; ++i) {
132     work[i-1] = aa[i];
133     aa[i]   = 0.0;
134   }
135   for (j = kp1; j <= 2; ++j) {
136     stmp  = work[j-1];
137     ax    = &a[2*j + 1];
138     ay    = &a[k3 + 1];
139     ay[0] += stmp*ax[0];
140     ay[1] += stmp*ax[1];
141   }
142   l = ipvt[k-1];
143   if (l != k) {
144     ax = &a[k3 + 1];
145     ay = &a[2*l + 1];
146     stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp;
147     stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp;
148   }
149   PetscFunctionReturn(0);
150 }
151 
152 #undef __FUNCT__
153 #define __FUNCT__ "PetscKernel_A_gets_inverse_A_9"
154 PetscErrorCode PetscKernel_A_gets_inverse_A_9(MatScalar *a,PetscReal shift)
155 {
156   PetscInt   i__2,i__3,kp1,j,k,l,ll,i,ipvt[9],kb,k3;
157   PetscInt   k4,j3;
158   MatScalar  *aa,*ax,*ay,work[81],stmp;
159   MatReal    tmp,max;
160 
161 /*     gaussian elimination with partial pivoting */
162 
163   PetscFunctionBegin;
164   /* Parameter adjustments */
165   a       -= 10;
166 
167   for (k = 1; k <= 8; ++k) {
168     kp1 = k + 1;
169     k3  = 9*k;
170     k4  = k3 + k;
171 /*        find l = pivot index */
172 
173     i__2 = 10 - k;
174     aa = &a[k4];
175     max = PetscAbsScalar(aa[0]);
176     l = 1;
177     for (ll=1; ll<i__2; ll++) {
178       tmp = PetscAbsScalar(aa[ll]);
179       if (tmp > max) { max = tmp; l = ll+1;}
180     }
181     l       += k - 1;
182     ipvt[k-1] = l;
183 
184     if (a[l + k3] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1);
185 
186 /*           interchange if necessary */
187 
188     if (l != k) {
189       stmp      = a[l + k3];
190       a[l + k3] = a[k4];
191       a[k4]     = stmp;
192     }
193 
194 /*           compute multipliers */
195 
196     stmp = -1. / a[k4];
197     i__2 = 9 - k;
198     aa = &a[1 + k4];
199     for (ll=0; ll<i__2; ll++) {
200       aa[ll] *= stmp;
201     }
202 
203 /*           row elimination with column indexing */
204 
205     ax = &a[k4+1];
206     for (j = kp1; j <= 9; ++j) {
207       j3   = 9*j;
208       stmp = a[l + j3];
209       if (l != k) {
210         a[l + j3] = a[k + j3];
211         a[k + j3] = stmp;
212       }
213 
214       i__3 = 9 - k;
215       ay = &a[1+k+j3];
216       for (ll=0; ll<i__3; ll++) {
217         ay[ll] += stmp*ax[ll];
218       }
219     }
220   }
221   ipvt[8] = 9;
222   if (a[90] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",6);
223 
224   /*
225         Now form the inverse
226   */
227 
228   /*     compute inverse(u) */
229 
230   for (k = 1; k <= 9; ++k) {
231     k3    = 9*k;
232     k4    = k3 + k;
233     a[k4] = 1.0 / a[k4];
234     stmp  = -a[k4];
235     i__2  = k - 1;
236     aa    = &a[k3 + 1];
237     for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
238     kp1 = k + 1;
239     if (9 < kp1) continue;
240     ax = aa;
241     for (j = kp1; j <= 9; ++j) {
242       j3        = 9*j;
243       stmp      = a[k + j3];
244       a[k + j3] = 0.0;
245       ay        = &a[j3 + 1];
246       for (ll=0; ll<k; ll++) {
247         ay[ll] += stmp*ax[ll];
248       }
249     }
250   }
251 
252   /*    form inverse(u)*inverse(l) */
253 
254   for (kb = 1; kb <= 8; ++kb) {
255     k   = 9 - kb;
256     k3  = 9*k;
257     kp1 = k + 1;
258     aa  = a + k3;
259     for (i = kp1; i <= 9; ++i) {
260       work[i-1] = aa[i];
261       aa[i]   = 0.0;
262     }
263     for (j = kp1; j <= 9; ++j) {
264       stmp  = work[j-1];
265       ax    = &a[9*j + 1];
266       ay    = &a[k3 + 1];
267       ay[0] += stmp*ax[0];
268       ay[1] += stmp*ax[1];
269       ay[2] += stmp*ax[2];
270       ay[3] += stmp*ax[3];
271       ay[4] += stmp*ax[4];
272       ay[5] += stmp*ax[5];
273       ay[6] += stmp*ax[6];
274       ay[7] += stmp*ax[7];
275       ay[8] += stmp*ax[8];
276     }
277     l = ipvt[k-1];
278     if (l != k) {
279       ax = &a[k3 + 1];
280       ay = &a[9*l + 1];
281       stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp;
282       stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp;
283       stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp;
284       stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp;
285       stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp;
286       stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp;
287       stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp;
288       stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp;
289       stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp;
290     }
291   }
292   PetscFunctionReturn(0);
293 }
294 
295 /*
296       Inverts 15 by 15 matrix using partial pivoting.
297 
298        Used by the sparse factorization routines in
299      src/mat/impls/baij/seq
300 
301        This is a combination of the Linpack routines
302     dgefa() and dgedi() specialized for a size of 15.
303 
304 */
305 
306 #undef __FUNCT__
307 #define __FUNCT__ "PetscKernel_A_gets_inverse_A_15"
308 PetscErrorCode PetscKernel_A_gets_inverse_A_15(MatScalar *a,PetscInt *ipvt,MatScalar *work,PetscReal shift)
309 {
310   PetscInt         i__2,i__3,kp1,j,k,l,ll,i,kb,k3;
311   PetscInt         k4,j3;
312   MatScalar        *aa,*ax,*ay,stmp;
313   MatReal          tmp,max;
314 
315 /*     gaussian elimination with partial pivoting */
316 
317   PetscFunctionBegin;
318   /* Parameter adjustments */
319   a       -= 16;
320 
321   for (k = 1; k <= 14; ++k) {
322     kp1 = k + 1;
323     k3  = 15*k;
324     k4  = k3 + k;
325 /*        find l = pivot index */
326 
327     i__2 = 16 - k;
328     aa = &a[k4];
329     max = PetscAbsScalar(aa[0]);
330     l = 1;
331     for (ll=1; ll<i__2; ll++) {
332       tmp = PetscAbsScalar(aa[ll]);
333       if (tmp > max) { max = tmp; l = ll+1;}
334     }
335     l       += k - 1;
336     ipvt[k-1] = l;
337 
338     if (a[l + k3] == 0.0)  SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1);
339 
340 /*           interchange if necessary */
341 
342     if (l != k) {
343       stmp      = a[l + k3];
344       a[l + k3] = a[k4];
345       a[k4]     = stmp;
346     }
347 
348 /*           compute multipliers */
349 
350     stmp = -1. / a[k4];
351     i__2 = 15 - k;
352     aa = &a[1 + k4];
353     for (ll=0; ll<i__2; ll++) {
354       aa[ll] *= stmp;
355     }
356 
357 /*           row elimination with column indexing */
358 
359     ax = &a[k4+1];
360     for (j = kp1; j <= 15; ++j) {
361       j3   = 15*j;
362       stmp = a[l + j3];
363       if (l != k) {
364         a[l + j3] = a[k + j3];
365         a[k + j3] = stmp;
366       }
367 
368       i__3 = 15 - k;
369       ay = &a[1+k+j3];
370       for (ll=0; ll<i__3; ll++) {
371         ay[ll] += stmp*ax[ll];
372       }
373     }
374   }
375   ipvt[14] = 15;
376   if (a[240] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",6);
377 
378   /*
379         Now form the inverse
380   */
381 
382   /*     compute inverse(u) */
383 
384   for (k = 1; k <= 15; ++k) {
385     k3    = 15*k;
386     k4    = k3 + k;
387     a[k4] = 1.0 / a[k4];
388     stmp  = -a[k4];
389     i__2  = k - 1;
390     aa    = &a[k3 + 1];
391     for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
392     kp1 = k + 1;
393     if (15 < kp1) continue;
394     ax = aa;
395     for (j = kp1; j <= 15; ++j) {
396       j3        = 15*j;
397       stmp      = a[k + j3];
398       a[k + j3] = 0.0;
399       ay        = &a[j3 + 1];
400       for (ll=0; ll<k; ll++) {
401         ay[ll] += stmp*ax[ll];
402       }
403     }
404   }
405 
406   /*    form inverse(u)*inverse(l) */
407 
408   for (kb = 1; kb <= 14; ++kb) {
409     k   = 15 - kb;
410     k3  = 15*k;
411     kp1 = k + 1;
412     aa  = a + k3;
413     for (i = kp1; i <= 15; ++i) {
414         work[i-1] = aa[i];
415         aa[i]   = 0.0;
416     }
417     for (j = kp1; j <= 15; ++j) {
418       stmp  = work[j-1];
419       ax    = &a[15*j + 1];
420       ay    = &a[k3 + 1];
421       ay[0]  += stmp*ax[0];
422       ay[1]  += stmp*ax[1];
423       ay[2]  += stmp*ax[2];
424       ay[3]  += stmp*ax[3];
425       ay[4]  += stmp*ax[4];
426       ay[5]  += stmp*ax[5];
427       ay[6]  += stmp*ax[6];
428       ay[7]  += stmp*ax[7];
429       ay[8]  += stmp*ax[8];
430       ay[9]  += stmp*ax[9];
431       ay[10] += stmp*ax[10];
432       ay[11] += stmp*ax[11];
433       ay[12] += stmp*ax[12];
434       ay[13] += stmp*ax[13];
435       ay[14] += stmp*ax[14];
436     }
437     l = ipvt[k-1];
438     if (l != k) {
439       ax = &a[k3 + 1];
440       ay = &a[15*l + 1];
441       stmp = ax[0];  ax[0]  = ay[0];  ay[0]  = stmp;
442       stmp = ax[1];  ax[1]  = ay[1];  ay[1]  = stmp;
443       stmp = ax[2];  ax[2]  = ay[2];  ay[2]  = stmp;
444       stmp = ax[3];  ax[3]  = ay[3];  ay[3]  = stmp;
445       stmp = ax[4];  ax[4]  = ay[4];  ay[4]  = stmp;
446       stmp = ax[5];  ax[5]  = ay[5];  ay[5]  = stmp;
447       stmp = ax[6];  ax[6]  = ay[6];  ay[6]  = stmp;
448       stmp = ax[7];  ax[7]  = ay[7];  ay[7]  = stmp;
449       stmp = ax[8];  ax[8]  = ay[8];  ay[8]  = stmp;
450       stmp = ax[9];  ax[9]  = ay[9];  ay[9]  = stmp;
451       stmp = ax[10]; ax[10] = ay[10]; ay[10] = stmp;
452       stmp = ax[11]; ax[11] = ay[11]; ay[11] = stmp;
453       stmp = ax[12]; ax[12] = ay[12]; ay[12] = stmp;
454       stmp = ax[13]; ax[13] = ay[13]; ay[13] = stmp;
455       stmp = ax[14]; ax[14] = ay[14]; ay[14] = stmp;
456     }
457   }
458   PetscFunctionReturn(0);
459 }
460