xref: /petsc/src/mat/impls/baij/seq/dgefa2.c (revision f3fe499b4cc4d64bf04aa4f5e4963dcc4eb56541)
1 #define PETSCMAT_DLL
2 
3 /*
4      Inverts 2 by 2 matrix using partial pivoting.
5 
6        Used by the sparse factorization routines in
7      src/mat/impls/baij/seq
8 
9 
10        This is a combination of the Linpack routines
11     dgefa() and dgedi() specialized for a size of 2.
12 
13 */
14 #include "petscsys.h"
15 
16 #undef __FUNCT__
17 #define __FUNCT__ "Kernel_A_gets_inverse_A_2"
18 PetscErrorCode Kernel_A_gets_inverse_A_2(MatScalar *a,PetscReal shift)
19 {
20     PetscInt   i__2,i__3,kp1,j,k,l,ll,i,ipvt[2],k3;
21     PetscInt   k4,j3;
22     MatScalar  *aa,*ax,*ay,work[4],stmp;
23     MatReal    tmp,max;
24 
25 /*     gaussian elimination with partial pivoting */
26 
27     PetscFunctionBegin;
28     shift = .25*shift*(1.e-12 + PetscAbsScalar(a[0]) + PetscAbsScalar(a[3]));
29     /* Parameter adjustments */
30     a       -= 3;
31 
32     /*for (k = 1; k <= 1; ++k) {*/
33         k   = 1;
34 	kp1 = k + 1;
35         k3  = 2*k;
36         k4  = k3 + k;
37 /*        find l = pivot index */
38 
39 	i__2 = 3 - k;
40         aa = &a[k4];
41         max = PetscAbsScalar(aa[0]);
42         l = 1;
43         for (ll=1; ll<i__2; ll++) {
44           tmp = PetscAbsScalar(aa[ll]);
45           if (tmp > max) { max = tmp; l = ll+1;}
46         }
47         l       += k - 1;
48 	ipvt[k-1] = l;
49 
50 	if (a[l + k3] == 0.0) {
51 	  if (shift == 0.0) {
52 	    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1);
53 	  } else {
54 	    a[l + k3] = shift;
55 	  }
56 	}
57 
58 /*           interchange if necessary */
59 
60 	if (l != k) {
61 	  stmp      = a[l + k3];
62 	  a[l + k3] = a[k4];
63 	  a[k4]     = stmp;
64         }
65 
66 /*           compute multipliers */
67 
68 	stmp = -1. / a[k4];
69 	i__2 = 2 - k;
70         aa = &a[1 + k4];
71         for (ll=0; ll<i__2; ll++) {
72           aa[ll] *= stmp;
73         }
74 
75 /*           row elimination with column indexing */
76 
77 	ax = &a[k4+1];
78         for (j = kp1; j <= 2; ++j) {
79             j3   = 2*j;
80 	    stmp = a[l + j3];
81 	    if (l != k) {
82 	      a[l + j3] = a[k + j3];
83 	      a[k + j3] = stmp;
84             }
85 
86 	    i__3 = 2 - k;
87             ay = &a[1+k+j3];
88             for (ll=0; ll<i__3; ll++) {
89               ay[ll] += stmp*ax[ll];
90             }
91 	}
92     /*}*/
93     ipvt[1] = 2;
94     if (a[6] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",1);
95 
96     /*
97          Now form the inverse
98     */
99 
100    /*     compute inverse(u) */
101 
102     for (k = 1; k <= 2; ++k) {
103         k3    = 2*k;
104         k4    = k3 + k;
105 	a[k4] = 1.0 / a[k4];
106 	stmp  = -a[k4];
107 	i__2  = k - 1;
108         aa    = &a[k3 + 1];
109         for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
110 	kp1 = k + 1;
111 	if (2 < kp1) continue;
112         ax = aa;
113         for (j = kp1; j <= 2; ++j) {
114             j3        = 2*j;
115 	    stmp      = a[k + j3];
116 	    a[k + j3] = 0.0;
117             ay        = &a[j3 + 1];
118             for (ll=0; ll<k; ll++) {
119               ay[ll] += stmp*ax[ll];
120             }
121 	}
122     }
123 
124    /*    form inverse(u)*inverse(l) */
125 
126     /*for (kb = 1; kb <= 1; ++kb) {*/
127 
128 	k   = 1;
129         k3  = 2*k;
130 	kp1 = k + 1;
131         aa  = a + k3;
132 	for (i = kp1; i <= 2; ++i) {
133             work[i-1] = aa[i];
134 	    aa[i]   = 0.0;
135 	}
136 	for (j = kp1; j <= 2; ++j) {
137 	    stmp  = work[j-1];
138             ax    = &a[2*j + 1];
139             ay    = &a[k3 + 1];
140             ay[0] += stmp*ax[0];
141             ay[1] += stmp*ax[1];
142 	}
143 	l = ipvt[k-1];
144 	if (l != k) {
145             ax = &a[k3 + 1];
146             ay = &a[2*l + 1];
147             stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp;
148             stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp;
149 	}
150 
151     PetscFunctionReturn(0);
152 }
153 
154 #undef __FUNCT__
155 #define __FUNCT__ "Kernel_A_gets_inverse_A_9"
156 PetscErrorCode Kernel_A_gets_inverse_A_9(MatScalar *a,PetscReal shift)
157 {
158     PetscInt   i__2,i__3,kp1,j,k,l,ll,i,ipvt[9],kb,k3;
159     PetscInt   k4,j3;
160     MatScalar  *aa,*ax,*ay,work[81],stmp;
161     MatReal    tmp,max;
162 
163 /*     gaussian elimination with partial pivoting */
164 
165     PetscFunctionBegin;
166     /* Parameter adjustments */
167     a       -= 10;
168 
169     for (k = 1; k <= 8; ++k) {
170 	kp1 = k + 1;
171         k3  = 9*k;
172         k4  = k3 + k;
173 /*        find l = pivot index */
174 
175 	i__2 = 10 - k;
176         aa = &a[k4];
177         max = PetscAbsScalar(aa[0]);
178         l = 1;
179         for (ll=1; ll<i__2; ll++) {
180           tmp = PetscAbsScalar(aa[ll]);
181           if (tmp > max) { max = tmp; l = ll+1;}
182         }
183         l       += k - 1;
184 	ipvt[k-1] = l;
185 
186 	if (a[l + k3] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1);
187 
188 /*           interchange if necessary */
189 
190 	if (l != k) {
191 	  stmp      = a[l + k3];
192 	  a[l + k3] = a[k4];
193 	  a[k4]     = stmp;
194         }
195 
196 /*           compute multipliers */
197 
198 	stmp = -1. / a[k4];
199 	i__2 = 9 - k;
200         aa = &a[1 + k4];
201         for (ll=0; ll<i__2; ll++) {
202           aa[ll] *= stmp;
203         }
204 
205 /*           row elimination with column indexing */
206 
207 	ax = &a[k4+1];
208         for (j = kp1; j <= 9; ++j) {
209             j3   = 9*j;
210 	    stmp = a[l + j3];
211 	    if (l != k) {
212 	      a[l + j3] = a[k + j3];
213 	      a[k + j3] = stmp;
214             }
215 
216 	    i__3 = 9 - k;
217             ay = &a[1+k+j3];
218             for (ll=0; ll<i__3; ll++) {
219               ay[ll] += stmp*ax[ll];
220             }
221 	}
222     }
223     ipvt[8] = 9;
224     if (a[90] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",6);
225 
226     /*
227          Now form the inverse
228     */
229 
230    /*     compute inverse(u) */
231 
232     for (k = 1; k <= 9; ++k) {
233         k3    = 9*k;
234         k4    = k3 + k;
235 	a[k4] = 1.0 / a[k4];
236 	stmp  = -a[k4];
237 	i__2  = k - 1;
238         aa    = &a[k3 + 1];
239         for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
240 	kp1 = k + 1;
241 	if (9 < kp1) continue;
242         ax = aa;
243         for (j = kp1; j <= 9; ++j) {
244             j3        = 9*j;
245 	    stmp      = a[k + j3];
246 	    a[k + j3] = 0.0;
247             ay        = &a[j3 + 1];
248             for (ll=0; ll<k; ll++) {
249               ay[ll] += stmp*ax[ll];
250             }
251 	}
252     }
253 
254    /*    form inverse(u)*inverse(l) */
255 
256     for (kb = 1; kb <= 8; ++kb) {
257 	k   = 9 - kb;
258         k3  = 9*k;
259 	kp1 = k + 1;
260         aa  = a + k3;
261 	for (i = kp1; i <= 9; ++i) {
262             work[i-1] = aa[i];
263 	    aa[i]   = 0.0;
264 	}
265 	for (j = kp1; j <= 9; ++j) {
266 	    stmp  = work[j-1];
267             ax    = &a[9*j + 1];
268             ay    = &a[k3 + 1];
269             ay[0] += stmp*ax[0];
270             ay[1] += stmp*ax[1];
271             ay[2] += stmp*ax[2];
272             ay[3] += stmp*ax[3];
273             ay[4] += stmp*ax[4];
274             ay[5] += stmp*ax[5];
275             ay[6] += stmp*ax[6];
276             ay[7] += stmp*ax[7];
277             ay[8] += stmp*ax[8];
278 	}
279 	l = ipvt[k-1];
280 	if (l != k) {
281             ax = &a[k3 + 1];
282             ay = &a[9*l + 1];
283             stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp;
284             stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp;
285             stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp;
286             stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp;
287             stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp;
288             stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp;
289             stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp;
290             stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp;
291             stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp;
292 	}
293     }
294     PetscFunctionReturn(0);
295 }
296 
297 /*
298       Inverts 15 by 15 matrix using partial pivoting.
299 
300        Used by the sparse factorization routines in
301      src/mat/impls/baij/seq
302 
303        This is a combination of the Linpack routines
304     dgefa() and dgedi() specialized for a size of 15.
305 
306 */
307 
308 #undef __FUNCT__
309 #define __FUNCT__ "Kernel_A_gets_inverse_A_15"
310 PetscErrorCode Kernel_A_gets_inverse_A_15(MatScalar *a,PetscInt *ipvt,MatScalar *work,PetscReal shift)
311 {
312     PetscInt         i__2,i__3,kp1,j,k,l,ll,i,kb,k3;
313     PetscInt         k4,j3;
314     MatScalar        *aa,*ax,*ay,stmp;
315     MatReal          tmp,max;
316 
317 /*     gaussian elimination with partial pivoting */
318 
319     PetscFunctionBegin;
320     /* Parameter adjustments */
321     a       -= 16;
322 
323     for (k = 1; k <= 14; ++k) {
324 	kp1 = k + 1;
325         k3  = 15*k;
326         k4  = k3 + k;
327 /*        find l = pivot index */
328 
329 	i__2 = 16 - k;
330         aa = &a[k4];
331         max = PetscAbsScalar(aa[0]);
332         l = 1;
333         for (ll=1; ll<i__2; ll++) {
334           tmp = PetscAbsScalar(aa[ll]);
335           if (tmp > max) { max = tmp; l = ll+1;}
336         }
337         l       += k - 1;
338 	ipvt[k-1] = l;
339 
340 	if (a[l + k3] == 0.0)  SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1);
341 
342 /*           interchange if necessary */
343 
344 	if (l != k) {
345 	  stmp      = a[l + k3];
346 	  a[l + k3] = a[k4];
347 	  a[k4]     = stmp;
348         }
349 
350 /*           compute multipliers */
351 
352 	stmp = -1. / a[k4];
353 	i__2 = 15 - k;
354         aa = &a[1 + k4];
355         for (ll=0; ll<i__2; ll++) {
356           aa[ll] *= stmp;
357         }
358 
359 /*           row elimination with column indexing */
360 
361 	ax = &a[k4+1];
362         for (j = kp1; j <= 15; ++j) {
363             j3   = 15*j;
364 	    stmp = a[l + j3];
365 	    if (l != k) {
366 	      a[l + j3] = a[k + j3];
367 	      a[k + j3] = stmp;
368             }
369 
370 	    i__3 = 15 - k;
371             ay = &a[1+k+j3];
372             for (ll=0; ll<i__3; ll++) {
373               ay[ll] += stmp*ax[ll];
374             }
375 	}
376     }
377     ipvt[14] = 15;
378     if (a[240] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",6);
379 
380     /*
381          Now form the inverse
382     */
383 
384    /*     compute inverse(u) */
385 
386     for (k = 1; k <= 15; ++k) {
387         k3    = 15*k;
388         k4    = k3 + k;
389 	a[k4] = 1.0 / a[k4];
390 	stmp  = -a[k4];
391 	i__2  = k - 1;
392         aa    = &a[k3 + 1];
393         for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
394 	kp1 = k + 1;
395 	if (15 < kp1) continue;
396         ax = aa;
397         for (j = kp1; j <= 15; ++j) {
398             j3        = 15*j;
399 	    stmp      = a[k + j3];
400 	    a[k + j3] = 0.0;
401             ay        = &a[j3 + 1];
402             for (ll=0; ll<k; ll++) {
403               ay[ll] += stmp*ax[ll];
404             }
405 	}
406     }
407 
408    /*    form inverse(u)*inverse(l) */
409 
410     for (kb = 1; kb <= 14; ++kb) {
411 	k   = 15 - kb;
412         k3  = 15*k;
413 	kp1 = k + 1;
414         aa  = a + k3;
415 	for (i = kp1; i <= 15; ++i) {
416             work[i-1] = aa[i];
417 	    aa[i]   = 0.0;
418 	}
419 	for (j = kp1; j <= 15; ++j) {
420 	    stmp  = work[j-1];
421             ax    = &a[15*j + 1];
422             ay    = &a[k3 + 1];
423             ay[0]  += stmp*ax[0];
424             ay[1]  += stmp*ax[1];
425             ay[2]  += stmp*ax[2];
426             ay[3]  += stmp*ax[3];
427             ay[4]  += stmp*ax[4];
428             ay[5]  += stmp*ax[5];
429             ay[6]  += stmp*ax[6];
430 	    ay[7]  += stmp*ax[7];
431             ay[8]  += stmp*ax[8];
432             ay[9]  += stmp*ax[9];
433             ay[10] += stmp*ax[10];
434             ay[11] += stmp*ax[11];
435             ay[12] += stmp*ax[12];
436             ay[13] += stmp*ax[13];
437 	    ay[14] += stmp*ax[14];
438 	}
439 	l = ipvt[k-1];
440 	if (l != k) {
441             ax = &a[k3 + 1];
442             ay = &a[15*l + 1];
443             stmp = ax[0];  ax[0]  = ay[0];  ay[0]  = stmp;
444             stmp = ax[1];  ax[1]  = ay[1];  ay[1]  = stmp;
445             stmp = ax[2];  ax[2]  = ay[2];  ay[2]  = stmp;
446             stmp = ax[3];  ax[3]  = ay[3];  ay[3]  = stmp;
447             stmp = ax[4];  ax[4]  = ay[4];  ay[4]  = stmp;
448             stmp = ax[5];  ax[5]  = ay[5];  ay[5]  = stmp;
449             stmp = ax[6];  ax[6]  = ay[6];  ay[6]  = stmp;
450 	    stmp = ax[7];  ax[7]  = ay[7];  ay[7]  = stmp;
451             stmp = ax[8];  ax[8]  = ay[8];  ay[8]  = stmp;
452             stmp = ax[9];  ax[9]  = ay[9];  ay[9]  = stmp;
453             stmp = ax[10]; ax[10] = ay[10]; ay[10] = stmp;
454             stmp = ax[11]; ax[11] = ay[11]; ay[11] = stmp;
455             stmp = ax[12]; ax[12] = ay[12]; ay[12] = stmp;
456             stmp = ax[13]; ax[13] = ay[13]; ay[13] = stmp;
457 	    stmp = ax[14]; ax[14] = ay[14]; ay[14] = stmp;
458 	}
459     }
460     PetscFunctionReturn(0);
461 }
462