xref: /phasta/phSolver/incompressible/ftools.f (revision 1e99f302ca5103688ae35115c2fefb7cfa6714f1)
1c---------------------------------------------------------------------
2c
3c ftools.f : Bundle of Fortran routines
4c
5c Each routine is to be called by drvftools.f
6c
7c Various operations run based on les**.c
8c
9c---------------------------------------------------------------------
10c
11c--------------
12c flesPrepDiag
13c--------------
14c
15 	subroutine flesPrepDiag ( ien, xKebe, xGoc, flowDiag )
16c
17        include "common.h"
18c
19        dimension xKebe(npro,3*nshl,3*nshl),
20     &            xGoC(npro,4*nshl,nshl)
21        dimension Diagl(npro,nshl,nflow),   flowDiag(nshg, 4)
22        integer   ien(npro,nshl)
23c
24c
25c.... monentum contribution to diagonal
26c
27        do i = 1, nshl
28          i0 = (nsd) * (i - 1)
29          Diagl(:,i,1) = xKebe(1:npro,i0+1,i0+1)
30          Diagl(:,i,2) = xKebe(1:npro,i0+2,i0+2)
31          Diagl(:,i,3) = xKebe(1:npro,i0+3,i0+3)
32        enddo
33c
34c.... continuity contribution to diagonal
35c
36        nn2 = nshl * nsd
37        do i = 1, nshl
38          Diagl(:,i,4) = xGoC(1:npro,nn2+i,i)
39        enddo
40
41        call local (flowDiag,  Diagl, ien, nflow, 'scatter ')
42c
43        return
44        end
45c
46c--------------------------------
47c fMtxVdimVecMult
48c Farzin's implementation
49c row and column index exchanged
50c--------------------------------
51c
52        subroutine fMtxVdimVecMult( a, b, c, na, nb, nc, m, n )
53c
54c.... Data declaration
55c
56        implicit none
57        integer na,     nb,     nc,     m,      n
58        real*8  a(n,na),        b(n,nb),        c(n,nc)
59c
60        integer i,      j
61c
62c.... Do the work
63c
64C WIP: change to F90
65C
66        if ( m .eq. 1 ) then
67c
68            do i = 1, n
69                c(i,1) = a(i,1) * b(i,1)
70            enddo
71c
72        else if ( m .eq. 2 ) then
73c
74            do i = 1, n
75                c(i,1) = a(i,1) * b(i,1)
76                c(i,2) = a(i,2) * b(i,2)
77            enddo
78c
79        else if ( m .eq. 3 ) then
80c
81            do i = 1, n
82                c(i,1) = a(i,1) * b(i,1)
83                c(i,2) = a(i,2) * b(i,2)
84                c(i,3) = a(i,3) * b(i,3)
85            enddo
86c
87        else if ( m .eq. 4 ) then
88c
89            do i = 1, n
90                c(i,1) = a(i,1) * b(i,1)
91                c(i,2) = a(i,2) * b(i,2)
92                c(i,3) = a(i,3) * b(i,3)
93                c(i,4) = a(i,4) * b(i,4)
94            enddo
95c
96        else
97c
98            do i = 1, n
99                do j = 1, m
100                    c(i,j) = a(i,j) * b(i,j)
101                enddo
102            enddo
103c
104        endif
105c
106      return
107      end
108c
109c----------
110c flesZero
111c----------
112c
113	subroutine flesZero ( a, m, n )
114c
115        implicit none
116        integer  m, n, i, j
117        real*8   a(n,m)
118c
119        do i = 1, n
120          do j = 1, m
121            a(i,j) = 0.0e-0
122          enddo
123        enddo
124c
125        return
126        end
127c
128c--------
129c flesCp
130c--------
131c
132	subroutine flesCp ( a, b, m, n )
133c
134        implicit none
135        integer  m, n, i, j
136        real*8   a(n,m), b(n,m)
137c
138        do i = 1, n
139          do j = 1, m
140            b(i,j) = a(i,j)
141          enddo
142        enddo
143c
144        return
145        end
146c
147c-----------
148c flesScale
149c-----------
150c
151 	subroutine flesScale ( a, s, m, n )
152c
153        implicit none
154        integer  m, n, i, j
155        real*8   a(n,m), s
156c
157        do i = 1, n
158          do j = 1, m
159            a(i,j) = a(i,j) * s
160          enddo
161        enddo
162c
163        return
164        end
165c
166c-------------
167c flesScaleCp
168c-------------
169c
170	subroutine flesScaleCp ( a, b, s, m, n )
171c
172        implicit none
173        integer  m, n, i, j
174        real*8   a(n,m), b(n,m), s
175c
176        do i = 1, n
177          do j = 1, m
178            b(i,j) = a(i,j) * s
179          enddo
180        enddo
181c
182        return
183        end
184c
185c---------
186c flesAdd
187c---------
188c
189	subroutine flesAdd ( a, b, m, n )
190c
191        implicit none
192        integer  m, n, i, j
193        real*8   a(n,m), b(n,m)
194c
195        do i = 1, n
196          do j = 1, m
197            b(i,j) = b(i,j) + a(i,j)
198          enddo
199        enddo
200c
201        return
202        end
203c
204c---------
205c flesSub
206c---------
207c
208	subroutine flesSub ( a, b, m, n )
209c
210        implicit none
211        integer  m, n, i, j
212        real*8   a(n,m), b(n,m)
213c
214        do i = 1, n
215          do j = 1, m
216            b(i,j) = b(i,j) - a(i,j)
217          enddo
218        enddo
219c
220        return
221        end
222c
223c----------
224c flesDot1
225c----------
226c
227 	real*8 function flesDot1 ( a, m, n )
228c
229        implicit none
230        integer  m, n, i, j
231        real*8   a(n,m)
232c
233	flesDot1 = 0
234        do i = 1, n
235          do j = 1, m
236            flesDot1 = flesDot1 + a(i,j) * a(i,j)
237          enddo
238        enddo
239c
240        return
241        end
242c
243c----------
244c flesDot2
245c----------
246c
247	real*8 function flesDot2 ( a, b, m, n )
248c
249        implicit none
250        integer  m, n, i, j
251        real*8   a(n,m), b(n,m)
252c
253	flesDot2 = 0
254        do i = 1, n
255          do j = 1, m
256            flesDot2 = flesDot2 + a(i,j) * b(i,j)
257          enddo
258        enddo
259c
260        return
261        end
262c
263c-----------
264c flesDaxpy
265c-----------
266c
267	subroutine flesDaxpy ( x, y, a, m, n )
268c
269        implicit none
270        integer  m, n, i, j
271        real*8   x(n,m), y(n,m)
272        real*8   a
273c
274        do i = 1, n
275          do j= 1, m
276            y(i,j) = y(i,j) + a * x(i,j)
277          enddo
278        enddo
279c
280        return
281        end
282c
283c-----------
284c flesDxpay
285c-----------
286c
287	subroutine flesDxpay ( x, y, a, m, n )
288c
289        implicit none
290        integer  m, n, i, j
291        real*8   x(n,m), y(n,m)
292        real*8   a
293c
294        do i = 1, n
295          do j = 1, m
296            y(i,j) = a * y(i,j) + x(i,j)
297          enddo
298        enddo
299c
300        return
301        end
302c
303c---------
304c flesInv
305c---------
306c
307	subroutine flesInv ( x, m, n )
308c
309        implicit none
310        integer  m, n, i, j
311        real*8   x(n,m)
312c
313        do i = 1, n
314          do j = 1, m
315            if ( x(i,j) .ne. 0 ) x(i,j) = 1. / x(i,j)
316          enddo
317        enddo
318c
319        return
320        end
321c
322c--------------------------
323c fMtxBlkDot2
324c Farzin's implementation
325c row and column exchanged
326c--------------------------
327c
328        subroutine fMtxBlkDot2( x, y, c, m, n )
329c
330c.... Data declaration
331c
332        implicit none
333        integer m,      n
334        real*8  x(n,m), y(n),   c(m)
335c
336        real*8  tmp1,   tmp2,   tmp3,   tmp4
337        real*8  tmp5,   tmp6,   tmp7,   tmp8
338        integer i,      j,      m1
339c
340c.... Determine the left overs
341c
342        m1 = mod(m,8) + 1
343
344c
345c.... Do the small pieces
346c
347        goto ( 8000, 1000, 2000, 3000, 4000, 5000, 6000, 7000 ) m1
348c
3491000    continue
350        tmp1 = 0
351        do i = 1, n
352            tmp1 = tmp1 + x(i,1) * y(i)
353        enddo
354        c(1) = tmp1
355        goto 8000
356c
3572000    continue
358        tmp1 = 0
359        tmp2 = 0
360        do i = 1, n
361            tmp1 = tmp1 + x(i,1) * y(i)
362            tmp2 = tmp2 + x(i,2) * y(i)
363        enddo
364        c(1) = tmp1
365        c(2) = tmp2
366        goto 8000
367c
3683000    continue
369        tmp1 = 0
370        tmp2 = 0
371        tmp3 = 0
372        do i = 1, n
373            tmp1 = tmp1 + x(i,1) * y(i)
374            tmp2 = tmp2 + x(i,2) * y(i)
375            tmp3 = tmp3 + x(i,3) * y(i)
376        enddo
377        c(1) = tmp1
378        c(2) = tmp2
379        c(3) = tmp3
380        goto 8000
381c
3824000    continue
383        tmp1 = 0
384        tmp2 = 0
385        tmp3 = 0
386        tmp4 = 0
387        do i = 1, n
388            tmp1 = tmp1 + x(i,1) * y(i)
389            tmp2 = tmp2 + x(i,2) * y(i)
390            tmp3 = tmp3 + x(i,3) * y(i)
391            tmp4 = tmp4 + x(i,4) * y(i)
392        enddo
393        c(1) = tmp1
394        c(2) = tmp2
395        c(3) = tmp3
396        c(4) = tmp4
397        goto 8000
398c
3995000    continue
400        tmp1 = 0
401        tmp2 = 0
402        tmp3 = 0
403        tmp4 = 0
404        tmp5 = 0
405        do i = 1, n
406            tmp1 = tmp1 + x(i,1) * y(i)
407            tmp2 = tmp2 + x(i,2) * y(i)
408            tmp3 = tmp3 + x(i,3) * y(i)
409            tmp4 = tmp4 + x(i,4) * y(i)
410            tmp5 = tmp5 + x(i,5) * y(i)
411        enddo
412        c(1) = tmp1
413        c(2) = tmp2
414        c(3) = tmp3
415        c(4) = tmp4
416        c(5) = tmp5
417        goto 8000
418c
4196000    continue
420        tmp1 = 0
421        tmp2 = 0
422        tmp3 = 0
423        tmp4 = 0
424        tmp5 = 0
425        tmp6 = 0
426        do i = 1, n
427            tmp1 = tmp1 + x(i,1) * y(i)
428            tmp2 = tmp2 + x(i,2) * y(i)
429            tmp3 = tmp3 + x(i,3) * y(i)
430            tmp4 = tmp4 + x(i,4) * y(i)
431            tmp5 = tmp5 + x(i,5) * y(i)
432            tmp6 = tmp6 + x(i,6) * y(i)
433        enddo
434        c(1) = tmp1
435        c(2) = tmp2
436        c(3) = tmp3
437        c(4) = tmp4
438        c(5) = tmp5
439        c(6) = tmp6
440        goto 8000
441c
4427000    continue
443        tmp1 = 0
444        tmp2 = 0
445        tmp3 = 0
446        tmp4 = 0
447        tmp5 = 0
448        tmp6 = 0
449        tmp7 = 0
450        do i = 1, n
451            tmp1 = tmp1 + x(i,1) * y(i)
452            tmp2 = tmp2 + x(i,2) * y(i)
453            tmp3 = tmp3 + x(i,3) * y(i)
454            tmp4 = tmp4 + x(i,4) * y(i)
455            tmp5 = tmp5 + x(i,5) * y(i)
456            tmp6 = tmp6 + x(i,6) * y(i)
457            tmp7 = tmp7 + x(i,7) * y(i)
458        enddo
459        c(1) = tmp1
460        c(2) = tmp2
461        c(3) = tmp3
462        c(4) = tmp4
463        c(5) = tmp5
464        c(6) = tmp6
465        c(7) = tmp7
466        goto 8000
467c
468c.... Do the remaining part
469c
4708000    continue
471c
472        do j = m1, m, 8
473            tmp1 = 0
474            tmp2 = 0
475            tmp3 = 0
476            tmp4 = 0
477            tmp5 = 0
478            tmp6 = 0
479            tmp7 = 0
480            tmp8 = 0
481            do i = 1, n
482                tmp1 = tmp1 + x(i,j+0) * y(i)
483                tmp2 = tmp2 + x(i,j+1) * y(i)
484                tmp3 = tmp3 + x(i,j+2) * y(i)
485                tmp4 = tmp4 + x(i,j+3) * y(i)
486                tmp5 = tmp5 + x(i,j+4) * y(i)
487                tmp6 = tmp6 + x(i,j+5) * y(i)
488                tmp7 = tmp7 + x(i,j+6) * y(i)
489                tmp8 = tmp8 + x(i,j+7) * y(i)
490            enddo
491            c(j+0) = tmp1
492            c(j+1) = tmp2
493            c(j+2) = tmp3
494            c(j+3) = tmp4
495            c(j+4) = tmp5
496            c(j+5) = tmp6
497            c(j+6) = tmp7
498            c(j+7) = tmp8
499        enddo
500c
501        return
502        end
503c
504c--------------------------
505c fMtxBlkDaxpy
506c Farzin's implementation
507c row and column exchanged
508c--------------------------
509c
510        subroutine fMtxBlkDaxpy( x, y, c, m, n )
511c
512c.... Data declaration
513c
514        implicit none
515        integer m,      n
516        real*8  x(n,m), y(n),   c(m)
517c
518        real*8  tmp1,   tmp2,   tmp3,   tmp4
519        real*8  tmp5,   tmp6,   tmp7,   tmp8
520        integer i,      j,      m1
521c
522c.... Determine the left overs
523c
524        m1 = mod(m,8) + 1
525c
526c.... Do the small pieces
527c
528        goto ( 8000, 1000, 2000, 3000, 4000, 5000, 6000, 7000 ) m1
529c
5301000    continue
531        tmp1 = c(1)
532        do i = 1, n
533            y(i) = y(i)
534     1           + tmp1 * x(i,1)
535        enddo
536        goto 8000
537c
5382000    continue
539        tmp1 = c(1)
540        tmp2 = c(2)
541        do i = 1, n
542            y(i) = y(i)
543     1           + tmp1 * x(i,1) + tmp2 * x(i,2)
544        enddo
545        goto 8000
546c
5473000    continue
548        tmp1 = c(1)
549        tmp2 = c(2)
550        tmp3 = c(3)
551
552        do i = 1, n
553            y(i) = y(i)
554     1           + tmp1 * x(i,1) + tmp2 * x(i,2)
555     2           + tmp3 * x(i,3)
556        enddo
557        goto 8000
558c
5594000    continue
560        tmp1 = c(1)
561        tmp2 = c(2)
562        tmp3 = c(3)
563        tmp4 = c(4)
564        do i = 1, n
565            y(i) = y(i)
566     1           + tmp1 * x(i,1) + tmp2 * x(i,2)
567     2           + tmp3 * x(i,3) + tmp4 * x(i,4)
568        enddo
569        goto 8000
570c
5715000    continue
572        tmp1 = c(1)
573        tmp2 = c(2)
574        tmp3 = c(3)
575        tmp4 = c(4)
576        tmp5 = c(5)
577        do i = 1, n
578            y(i) = y(i)
579     1           + tmp1 * x(i,1) + tmp2 * x(i,2)
580     2           + tmp3 * x(i,3) + tmp4 * x(i,4)
581     3           + tmp5 * x(i,5)
582        enddo
583        goto 8000
584c
5856000    continue
586        tmp1 = c(1)
587        tmp2 = c(2)
588        tmp3 = c(3)
589        tmp4 = c(4)
590        tmp5 = c(5)
591        tmp6 = c(6)
592        do i = 1, n
593            y(i) = y(i)
594     1           + tmp1 * x(i,1) + tmp2 * x(i,2)
595     2           + tmp3 * x(i,3) + tmp4 * x(i,4)
596     3           + tmp5 * x(i,5) + tmp6 * x(i,6)
597        enddo
598        goto 8000
599c
6007000    continue
601        tmp1 = c(1)
602        tmp2 = c(2)
603        tmp3 = c(3)
604        tmp4 = c(4)
605        tmp5 = c(5)
606        tmp6 = c(6)
607        tmp7 = c(7)
608        do i = 1, n
609            y(i) = y(i)
610     1           + tmp1 * x(i,1) + tmp2 * x(i,2)
611     2           + tmp3 * x(i,3) + tmp4 * x(i,4)
612     3           + tmp5 * x(i,5) + tmp6 * x(i,6)
613     4           + tmp7 * x(i,7)
614        enddo
615        goto 8000
616c
617c.... Do the remaining part
618c
6198000    continue
620c
621        do j = m1, m, 8
622            tmp1 = c(j+0)
623            tmp2 = c(j+1)
624            tmp3 = c(j+2)
625            tmp4 = c(j+3)
626            tmp5 = c(j+4)
627            tmp6 = c(j+5)
628            tmp7 = c(j+6)
629            tmp8 = c(j+7)
630            do i = 1, n
631                y(i) = y(i)
632     1               + tmp1 * x(i,j+0) + tmp2 * x(i,j+1)
633     2               + tmp3 * x(i,j+2) + tmp4 * x(i,j+3)
634     3               + tmp5 * x(i,j+4) + tmp6 * x(i,j+5)
635     4               + tmp7 * x(i,j+6) + tmp8 * x(i,j+7)
636            enddo
637        enddo
638c
639        return
640        end
641c
642c--------------------------
643c fMtxBlkDyeax
644c Farzin's implementation
645c row and column exchanged
646c--------------------------
647c
648        subroutine fMtxBlkDyeax( x, y, c, m, n )
649c
650c.... Data declaration
651c
652        implicit none
653        integer m,      n
654        real*8  x(n,m), y(n),   c(m)
655c
656        real*8  tmp1,   tmp2,   tmp3,   tmp4
657        real*8  tmp5,   tmp6,   tmp7,   tmp8
658        integer i,      j,      m1
659c
660c.... Determine the left overs
661c
662        m1 = mod(m,8) + 1
663c
664c.... Do the small pieces
665c
666        goto ( 8000, 1000, 2000, 3000, 4000, 5000, 6000, 7000 ) m1
667c
6681000    continue
669        tmp1 = c(1)
670        do i = 1, n
671            y(i) =
672     1           + tmp1 * x(i,1)
673        enddo
674        goto 8001
675c
6762000    continue
677        tmp1 = c(1)
678        tmp2 = c(2)
679        do i = 1, n
680            y(i) =
681     1           + tmp1 * x(i,1) + tmp2 * x(i,2)
682        enddo
683        goto 8001
684c
6853000    continue
686        tmp1 = c(1)
687        tmp2 = c(2)
688        tmp3 = c(3)
689        do i = 1, n
690            y(i) =
691     1           + tmp1 * x(i,1) + tmp2 * x(i,2)
692     2           + tmp3 * x(i,3)
693        enddo
694        goto 8001
695c
6964000    continue
697        tmp1 = c(1)
698        tmp2 = c(2)
699        tmp3 = c(3)
700        tmp4 = c(4)
701        do i = 1, n
702            y(i) =
703     1           + tmp1 * x(i,1) + tmp2 * x(i,2)
704     2           + tmp3 * x(i,3) + tmp4 * x(i,4)
705        enddo
706        goto 8001
707c
7085000    continue
709        tmp1 = c(1)
710        tmp2 = c(2)
711        tmp3 = c(3)
712        tmp4 = c(4)
713        tmp5 = c(5)
714        do i = 1, n
715            y(i) =
716     1           + tmp1 * x(i,1) + tmp2 * x(i,2)
717     2           + tmp3 * x(i,3) + tmp4 * x(i,4)
718     3           + tmp5 * x(i,5)
719        enddo
720        goto 8001
721c
7226000    continue
723        tmp1 = c(1)
724        tmp2 = c(2)
725        tmp3 = c(3)
726        tmp4 = c(4)
727        tmp5 = c(5)
728        tmp6 = c(6)
729        do i = 1, n
730            y(i) =
731     1           + tmp1 * x(i,1) + tmp2 * x(i,2)
732     2           + tmp3 * x(i,3) + tmp4 * x(i,4)
733     3           + tmp5 * x(i,5) + tmp6 * x(i,6)
734       enddo
735        goto 8001
736c
7377000    continue
738        tmp1 = c(1)
739        tmp2 = c(2)
740        tmp3 = c(3)
741        tmp4 = c(4)
742        tmp5 = c(5)
743        tmp6 = c(6)
744        tmp7 = c(7)
745        do i = 1, n
746            y(i) =
747     1           + tmp1 * x(i,1) + tmp2 * x(i,2)
748     2           + tmp3 * x(i,3) + tmp4 * x(i,4)
749     3           + tmp5 * x(i,5) + tmp6 * x(i,6)
750     4           + tmp7 * x(i,7)
751        enddo
752        goto 8001
753c
7548000    continue
755        do i = 1, n
756            y(i) = 0
757        enddo
758        goto 8001
759c
760c.... Do the remaining part
761c
7628001    continue
763c
764        do j = m1, m, 8
765            tmp1 = c(j+0)
766            tmp2 = c(j+1)
767            tmp3 = c(j+2)
768            tmp4 = c(j+3)
769            tmp5 = c(j+4)
770            tmp6 = c(j+5)
771            tmp7 = c(j+6)
772            tmp8 = c(j+7)
773            do i = 1, n
774                y(i) = y(i)
775     1               + tmp1 * x(i,j+0) + tmp2 * x(i,j+1)
776     2               + tmp3 * x(i,j+2) + tmp4 * x(i,j+3)
777     3               + tmp5 * x(i,j+4) + tmp6 * x(i,j+5)
778     4               + tmp7 * x(i,j+6) + tmp8 * x(i,j+7)
779            enddo
780        enddo
781c
782        return
783        end
784c
785c--------------------------
786c fMtxBlkDmaxpy
787c Farzin's implementation
788c row and column exchanged
789c--------------------------
790c
791       subroutine fMtxBlkDmaxpy( x, y, c, m, n )
792c
793c.... Data declaration
794c
795        implicit none
796        integer m,      n
797        real*8  x(n,m), y(n),   c(m)
798c
799        real*8  tmp1,   tmp2,   tmp3,   tmp4
800        real*8  tmp5,   tmp6,   tmp7,   tmp8
801        integer i,      j,      m1
802c
803c.... Determine the left overs
804c
805        m1 = mod(m,8) + 1
806c
807c.... Do the small pieces
808c
809        goto ( 8000, 1000, 2000, 3000, 4000, 5000, 6000, 7000 ) m1
810c
8111000    continue
812        tmp1 = c(1)
813        do i = 1, n
814            y(i) = y(i)
815     1           - tmp1 * x(i,1)
816        enddo
817        goto 8000
818c
8192000    continue
820        tmp1 = c(1)
821        tmp2 = c(2)
822        do i = 1, n
823            y(i) = y(i)
824     1           - tmp1 * x(i,1) - tmp2 * x(i,2)
825        enddo
826        goto 8000
827c
8283000    continue
829        tmp1 = c(1)
830        tmp2 = c(2)
831        tmp3 = c(3)
832        do i = 1, n
833            y(i) = y(i)
834     1           - tmp1 * x(i,1) - tmp2 * x(i,2)
835     2           - tmp3 * x(i,3)
836        enddo
837        goto 8000
838c
8394000    continue
840        tmp1 = c(1)
841        tmp2 = c(2)
842        tmp3 = c(3)
843        tmp4 = c(4)
844        do i = 1, n
845            y(i) = y(i)
846     1           - tmp1 * x(i,1) - tmp2 * x(i,2)
847     2           - tmp3 * x(i,3) - tmp4 * x(i,4)
848        enddo
849        goto 8000
850c
8515000    continue
852        tmp1 = c(1)
853        tmp2 = c(2)
854        tmp3 = c(3)
855        tmp4 = c(4)
856        tmp5 = c(5)
857        do i = 1, n
858            y(i) = y(i)
859     1           - tmp1 * x(i,1) - tmp2 * x(i,2)
860     2           - tmp3 * x(i,3) - tmp4 * x(i,4)
861     3           - tmp5 * x(i,5)
862        enddo
863        goto 8000
864c
8656000    continue
866        tmp1 = c(1)
867        tmp2 = c(2)
868        tmp3 = c(3)
869        tmp4 = c(4)
870        tmp5 = c(5)
871        tmp6 = c(6)
872        do i = 1, n
873            y(i) = y(i)
874     1           - tmp1 * x(i,1) - tmp2 * x(i,2)
875     2           - tmp3 * x(i,3) - tmp4 * x(i,4)
876     3           - tmp5 * x(i,5) - tmp6 * x(i,6)
877        enddo
878        goto 8000
879
8807000    continue
881        tmp1 = c(1)
882        tmp2 = c(2)
883        tmp3 = c(3)
884        tmp4 = c(4)
885        tmp5 = c(5)
886        tmp6 = c(6)
887        tmp7 = c(7)
888        do i = 1, n
889            y(i) = y(i)
890     1           - tmp1 * x(i,1) - tmp2 * x(i,2)
891     2           - tmp3 * x(i,3) - tmp4 * x(i,4)
892     3           - tmp5 * x(i,5) - tmp6 * x(i,6)
893     4           - tmp7 * x(i,7)
894        enddo
895        goto 8000
896c
897c.... Do the remaining part
898c
8998000    continue
900c
901        do j = m1, m, 8
902            tmp1 = c(j+0)
903            tmp2 = c(j+1)
904            tmp3 = c(j+2)
905            tmp4 = c(j+3)
906            tmp5 = c(j+4)
907            tmp6 = c(j+5)
908            tmp7 = c(j+6)
909            tmp8 = c(j+7)
910            do i = 1, n
911                y(i) = y(i)
912     1               - tmp1 * x(i,j+0) - tmp2 * x(i,j+1)
913     2               - tmp3 * x(i,j+2) - tmp4 * x(i,j+3)
914     3               - tmp5 * x(i,j+4) - tmp6 * x(i,j+5)
915     4               - tmp7 * x(i,j+6) - tmp8 * x(i,j+7)
916            enddo
917        enddo
918c
919        return
920        end
921c
922c--------------------------
923c fMtxVdimVecCp
924c Farzin's implementation
925c row and column exchanged
926c--------------------------
927c
928        subroutine fMtxVdimVecCp( a, b, na, nb, m, n )
929c
930c.... Data declaration
931c
932        implicit none
933        integer na,     nb,     m,      n
934        real*8  a(n,na),        b(n,nb)
935c
936        integer i,      j
937c
938c.... Do the work
939c
940        if ( m .eq. 1 ) then
941
942            do i = 1, n
943                b(i,1) = a(i,1)
944            enddo
945
946        else if ( m .eq. 2 ) then
947
948            do i = 1, n
949                b(i,1) = a(i,1)
950                b(i,2) = a(i,2)
951            enddo
952
953        else if ( m .eq. 3 ) then
954
955            do i = 1, n
956                b(i,1) = a(i,1)
957                b(i,2) = a(i,2)
958                b(i,3) = a(i,3)
959            enddo
960
961        else if ( m .eq. 4 ) then
962
963            do i = 1, n
964                b(i,1) = a(i,1)
965                b(i,2) = a(i,2)
966                b(i,3) = a(i,3)
967                b(i,4) = a(i,4)
968            enddo
969
970        else
971
972            do i = 1, n
973                do j = 1, m
974                    b(i,j) = a(i,j)
975                enddo
976            enddo
977
978        endif
979c
980        return
981        end
982c
983c--------------------------
984c fMtxVdimVecDot2
985c Farzin's implementation
986c row and column exchanged
987c--------------------------
988c
989        subroutine fMtxVdimVecDot2( a, b, c, na, nb, m, n )
990c
991c.... Data declaration
992c
993        implicit none
994        integer na,     nb,     m,      n
995        real*8  a(n,na),        b(n,nb),        c(m)
996c
997        integer i,      j
998c
999c.... Do the work
1000c
1001        if ( m .eq. 1 ) then
1002
1003            c(1) = 0
1004            do i = 1, n
1005                c(1) = c(1) + a(i,1) * b(i,1)
1006            enddo
1007
1008        else if ( m .eq. 2 ) then
1009
1010            c(1) = 0
1011            c(2) = 0
1012            do i = 1, n
1013                c(1) = c(1) + a(i,1) * b(i,1)
1014                c(2) = c(2) + a(i,2) * b(i,2)
1015            enddo
1016
1017        else if ( m .eq. 3 ) then
1018
1019            c(1) = 0
1020            c(2) = 0
1021            c(3) = 0
1022            do i = 1, n
1023                c(1) = c(1) + a(i,1) * b(i,1)
1024                c(2) = c(2) + a(i,2) * b(i,2)
1025                c(3) = c(3) + a(i,3) * b(i,3)
1026            enddo
1027
1028        else if ( m .eq. 4 ) then
1029
1030            c(1) = 0
1031            c(2) = 0
1032            c(3) = 0
1033            c(4) = 0
1034            do i = 1, n
1035                c(1) = c(1) + a(i,1) * b(i,1)
1036                c(2) = c(2) + a(i,2) * b(i,2)
1037                c(3) = c(3) + a(i,3) * b(i,3)
1038                c(4) = c(4) + a(i,4) * b(i,4)
1039            enddo
1040
1041        else
1042
1043            do j = 1, m
1044                c(j) = 0
1045                do i = 1, n
1046                    c(j) = c(j) + a(i,j) * b(i,j)
1047                enddo
1048            enddo
1049
1050        endif
1051c
1052        return
1053        end
1054c
1055c--------------------------
1056c fMtxVdimVecDaxpy
1057c Farzin's implementation
1058c row and column exchanged
1059c--------------------------
1060c
1061        subroutine fMtxVdimVecDaxpy( a, b, c, na, nb, m, n )
1062c
1063c.... Data declaration
1064c
1065        implicit none
1066        integer na,     nb,     m,      n
1067        real*8  a(n,na),        b(n,nb),        c(m)
1068c
1069        integer i,      j
1070c
1071c.... Do the work
1072c
1073        if ( m .eq. 1 ) then
1074
1075            do i = 1, n
1076                b(i,1) = b(i,1) + c(1) * a(i,1)
1077            enddo
1078
1079        else if ( m .eq. 2 ) then
1080
1081            do i = 1, n
1082                b(i,1) = b(i,1) + c(1) * a(i,1)
1083                b(i,2) = b(i,2) + c(2) * a(i,2)
1084            enddo
1085
1086        else if ( m .eq. 3 ) then
1087
1088            do i = 1, n
1089                b(i,1) = b(i,1) + c(1) * a(i,1)
1090                b(i,2) = b(i,2) + c(2) * a(i,2)
1091                b(i,3) = b(i,3) + c(3) * a(i,3)
1092            enddo
1093
1094        else if ( m .eq. 4 ) then
1095
1096            do i = 1, n
1097                b(i,1) = b(i,1) + c(1) * a(i,1)
1098                b(i,2) = b(i,2) + c(2) * a(i,2)
1099                b(i,3) = b(i,3) + c(3) * a(i,3)
1100                b(i,4) = b(i,4) + c(4) * a(i,4)
1101            enddo
1102
1103        else
1104
1105            do j = 1, m
1106                do i = 1, n
1107                    b(i,j) = b(i,j) + c(j) * a(i,j)
1108                enddo
1109            enddo
1110
1111        endif
1112c
1113        return
1114        end
1115c
1116c---------
1117c flesApG
1118c---------
1119c
1120	subroutine flesApG ( ien, xGoC, lesP, lesQ, nPs, nQs )
1121c
1122        include "common.h"
1123c
1124        dimension xGoC(npro,4*nshl,nshl)
1125        real*8 lesP(nshg,nPs), lesQ(nshg,nQs)
1126        dimension ien(npro,nshl)
1127        dimension Ptemp(npro,nshl,nPs), Qtemp(npro,nshl,nQs)
1128c
1129c.... zero Qtemp
1130c
1131	Qtemp = zero
1132c
1133c.... localize the lesP for the EBE product
1134c
1135        call local ( lesP, Ptemp, ien, nPs, 'gather  ' )
1136c
1137c.... Now, product operation
1138c
1139    	do i = 1, nshl
1140           i0 = (nsd) * (i - 1)
1141           do j = 1, nshl
1142c
1143             Qtemp(:,i,1) = Qtemp(:,i,1)
1144     &                    + xGoC(1:npro,i0+1,j) * Ptemp(:,j,nPs)
1145c
1146             Qtemp(:,i,2) = Qtemp(:,i,2)
1147     &                    + xGoC(1:npro,i0+2,j) * Ptemp(:,j,nPs)
1148c
1149             Qtemp(:,i,3) = Qtemp(:,i,3)
1150     &                    + xGoC(1:npro,i0+3,j) * Ptemp(:,j,nPs)
1151c
1152           enddo
1153        enddo
1154c
1155c... assemble the result of the product
1156c
1157        call local ( lesQ, Qtemp, ien, nQs, 'scatter ' )
1158c
1159        return
1160        end
1161c
1162c----------
1163c flesApKG
1164c----------
1165c
1166 	subroutine flesApKG ( ien, xKebe, xGoC, lesP, lesQ, nPs, nQs )
1167c
1168        include "common.h"
1169c
1170        dimension xKebe(npro,3*nshl,3*nshl),
1171     &       xGoC(npro,4*nshl,nshl)
1172        dimension ien(npro,nshl)
1173        real*8 lesP(nshg,nPs), lesQ(nshg,nQs)
1174        dimension Ptemp(npro,nshl,nPs), Qtemp(npro,nshl,nQs)
1175c
1176c.... zero Qtemp
1177c
1178	Qtemp = zero
1179c
1180c.... localize the lesP for the EBE product
1181c
1182        call local ( lesP, Ptemp, ien, nPs, 'gather  ' )
1183c
1184c.... Now, product operation
1185c
1186c.... K contribution
1187c
1188        do i = 1, nshl
1189           i0 = (nsd) * (i - 1)
1190          do j = 1, nshl
1191             j0 = (nsd) * (j - 1)
1192c
1193            Qtemp(:,i,1) = Qtemp(:,i,1)
1194     &                   + xKebe(1:npro,i0+1,j0+1) * Ptemp(:,j,1)
1195     &                   + xKebe(1:npro,i0+1,j0+2) * Ptemp(:,j,2)
1196     &                   + xKebe(1:npro,i0+1,j0+3) * Ptemp(:,j,3)
1197c
1198            Qtemp(:,i,2) = Qtemp(:,i,2)
1199     &                   + xKebe(1:npro,i0+2,j0+1) * Ptemp(:,j,1)
1200     &                   + xKebe(1:npro,i0+2,j0+2) * Ptemp(:,j,2)
1201     &                   + xKebe(1:npro,i0+2,j0+3) * Ptemp(:,j,3)
1202            Qtemp(:,i,3) = Qtemp(:,i,3)
1203     &                   + xKebe(1:npro,i0+3,j0+1) * Ptemp(:,j,1)
1204     &                   + xKebe(1:npro,i0+3,j0+2) * Ptemp(:,j,2)
1205     &                   + xKebe(1:npro,i0+3,j0+3) * Ptemp(:,j,3)
1206c
1207          enddo
1208     	enddo
1209c
1210c.... G contribution
1211c
1212        do i = 1, nshl
1213           i0 = (nsd) * (i - 1)
1214          do j = 1, nshl
1215c
1216            Qtemp(:,i,1) = Qtemp(:,i,1)
1217     &                   + xGoC(1:npro,i0+1,j) * Ptemp(:,j,nPs)
1218            Qtemp(:,i,2) = Qtemp(:,i,2)
1219     &                   + xGoC(1:npro,i0+2,j) * Ptemp(:,j,nPs)
1220            Qtemp(:,i,3) = Qtemp(1:,i,3)
1221     &                   + xGoC(1:npro,i0+3,j) * Ptemp(:,j,nPs)
1222c
1223          enddo
1224        enddo
1225c
1226c.... assemble the result of the product
1227c
1228        call local ( lesQ, Qtemp, ien, nQs, 'scatter ' )
1229c
1230        return
1231        end
1232c
1233c-----------
1234c flesApNGt
1235c-----------
1236c
1237	subroutine flesApNGt ( ien, xGoC, lesP, lesQ, nPs, nQs )
1238c
1239        include "common.h"
1240c
1241        dimension ien(npro,nshl), xGoC(npro,4*nshl,nshl)
1242        real*8 lesP(nshg,nPs), lesQ(nshg,nQs)
1243        dimension Ptemp(npro,nshl,nPs), Qtemp(npro,nshl,nQs)
1244c
1245c.... zero Qtemp
1246c
1247	Qtemp = zero
1248c
1249c.... localize the lesP for the EBE product
1250c
1251        call local ( lesP, Ptemp, ien, nPs, 'gather  ' )
1252c
1253c.... Now, product operation
1254c
1255c.... Negative G^t contribution ( not explicitly formed )
1256c
1257        do i = 1, nshl
1258           do j = 1, nshl
1259              i0 = (nsd) * (j - 1)
1260c
1261             Qtemp(:,i,nQs) = Qtemp(:,i,nQs)
1262     &                      - xGoC(1:npro,i0+1,i) * Ptemp(:,j,1)
1263     &                      - xGoC(1:npro,i0+2,i) * Ptemp(:,j,2)
1264     &                      - xGoC(1:npro,i0+3,i) * Ptemp(:,j,3)
1265c
1266           enddo
1267        enddo
1268c
1269c... assemble the result of the product
1270c
1271        call local ( lesQ, Qtemp, ien, nQs, 'scatter  ' )
1272c
1273        return
1274        end
1275c
1276c------------
1277c flesApNGtC
1278c------------
1279c
1280	subroutine flesApNGtC ( ien, xGoC, lesP, lesQ, nPs, nQs )
1281c
1282        include "common.h"
1283c
1284        dimension ien(npro,nshl), xGoC(npro,4*nshl,nshl)
1285        real*8 lesP(nshg,nPs), lesQ(nshg,nQs)
1286        dimension Ptemp(npro,nshl,nPs), Qtemp(npro,nshl,nQs)
1287c
1288c.... zero Qtemp
1289c
1290	Qtemp = zero
1291c
1292c.... localize the lesP for the EBE product
1293c
1294	call local ( lesP, Ptemp, ien, nPs, 'gather  ')
1295c
1296c.... Now, product operation
1297c
1298c.... Negative G^t contribution ( not explicitly formed )
1299c
1300        do i = 1, nshl
1301           do j = 1, nshl
1302           i0 = (nsd) * (j - 1)
1303c
1304             Qtemp(:,i,nQs) = Qtemp(:,i,nQs)
1305     &                      - xGoC(1:npro,i0+1,i) * Ptemp(:,j,1)
1306     &                      - xGoC(1:npro,i0+2,i) * Ptemp(:,j,2)
1307     &                      - xGoC(1:npro,i0+3,i) * Ptemp(:,j,3)
1308c
1309           enddo
1310        enddo
1311c
1312c.... C contribution
1313c
1314        nnm2 = nshl * (nsd)
1315c
1316        do i = 1, nshl
1317           i0 = nnm2 + i
1318          do j = 1, nshl
1319c
1320             Qtemp(:,i,nQs) = Qtemp(:,i,nQs)
1321     &                      + xGoC(1:npro,i0,j) * Ptemp(:,j,nPs)
1322c
1323          enddo
1324        enddo
1325c
1326c... assemble the result of the product
1327c
1328        call local ( lesQ, Qtemp, ien, nQs, 'scatter  ' )
1329c
1330        return
1331        end
1332c
1333c------------
1334c flesApFull
1335c------------
1336c
1337	subroutine flesApFull ( ien, xKebe, xGoC, lesP, lesQ, nPs, nQs )
1338c
1339        include "common.h"
1340c
1341        dimension ien(npro,nshl)
1342        dimension xKebe(npro,3*nshl,3*nshl),
1343     &       xGoC(npro,4*nshl,nshl)
1344        real*8 lesP(nshg,nPs), lesQ(nshg,nQs)
1345        dimension Ptemp(npro,nshl,nPs), Qtemp(npro,nshl,nQs)
1346c
1347c.... zero Qtemp
1348c
1349	Qtemp = zero
1350c
1351c.... localize the lesP for the EBE product
1352c
1353 	call local ( lesP, Ptemp, ien, nPs, 'gather  ' )
1354c
1355c.... Now, product operation
1356c
1357c.... K * Du contribution
1358c
1359        do i = 1, nshl
1360           i0 = (nsd) * (i - 1)
1361          do j = 1, nshl
1362             j0 = (nsd) * (j - 1)
1363c
1364            Qtemp(:,i,1) = Qtemp(:,i,1)
1365     &                   + xKebe(1:npro,i0+1,j0+1) * Ptemp(:,j,1)
1366     &                   + xKebe(1:npro,i0+1,j0+2) * Ptemp(:,j,2)
1367     &                   + xKebe(1:npro,i0+1,j0+3) * Ptemp(:,j,3)
1368c
1369            Qtemp(:,i,2) = Qtemp(:,i,2)
1370     &                   + xKebe(1:npro,i0+2,j0+1) * Ptemp(:,j,1)
1371     &                   + xKebe(1:npro,i0+2,j0+2) * Ptemp(:,j,2)
1372     &                   + xKebe(1:npro,i0+2,j0+3) * Ptemp(:,j,3)
1373            Qtemp(:,i,3) = Qtemp(:,i,3)
1374     &                   + xKebe(1:npro,i0+3,j0+1) * Ptemp(:,j,1)
1375     &                   + xKebe(1:npro,i0+3,j0+2) * Ptemp(:,j,2)
1376     &                   + xKebe(1:npro,i0+3,j0+3) * Ptemp(:,j,3)
1377c
1378          enddo
1379        enddo
1380c
1381c.... G * Dp contribution
1382c
1383       do i = 1, nshl
1384           i0 = (nsd) * (i - 1)
1385          do j = 1, nshl
1386c
1387            Qtemp(:,i,1) = Qtemp(:,i,1)
1388     &                   + xGoC(1:npro,i0+1,j) * Ptemp(:,j,nPs)
1389            Qtemp(:,i,2) = Qtemp(:,i,2)
1390     &                   + xGoC(1:npro,i0+2,j) * Ptemp(:,j,nPs)
1391            Qtemp(:,i,3) = Qtemp(:,i,3)
1392     &                   + xGoC(1:npro,i0+3,j) * Ptemp(:,j,nPs)
1393c
1394          enddo
1395        enddo
1396c
1397c.... -G^t * Du contribution
1398c
1399       do i = 1, nshl
1400           do j = 1, nshl
1401              i0 = (nsd) * (j - 1)
1402c
1403             Qtemp(:,i,nQs) = Qtemp(:,i,nQs)
1404     &                      - xGoC(1:npro,i0+1,i) * Ptemp(:,j,1)
1405     &                      - xGoC(1:npro,i0+2,i) * Ptemp(:,j,2)
1406     &                      - xGoC(1:npro,i0+3,i) * Ptemp(:,j,3)
1407c
1408           enddo
1409        enddo
1410c
1411c.... C * Dp contribution
1412c
1413        nnm2 = nshl * (nsd)
1414c
1415        do i = 1, nshl
1416           i0 = nnm2 + i
1417          do j = 1, nshl
1418c
1419             Qtemp(:,i,nQs) = Qtemp(:,i,nQs)
1420     &                      + xGoC(1:npro,i0,j) * Ptemp(:,j,nPs)
1421c
1422          enddo
1423        enddo
1424
1425
1426
1427c
1428c... assemble the result of the product
1429c
1430        call local ( lesQ, Qtemp, ien, nQs, 'scatter ' )
1431c
1432        return
1433        end
1434c
1435c-----------
1436c fsclrDiag
1437c-----------
1438c
1439	subroutine fsclrDiag ( ien, xTe, sclrDiag )
1440c
1441        include "common.h"
1442c
1443        dimension xTe(npro,nshl,nshl)
1444        dimension sclrDiag(nshg,1), Diagl(npro,nshl,1)
1445        dimension ien(npro,nshl)
1446c
1447        do i = 1, nshl
1448           Diagl(:,i,1) = xTe(1:npro,i,i)
1449        enddo
1450c
1451        call local (sclrDiag, Diagl, ien, 1, 'scatter ')
1452c
1453        return
1454        end
1455c
1456c------------
1457c flesApSclr
1458c------------
1459c
1460	subroutine flesApSclr ( ien, xTe, lesP, lesQ, nPs, nQs )
1461c
1462        include "common.h"
1463c
1464        dimension xTe(npro,nshl,nshl)
1465        dimension ien(npro,nshl)
1466        real*8 lesP(nshg,nPs), lesQ(nshg,nQs)
1467        dimension Ptemp(npro,nshl,nPs), Qtemp(npro,nshl,nQs)
1468c
1469c.... zero Qtemp
1470c
1471        Qtemp = zero
1472c
1473c.... localize the lesP for the EBE product
1474c
1475        call local ( lesP, Ptemp, ien, nPs, 'gather  ')
1476c
1477c.... Now, product operation
1478c
1479        do i = 1, nshl
1480          do j = 1, nshl
1481c
1482            Qtemp(:,i,nQs) = Qtemp(:,i,nQs)
1483     &                     + xTe(1:npro,i,j) * Ptemp(:,j,nPs)
1484c
1485          enddo
1486        enddo
1487c
1488c.... assemble the result of the product
1489c
1490  	call local ( lesQ, Qtemp, ien, nQs, 'scatter ' )
1491c
1492        return
1493        end
1494