Switch to unified view

a/LibDBlasLxF77.def.m2pp b/LibDBlasLxF77.def.m2pp
...
...
35
  (*------------------------------------------------------------------------*)
35
  (*------------------------------------------------------------------------*)
36
  (* Last change:                                                           *)
36
  (* Last change:                                                           *)
37
  (*                                                                        *)
37
  (*                                                                        *)
38
  (* 29.10.17, MRi: Erstellen der ersten Version nur mit dgemm              *)
38
  (* 29.10.17, MRi: Erstellen der ersten Version nur mit dgemm              *)
39
  (* 23.06.18, MRi: Hinzufuegen von zgemm                                   *)
39
  (* 23.06.18, MRi: Hinzufuegen von zgemm                                   *)
40
  (* 11.09.18, MRi: Hinzufuegen von dgemv und zgemm                         *)
40
  (*------------------------------------------------------------------------*)
41
  (*------------------------------------------------------------------------*)
41
  (* Offene Punkte                                                          *)
42
  (* Offene Punkte                                                          *)
42
  (*                                                                        *)
43
  (*                                                                        *)
43
  (* - Testen                                                               *)
44
  (* - Testen                                                               *)
44
  (*------------------------------------------------------------------------*)
45
  (*------------------------------------------------------------------------*)
45
  (* Licence : GNU Lesser General Public License (LGPL)                     *)
46
  (* Licence : GNU Lesser General Public License (LGPL)                     *)
46
  (*------------------------------------------------------------------------*)
47
  (*------------------------------------------------------------------------*)
47
48
48
  (* $Id: LibDBlasLxF77.def.m2pp,v 1.2 2018/01/16 09:19:51 mriedl Exp mriedl $ *)
49
  (* $Id: LibDBlasLxF77.def.m2pp,v 1.4 2018/09/12 13:20:49 mriedl Exp mriedl $ *)
49
50
50
FROM LibDBlasL1F77 IMPORT CHAR1,INTEGER4,REAL4,DOUBLEPRECISION,DOUBLECOMPLEX;
51
FROM LibDBlasL1F77 IMPORT CHAR1,INTEGER4,REAL4,DOUBLEPRECISION,DOUBLECOMPLEX;
51
52
52
<* IF (__XDS__) THEN *>
53
<* IF (__XDS__) THEN *>
53
CONST Version = "LibDBlasLxF77 for XDS Modula-2";
54
CONST Version = "LibDBlasLxF77 for XDS Modula-2";
...
...
56
CONST Version = "LibDBlasLxF77 for GNU Modula-2";
57
CONST Version = "LibDBlasLxF77 for GNU Modula-2";
57
<* END *>
58
<* END *>
58
<* IF (__MOCKA__) THEN *>
59
<* IF (__MOCKA__) THEN *>
59
CONST Version = "LibDBlasLxF77 for GMD Mocka";
60
CONST Version = "LibDBlasLxF77 for GMD Mocka";
60
<* END *>
61
<* END *>
62
63
PROCEDURE dgemv_(VAR Trans : CHAR1; 
64
                 VAR M,N   : INTEGER4;
65
                 VAR Alpha : DOUBLEPRECISION;
66
                 VAR A     : (* ARRAY OF ARRAY OF *) DOUBLEPRECISION;
67
                 VAR lda   : INTEGER4;
68
                 VAR X     : (* ARRAY OF *) DOUBLEPRECISION;
69
                 VAR IncX  : INTEGER4;
70
                 VAR Beta  : DOUBLEPRECISION;
71
                 VAR Y     : (* ARRAY OF *) DOUBLEPRECISION;
72
                 VAR IncY  : INTEGER4);
73
74
          (*----------------------------------------------------------------*)
75
          (* Aufruf der Fortran Version von BLAS2 subroutine dgemv          *)
76
          (*                                                                *)
77
          (* Performs one of the matrix-vector operations                   *)
78
          (*                                                                *)
79
          (*   y = alpha*a *x + beta*y,   or                                *)
80
          (*   y = alpha*a'*x + beta*y,   or;                               *)
81
          (*                                                                *)
82
          (* where Alpha and Beta are scalars, X and Y are vectors          *)
83
          (* and A is an M by N matrix.                                     *)
84
          (*----------------------------------------------------------------*)
85
86
PROCEDURE zgemv_(VAR Trans : CHAR1; 
87
                 VAR M,N   : INTEGER4;
88
                 VAR Alpha : DOUBLECOMPLEX;
89
                 VAR A     : (* ARRAY OF ARRAY OF *) DOUBLECOMPLEX;
90
                 VAR lda   : INTEGER4;
91
                 VAR X     : (* ARRAY OF *) DOUBLECOMPLEX;
92
                 VAR IncX  : INTEGER4;
93
                 VAR Beta  : DOUBLECOMPLEX;
94
                 VAR Y     : (* ARRAY OF *) DOUBLECOMPLEX;
95
                 VAR IncY  : INTEGER4);
96
97
          (*----------------------------------------------------------------*)
98
          (* Aufruf der Fortran Version von BLAS2 subroutine zgemv          *)
99
          (*                                                                *)
100
          (* Performs one of the matrix-vector operations                   *)
101
          (*                                                                *)
102
          (*   y = alpha*a *x + beta*y,   or                                *)
103
          (*   y = alpha*a'*x + beta*y,   or;                               *)
104
          (*   y = alpha*conjg(a')*x + beta*y,;                             *)
105
          (*                                                                *)
106
          (* where Alpha and Beta are scalars, X and Y are vectors          *)
107
          (* and A is an M by N matrix.                                     *)
108
          (*----------------------------------------------------------------*)
61
109
62
PROCEDURE sgemm_(VAR TA    : CHAR1;
110
PROCEDURE sgemm_(VAR TA    : CHAR1;
63
                 VAR TB    : CHAR1;
111
                 VAR TB    : CHAR1;
64
                 VAR M,N,K : INTEGER4;
112
                 VAR M,N,K : INTEGER4;
65
                 VAR Alpha : REAL4;
113
                 VAR Alpha : REAL4;
...
...
69
                 VAR ldb   : INTEGER4;
117
                 VAR ldb   : INTEGER4;
70
                 VAR Beta  : REAL4;
118
                 VAR Beta  : REAL4;
71
                 VAR C     : (* ARRAY OF ARRAY OF *) REAL4;
119
                 VAR C     : (* ARRAY OF ARRAY OF *) REAL4;
72
                 VAR ldc   : INTEGER4);
120
                 VAR ldc   : INTEGER4);
73
121
74
          (*---------------------------------------------------------------*)
122
          (*----------------------------------------------------------------*)
75
          (* Matrix Matrix Multiplikatione / matrix matrix multiplication, *)
123
          (* Matrix Matrix Multiplikatione / matrix matrix multiplication,  *)
76
          (* single precision version, see LibDBlas.def dgemm for details  *)
124
          (* single precision version, see LibDBlas.def dgemm for details   *)
77
          (*---------------------------------------------------------------*)
125
          (*----------------------------------------------------------------*)
78
126
79
PROCEDURE dgemm_(VAR TA    : CHAR1;
127
PROCEDURE dgemm_(VAR TA    : CHAR1;
80
                 VAR TB    : CHAR1;
128
                 VAR TB    : CHAR1;
81
                 VAR M,N,K : INTEGER4;
129
                 VAR M,N,K : INTEGER4;
82
                 VAR Alpha : DOUBLEPRECISION;
130
                 VAR Alpha : DOUBLEPRECISION;
...
...
86
                 VAR ldb   : INTEGER4;
134
                 VAR ldb   : INTEGER4;
87
                 VAR Beta  : DOUBLEPRECISION;
135
                 VAR Beta  : DOUBLEPRECISION;
88
                 VAR C     : (* ARRAY OF ARRAY OF *) DOUBLEPRECISION;
136
                 VAR C     : (* ARRAY OF ARRAY OF *) DOUBLEPRECISION;
89
                 VAR ldc   : INTEGER4);
137
                 VAR ldc   : INTEGER4);
90
138
91
          (*---------------------------------------------------------------*)
139
          (*----------------------------------------------------------------*)
92
          (* Matrix Matrix Multiplikatione / matrix matrix multiplication, *)
140
          (* Matrix Matrix Multiplikatione / matrix matrix multiplication,  *)
93
          (* double precision version, see LibDBlas.def dgemm for details  *)
141
          (* double precision version, see LibDBlas.def dgemm for details   *)
94
          (* See   LibDBlas.def for  details                               *)
142
          (* See   LibDBlas.def for  details                                *)
95
          (*                                                               *)
143
          (*                                                                *)
96
          (* Hint: There are Fortran version with unrolled loops which     *)
144
          (* Hint: There are Fortran version with unrolled loops which      *)
97
          (* perform far better than the subroutines provided in the       *)
145
          (* perform far better than the subroutines provided in the        *)
98
          (* standard source. If you use an optimized BLAS level3 library  *)
146
          (* standard source. If you use an optimized BLAS level3 library   *)
99
          (* (e.g. ATLAS) there is no need to take that into consideration *)
147
          (* (e.g. ATLAS) there is no need to take that into consideration  *)
100
          (*---------------------------------------------------------------*)
148
          (*----------------------------------------------------------------*)
101
149
102
PROCEDURE dgemmomp(VAR TA    : CHAR1;
150
PROCEDURE dgemmomp(VAR TA    : CHAR1;
103
                   VAR TB    : CHAR1;
151
                   VAR TB    : CHAR1;
104
                   VAR M,N,K : INTEGER4;
152
                   VAR M,N,K : INTEGER4;
105
                   VAR Alpha : DOUBLEPRECISION;
153
                   VAR Alpha : DOUBLEPRECISION;
...
...
109
                   VAR ldb   : INTEGER4;
157
                   VAR ldb   : INTEGER4;
110
                   VAR Beta  : DOUBLEPRECISION;
158
                   VAR Beta  : DOUBLEPRECISION;
111
                   VAR C     : (* ARRAY OF ARRAY OF *) DOUBLEPRECISION;
159
                   VAR C     : (* ARRAY OF ARRAY OF *) DOUBLEPRECISION;
112
                   VAR ldc   : INTEGER4);
160
                   VAR ldc   : INTEGER4);
113
161
114
          (*---------------------------------------------------------------*)
162
          (*----------------------------------------------------------------*)
115
          (* Matrix Matrix Multiplikation / matrix matrix multiplication,  *)
163
          (* Matrix Matrix Multiplikation / matrix matrix multiplication,   *)
116
          (* double precision version, see LibDBlas.def dgemm for details  *)
164
          (* double precision version, see LibDBlas.def dgemm for details   *)
117
          (*                                                               *)
165
          (*                                                                *)
118
          (* dgemmomp is an OpenMP based version of level 3 BLAS dgemmm.   *)
166
          (* dgemmomp is an OpenMP based version of level 3 BLAS dgemmm.    *)
119
          (* dgemmomp is far from beeing optimal - please test if it is    *)
167
          (* dgemmomp is far from beeing optimal - please test if it is     *)
120
          (* really improving the performance within your sprecific        *)
168
          (* really improving the performance within your sprecific         *)
121
          (* environment. On 32 bit systems the communication overhead     *)
169
          (* environment. On 32 bit systems the communication overhead      *)
122
          (* outperforms the potential gain in speed by using more than    *)
170
          (* outperforms the potential gain in speed by using more than     *)
123
          (* one thread in many cases. If you set the number of threads to *)
171
          (* one thread in many cases. If you set the number of threads to  *)
124
          (* one a version of dgemm using unrolled loops and blocking is   *)
172
          (* one a version of dgemm using unrolled loops and blocking is    *)
125
          (* used if the dimension of the matriced involved exeed a        *)
173
          (* used if the dimension of the matriced involved exeed a         *)
126
          (* specific limit (see dgemmCbind.f90) so it still might be      *)
174
          (* specific limit (see dgemmCbind.f90) so it still might be       *)
127
          (* worth calling dgemmomp. dgemmomp has an explicit Fortran 2003 *)
175
          (* worth calling dgemmomp. dgemmomp has an explicit Fortran 2003  *)
128
          (* "C" interface so no need for "_" in the function name.        *)
176
          (* "C" interface so no need for "_" in the function name.         *)
129
          (*                                                               *)
177
          (*                                                                *)
130
          (* HINT: The module OpenMPF77 permits controlling the number of  *)
178
          (* HINT: The module OpenMPF77 permits controlling the number of   *)
131
          (*       OpenMP threads used.                                    *)
179
          (*       OpenMP threads used.                                     *)
132
          (*---------------------------------------------------------------*)
180
          (*----------------------------------------------------------------*)
133
181
134
PROCEDURE dgemmomp2(VAR TA    : CHAR1;
182
PROCEDURE dgemmomp2(VAR TA    : CHAR1;
135
                    VAR TB    : CHAR1;
183
                    VAR TB    : CHAR1;
136
                    VAR M,N,K : INTEGER4;
184
                    VAR M,N,K : INTEGER4;
137
                    VAR Alpha : DOUBLEPRECISION;
185
                    VAR Alpha : DOUBLEPRECISION;
...
...
140
                    VAR B     : (* ARRAY OF ARRAY OF *) DOUBLEPRECISION;
188
                    VAR B     : (* ARRAY OF ARRAY OF *) DOUBLEPRECISION;
141
                    VAR ldb   : INTEGER4;
189
                    VAR ldb   : INTEGER4;
142
                    VAR Beta  : DOUBLEPRECISION;
190
                    VAR Beta  : DOUBLEPRECISION;
143
                    VAR C     : (* ARRAY OF ARRAY OF *) DOUBLEPRECISION;
191
                    VAR C     : (* ARRAY OF ARRAY OF *) DOUBLEPRECISION;
144
                    VAR ldc   : INTEGER4);
192
                    VAR ldc   : INTEGER4);
145
          (*---------------------------------------------------------------*)
193
          (*----------------------------------------------------------------*)
146
          (* Another OMP parallel version of dgemm (experimental)          *)
194
          (* Another OMP parallel version of dgemm (experimental)           *)
147
          (*---------------------------------------------------------------*)
195
          (*----------------------------------------------------------------*)
148
196
149
PROCEDURE zgemm_(VAR TA    : CHAR1;
197
PROCEDURE zgemm_(VAR TA    : CHAR1;
150
                 VAR TB    : CHAR1;
198
                 VAR TB    : CHAR1;
151
                 VAR M,N,K : INTEGER4;
199
                 VAR M,N,K : INTEGER4;
152
                 VAR Alpha : DOUBLECOMPLEX;
200
                 VAR Alpha : DOUBLECOMPLEX;
...
...
156
                 VAR ldb   : INTEGER4;
204
                 VAR ldb   : INTEGER4;
157
                 VAR Beta  : DOUBLECOMPLEX;
205
                 VAR Beta  : DOUBLECOMPLEX;
158
                 VAR C     : (* ARRAY OF ARRAY OF *) DOUBLECOMPLEX;
206
                 VAR C     : (* ARRAY OF ARRAY OF *) DOUBLECOMPLEX;
159
                 VAR ldc   : INTEGER4);
207
                 VAR ldc   : INTEGER4);
160
208
161
          (*---------------------------------------------------------------*)
209
          (*----------------------------------------------------------------*)
162
          (* Matrix Matrix Multiplikatione / matrix matrix multiplication, *)
210
          (* Matrix Matrix Multiplikatione / matrix matrix multiplication,  *)
163
          (* double complex version, see LibDBlas.def dgemm for details    *)
211
          (* double complex version, see LibDBlas.def dgemm for details     *)
164
          (*---------------------------------------------------------------*)
212
          (*----------------------------------------------------------------*)
165
213
166
END LibDBlasLxF77.
214
END LibDBlasLxF77.