|
a/LibDBlasLxF77.def.m2pp |
|
b/LibDBlasLxF77.def.m2pp |
|
... |
|
... |
35 |
(*------------------------------------------------------------------------*)
|
35 |
(*------------------------------------------------------------------------*)
|
36 |
(* Last change: *)
|
36 |
(* Last change: *)
|
37 |
(* *)
|
37 |
(* *)
|
38 |
(* 29.10.17, MRi: Erstellen der ersten Version nur mit dgemm *)
|
38 |
(* 29.10.17, MRi: Erstellen der ersten Version nur mit dgemm *)
|
39 |
(* 23.06.18, MRi: Hinzufuegen von zgemm *)
|
39 |
(* 23.06.18, MRi: Hinzufuegen von zgemm *)
|
|
|
40 |
(* 11.09.18, MRi: Hinzufuegen von dgemv und zgemm *)
|
40 |
(*------------------------------------------------------------------------*)
|
41 |
(*------------------------------------------------------------------------*)
|
41 |
(* Offene Punkte *)
|
42 |
(* Offene Punkte *)
|
42 |
(* *)
|
43 |
(* *)
|
43 |
(* - Testen *)
|
44 |
(* - Testen *)
|
44 |
(*------------------------------------------------------------------------*)
|
45 |
(*------------------------------------------------------------------------*)
|
45 |
(* Licence : GNU Lesser General Public License (LGPL) *)
|
46 |
(* Licence : GNU Lesser General Public License (LGPL) *)
|
46 |
(*------------------------------------------------------------------------*)
|
47 |
(*------------------------------------------------------------------------*)
|
47 |
|
48 |
|
48 |
(* $Id: LibDBlasLxF77.def.m2pp,v 1.2 2018/01/16 09:19:51 mriedl Exp mriedl $ *)
|
49 |
(* $Id: LibDBlasLxF77.def.m2pp,v 1.4 2018/09/12 13:20:49 mriedl Exp mriedl $ *)
|
49 |
|
50 |
|
50 |
FROM LibDBlasL1F77 IMPORT CHAR1,INTEGER4,REAL4,DOUBLEPRECISION,DOUBLECOMPLEX;
|
51 |
FROM LibDBlasL1F77 IMPORT CHAR1,INTEGER4,REAL4,DOUBLEPRECISION,DOUBLECOMPLEX;
|
51 |
|
52 |
|
52 |
<* IF (__XDS__) THEN *>
|
53 |
<* IF (__XDS__) THEN *>
|
53 |
CONST Version = "LibDBlasLxF77 for XDS Modula-2";
|
54 |
CONST Version = "LibDBlasLxF77 for XDS Modula-2";
|
|
... |
|
... |
56 |
CONST Version = "LibDBlasLxF77 for GNU Modula-2";
|
57 |
CONST Version = "LibDBlasLxF77 for GNU Modula-2";
|
57 |
<* END *>
|
58 |
<* END *>
|
58 |
<* IF (__MOCKA__) THEN *>
|
59 |
<* IF (__MOCKA__) THEN *>
|
59 |
CONST Version = "LibDBlasLxF77 for GMD Mocka";
|
60 |
CONST Version = "LibDBlasLxF77 for GMD Mocka";
|
60 |
<* END *>
|
61 |
<* END *>
|
|
|
62 |
|
|
|
63 |
PROCEDURE dgemv_(VAR Trans : CHAR1;
|
|
|
64 |
VAR M,N : INTEGER4;
|
|
|
65 |
VAR Alpha : DOUBLEPRECISION;
|
|
|
66 |
VAR A : (* ARRAY OF ARRAY OF *) DOUBLEPRECISION;
|
|
|
67 |
VAR lda : INTEGER4;
|
|
|
68 |
VAR X : (* ARRAY OF *) DOUBLEPRECISION;
|
|
|
69 |
VAR IncX : INTEGER4;
|
|
|
70 |
VAR Beta : DOUBLEPRECISION;
|
|
|
71 |
VAR Y : (* ARRAY OF *) DOUBLEPRECISION;
|
|
|
72 |
VAR IncY : INTEGER4);
|
|
|
73 |
|
|
|
74 |
(*----------------------------------------------------------------*)
|
|
|
75 |
(* Aufruf der Fortran Version von BLAS2 subroutine dgemv *)
|
|
|
76 |
(* *)
|
|
|
77 |
(* Performs one of the matrix-vector operations *)
|
|
|
78 |
(* *)
|
|
|
79 |
(* y = alpha*a *x + beta*y, or *)
|
|
|
80 |
(* y = alpha*a'*x + beta*y, or; *)
|
|
|
81 |
(* *)
|
|
|
82 |
(* where Alpha and Beta are scalars, X and Y are vectors *)
|
|
|
83 |
(* and A is an M by N matrix. *)
|
|
|
84 |
(*----------------------------------------------------------------*)
|
|
|
85 |
|
|
|
86 |
PROCEDURE zgemv_(VAR Trans : CHAR1;
|
|
|
87 |
VAR M,N : INTEGER4;
|
|
|
88 |
VAR Alpha : DOUBLECOMPLEX;
|
|
|
89 |
VAR A : (* ARRAY OF ARRAY OF *) DOUBLECOMPLEX;
|
|
|
90 |
VAR lda : INTEGER4;
|
|
|
91 |
VAR X : (* ARRAY OF *) DOUBLECOMPLEX;
|
|
|
92 |
VAR IncX : INTEGER4;
|
|
|
93 |
VAR Beta : DOUBLECOMPLEX;
|
|
|
94 |
VAR Y : (* ARRAY OF *) DOUBLECOMPLEX;
|
|
|
95 |
VAR IncY : INTEGER4);
|
|
|
96 |
|
|
|
97 |
(*----------------------------------------------------------------*)
|
|
|
98 |
(* Aufruf der Fortran Version von BLAS2 subroutine zgemv *)
|
|
|
99 |
(* *)
|
|
|
100 |
(* Performs one of the matrix-vector operations *)
|
|
|
101 |
(* *)
|
|
|
102 |
(* y = alpha*a *x + beta*y, or *)
|
|
|
103 |
(* y = alpha*a'*x + beta*y, or; *)
|
|
|
104 |
(* y = alpha*conjg(a')*x + beta*y,; *)
|
|
|
105 |
(* *)
|
|
|
106 |
(* where Alpha and Beta are scalars, X and Y are vectors *)
|
|
|
107 |
(* and A is an M by N matrix. *)
|
|
|
108 |
(*----------------------------------------------------------------*)
|
61 |
|
109 |
|
62 |
PROCEDURE sgemm_(VAR TA : CHAR1;
|
110 |
PROCEDURE sgemm_(VAR TA : CHAR1;
|
63 |
VAR TB : CHAR1;
|
111 |
VAR TB : CHAR1;
|
64 |
VAR M,N,K : INTEGER4;
|
112 |
VAR M,N,K : INTEGER4;
|
65 |
VAR Alpha : REAL4;
|
113 |
VAR Alpha : REAL4;
|
|
... |
|
... |
69 |
VAR ldb : INTEGER4;
|
117 |
VAR ldb : INTEGER4;
|
70 |
VAR Beta : REAL4;
|
118 |
VAR Beta : REAL4;
|
71 |
VAR C : (* ARRAY OF ARRAY OF *) REAL4;
|
119 |
VAR C : (* ARRAY OF ARRAY OF *) REAL4;
|
72 |
VAR ldc : INTEGER4);
|
120 |
VAR ldc : INTEGER4);
|
73 |
|
121 |
|
74 |
(*---------------------------------------------------------------*)
|
122 |
(*----------------------------------------------------------------*)
|
75 |
(* Matrix Matrix Multiplikatione / matrix matrix multiplication, *)
|
123 |
(* Matrix Matrix Multiplikatione / matrix matrix multiplication, *)
|
76 |
(* single precision version, see LibDBlas.def dgemm for details *)
|
124 |
(* single precision version, see LibDBlas.def dgemm for details *)
|
77 |
(*---------------------------------------------------------------*)
|
125 |
(*----------------------------------------------------------------*)
|
78 |
|
126 |
|
79 |
PROCEDURE dgemm_(VAR TA : CHAR1;
|
127 |
PROCEDURE dgemm_(VAR TA : CHAR1;
|
80 |
VAR TB : CHAR1;
|
128 |
VAR TB : CHAR1;
|
81 |
VAR M,N,K : INTEGER4;
|
129 |
VAR M,N,K : INTEGER4;
|
82 |
VAR Alpha : DOUBLEPRECISION;
|
130 |
VAR Alpha : DOUBLEPRECISION;
|
|
... |
|
... |
86 |
VAR ldb : INTEGER4;
|
134 |
VAR ldb : INTEGER4;
|
87 |
VAR Beta : DOUBLEPRECISION;
|
135 |
VAR Beta : DOUBLEPRECISION;
|
88 |
VAR C : (* ARRAY OF ARRAY OF *) DOUBLEPRECISION;
|
136 |
VAR C : (* ARRAY OF ARRAY OF *) DOUBLEPRECISION;
|
89 |
VAR ldc : INTEGER4);
|
137 |
VAR ldc : INTEGER4);
|
90 |
|
138 |
|
91 |
(*---------------------------------------------------------------*)
|
139 |
(*----------------------------------------------------------------*)
|
92 |
(* Matrix Matrix Multiplikatione / matrix matrix multiplication, *)
|
140 |
(* Matrix Matrix Multiplikatione / matrix matrix multiplication, *)
|
93 |
(* double precision version, see LibDBlas.def dgemm for details *)
|
141 |
(* double precision version, see LibDBlas.def dgemm for details *)
|
94 |
(* See LibDBlas.def for details *)
|
142 |
(* See LibDBlas.def for details *)
|
95 |
(* *)
|
143 |
(* *)
|
96 |
(* Hint: There are Fortran version with unrolled loops which *)
|
144 |
(* Hint: There are Fortran version with unrolled loops which *)
|
97 |
(* perform far better than the subroutines provided in the *)
|
145 |
(* perform far better than the subroutines provided in the *)
|
98 |
(* standard source. If you use an optimized BLAS level3 library *)
|
146 |
(* standard source. If you use an optimized BLAS level3 library *)
|
99 |
(* (e.g. ATLAS) there is no need to take that into consideration *)
|
147 |
(* (e.g. ATLAS) there is no need to take that into consideration *)
|
100 |
(*---------------------------------------------------------------*)
|
148 |
(*----------------------------------------------------------------*)
|
101 |
|
149 |
|
102 |
PROCEDURE dgemmomp(VAR TA : CHAR1;
|
150 |
PROCEDURE dgemmomp(VAR TA : CHAR1;
|
103 |
VAR TB : CHAR1;
|
151 |
VAR TB : CHAR1;
|
104 |
VAR M,N,K : INTEGER4;
|
152 |
VAR M,N,K : INTEGER4;
|
105 |
VAR Alpha : DOUBLEPRECISION;
|
153 |
VAR Alpha : DOUBLEPRECISION;
|
|
... |
|
... |
109 |
VAR ldb : INTEGER4;
|
157 |
VAR ldb : INTEGER4;
|
110 |
VAR Beta : DOUBLEPRECISION;
|
158 |
VAR Beta : DOUBLEPRECISION;
|
111 |
VAR C : (* ARRAY OF ARRAY OF *) DOUBLEPRECISION;
|
159 |
VAR C : (* ARRAY OF ARRAY OF *) DOUBLEPRECISION;
|
112 |
VAR ldc : INTEGER4);
|
160 |
VAR ldc : INTEGER4);
|
113 |
|
161 |
|
114 |
(*---------------------------------------------------------------*)
|
162 |
(*----------------------------------------------------------------*)
|
115 |
(* Matrix Matrix Multiplikation / matrix matrix multiplication, *)
|
163 |
(* Matrix Matrix Multiplikation / matrix matrix multiplication, *)
|
116 |
(* double precision version, see LibDBlas.def dgemm for details *)
|
164 |
(* double precision version, see LibDBlas.def dgemm for details *)
|
117 |
(* *)
|
165 |
(* *)
|
118 |
(* dgemmomp is an OpenMP based version of level 3 BLAS dgemmm. *)
|
166 |
(* dgemmomp is an OpenMP based version of level 3 BLAS dgemmm. *)
|
119 |
(* dgemmomp is far from beeing optimal - please test if it is *)
|
167 |
(* dgemmomp is far from beeing optimal - please test if it is *)
|
120 |
(* really improving the performance within your sprecific *)
|
168 |
(* really improving the performance within your sprecific *)
|
121 |
(* environment. On 32 bit systems the communication overhead *)
|
169 |
(* environment. On 32 bit systems the communication overhead *)
|
122 |
(* outperforms the potential gain in speed by using more than *)
|
170 |
(* outperforms the potential gain in speed by using more than *)
|
123 |
(* one thread in many cases. If you set the number of threads to *)
|
171 |
(* one thread in many cases. If you set the number of threads to *)
|
124 |
(* one a version of dgemm using unrolled loops and blocking is *)
|
172 |
(* one a version of dgemm using unrolled loops and blocking is *)
|
125 |
(* used if the dimension of the matriced involved exeed a *)
|
173 |
(* used if the dimension of the matriced involved exeed a *)
|
126 |
(* specific limit (see dgemmCbind.f90) so it still might be *)
|
174 |
(* specific limit (see dgemmCbind.f90) so it still might be *)
|
127 |
(* worth calling dgemmomp. dgemmomp has an explicit Fortran 2003 *)
|
175 |
(* worth calling dgemmomp. dgemmomp has an explicit Fortran 2003 *)
|
128 |
(* "C" interface so no need for "_" in the function name. *)
|
176 |
(* "C" interface so no need for "_" in the function name. *)
|
129 |
(* *)
|
177 |
(* *)
|
130 |
(* HINT: The module OpenMPF77 permits controlling the number of *)
|
178 |
(* HINT: The module OpenMPF77 permits controlling the number of *)
|
131 |
(* OpenMP threads used. *)
|
179 |
(* OpenMP threads used. *)
|
132 |
(*---------------------------------------------------------------*)
|
180 |
(*----------------------------------------------------------------*)
|
133 |
|
181 |
|
134 |
PROCEDURE dgemmomp2(VAR TA : CHAR1;
|
182 |
PROCEDURE dgemmomp2(VAR TA : CHAR1;
|
135 |
VAR TB : CHAR1;
|
183 |
VAR TB : CHAR1;
|
136 |
VAR M,N,K : INTEGER4;
|
184 |
VAR M,N,K : INTEGER4;
|
137 |
VAR Alpha : DOUBLEPRECISION;
|
185 |
VAR Alpha : DOUBLEPRECISION;
|
|
... |
|
... |
140 |
VAR B : (* ARRAY OF ARRAY OF *) DOUBLEPRECISION;
|
188 |
VAR B : (* ARRAY OF ARRAY OF *) DOUBLEPRECISION;
|
141 |
VAR ldb : INTEGER4;
|
189 |
VAR ldb : INTEGER4;
|
142 |
VAR Beta : DOUBLEPRECISION;
|
190 |
VAR Beta : DOUBLEPRECISION;
|
143 |
VAR C : (* ARRAY OF ARRAY OF *) DOUBLEPRECISION;
|
191 |
VAR C : (* ARRAY OF ARRAY OF *) DOUBLEPRECISION;
|
144 |
VAR ldc : INTEGER4);
|
192 |
VAR ldc : INTEGER4);
|
145 |
(*---------------------------------------------------------------*)
|
193 |
(*----------------------------------------------------------------*)
|
146 |
(* Another OMP parallel version of dgemm (experimental) *)
|
194 |
(* Another OMP parallel version of dgemm (experimental) *)
|
147 |
(*---------------------------------------------------------------*)
|
195 |
(*----------------------------------------------------------------*)
|
148 |
|
196 |
|
149 |
PROCEDURE zgemm_(VAR TA : CHAR1;
|
197 |
PROCEDURE zgemm_(VAR TA : CHAR1;
|
150 |
VAR TB : CHAR1;
|
198 |
VAR TB : CHAR1;
|
151 |
VAR M,N,K : INTEGER4;
|
199 |
VAR M,N,K : INTEGER4;
|
152 |
VAR Alpha : DOUBLECOMPLEX;
|
200 |
VAR Alpha : DOUBLECOMPLEX;
|
|
... |
|
... |
156 |
VAR ldb : INTEGER4;
|
204 |
VAR ldb : INTEGER4;
|
157 |
VAR Beta : DOUBLECOMPLEX;
|
205 |
VAR Beta : DOUBLECOMPLEX;
|
158 |
VAR C : (* ARRAY OF ARRAY OF *) DOUBLECOMPLEX;
|
206 |
VAR C : (* ARRAY OF ARRAY OF *) DOUBLECOMPLEX;
|
159 |
VAR ldc : INTEGER4);
|
207 |
VAR ldc : INTEGER4);
|
160 |
|
208 |
|
161 |
(*---------------------------------------------------------------*)
|
209 |
(*----------------------------------------------------------------*)
|
162 |
(* Matrix Matrix Multiplikatione / matrix matrix multiplication, *)
|
210 |
(* Matrix Matrix Multiplikatione / matrix matrix multiplication, *)
|
163 |
(* double complex version, see LibDBlas.def dgemm for details *)
|
211 |
(* double complex version, see LibDBlas.def dgemm for details *)
|
164 |
(*---------------------------------------------------------------*)
|
212 |
(*----------------------------------------------------------------*)
|
165 |
|
213 |
|
166 |
END LibDBlasLxF77.
|
214 |
END LibDBlasLxF77.
|