Parent:
[93da36]
(diff)
Child:
[28b809]
(diff)
Download this file
LibDBlasLxF77.def.m2pp
215 lines (196 with data), 12.2 kB
(* Module heading in three compiler-specific spellings; the m2pp *)
(* preprocessor keeps the variant whose macro is defined. *)
<* IF (__XDS__) THEN *>
DEFINITION MODULE ["C"] LibDBlasLxF77; (* XDS Modula-2 syntax *)
<* END *>
<* IF (__GM2__) THEN *>
DEFINITION MODULE FOR "C" LibDBlasLxF77; (* GNU Modula-2 syntax *)
<* END *>
<* IF (__MOCKA__) THEN *>
FOREIGN MODULE LibDBlasLxF77; (* GMD Mocka syntax *)
<* END *>
(*========================================================================*)
(* WICHTIG: BITTE NUR DIE DATEI LibDBlasLxF77.def.m2pp EDITIEREN !!! *)
(*========================================================================*)
(* Es sind 3 Versionen enthalten die mit *)
(* *)
(* m2pp -D __{Parameter}__ < LibDBlasLxF77.def.m2pp > LibDBlasLxF77.def *)
(* *)
(* mit Parameter = {XDS|GM2|MOCKA} erzeugt werden koennen. *)
(* *)
(* GM2 : Definitionsmodul im Stil des GM2 M2 Compilers *)
(* XDS : Definitionsmodul im Stil des XDS M2 Compilers *)
(* MOCKA : Definitionsmodul im Stil des Mocka Compilers *)
(* *)
(* ansonsten gibt es keine Aenderungen am Quellcode *)
(* *)
(* Three versions are contained, one each for the GNU, XDS and Mocka *)
(* compilers; they can be produced by the m2pp command line given above *)
(*------------------------------------------------------------------------*)
(* Schnittstelle zu dblas level 2 & 3 FORTRAN 77 Subroutinen. *)
(* Low level interface to dblas level 2 & 3 FORTRAN 77 subroutines. *)
(* *)
(* Hint: If you use an OpenMP based version of a Fortran subroutine you *)
(* can regulate the number of threads used by omp_set_num_threads in *)
(* module OpenMPF77 *)
(*------------------------------------------------------------------------*)
(* Last change: *)
(* *)
(* 29.10.17, MRi: Erstellen der ersten Version nur mit dgemm *)
(* 23.06.18, MRi: Hinzufuegen von zgemm *)
(* 11.09.18, MRi: Hinzufuegen von dgemv und zgemv *)
(*------------------------------------------------------------------------*)
(* Offene Punkte *)
(* *)
(* - Testen *)
(*------------------------------------------------------------------------*)
(* Licence : GNU Lesser General Public License (LGPL) *)
(*------------------------------------------------------------------------*)
(* $Id: LibDBlasLxF77.def.m2pp,v 1.4 2018/09/12 13:20:49 mriedl Exp mriedl $ *)
FROM LibDBlasL1F77 IMPORT CHAR1,INTEGER4,REAL4,DOUBLEPRECISION,DOUBLECOMPLEX;
(* Version string naming the compiler variant this definition module was *)
(* generated for; m2pp keeps the constant whose macro is defined. *)
<* IF (__XDS__) THEN *>
CONST Version = "LibDBlasLxF77 for XDS Modula-2";
<* END *>
<* IF (__GM2__) THEN *>
CONST Version = "LibDBlasLxF77 for GNU Modula-2";
<* END *>
<* IF (__MOCKA__) THEN *>
CONST Version = "LibDBlasLxF77 for GMD Mocka";
<* END *>
PROCEDURE dgemv_(VAR Trans : CHAR1;
VAR M,N : INTEGER4;
VAR Alpha : DOUBLEPRECISION;
VAR A : (* ARRAY OF ARRAY OF *) DOUBLEPRECISION;
VAR lda : INTEGER4;
VAR X : (* ARRAY OF *) DOUBLEPRECISION;
VAR IncX : INTEGER4;
VAR Beta : DOUBLEPRECISION;
VAR Y : (* ARRAY OF *) DOUBLEPRECISION;
VAR IncY : INTEGER4);
(*----------------------------------------------------------------*)
(* Binding to the Fortran version of the BLAS level 2 subroutine *)
(* dgemv (double precision). *)
(* *)
(* Performs one of the matrix-vector operations *)
(* *)
(* y = alpha*a *x + beta*y, or *)
(* y = alpha*a'*x + beta*y *)
(* *)
(* where Alpha and Beta are scalars, X and Y are vectors *)
(* and A is an M by N matrix. *)
(* *)
(* A, X and Y are declared as single VAR elements; the inline *)
(* comments in the parameter list give the intended array shape. *)
(* *)
(* NOTE(review): parameter roles are not spelled out in this file. *)
(* In the reference BLAS, Trans selects the operation, lda is the *)
(* leading dimension of A and IncX/IncY are the element strides *)
(* of X and Y - confirm against the BLAS library actually linked. *)
(*----------------------------------------------------------------*)
PROCEDURE zgemv_(VAR Trans : CHAR1;
VAR M,N : INTEGER4;
VAR Alpha : DOUBLECOMPLEX;
VAR A : (* ARRAY OF ARRAY OF *) DOUBLECOMPLEX;
VAR lda : INTEGER4;
VAR X : (* ARRAY OF *) DOUBLECOMPLEX;
VAR IncX : INTEGER4;
VAR Beta : DOUBLECOMPLEX;
VAR Y : (* ARRAY OF *) DOUBLECOMPLEX;
VAR IncY : INTEGER4);
(*----------------------------------------------------------------*)
(* Binding to the Fortran version of the BLAS level 2 subroutine *)
(* zgemv (double complex). *)
(* *)
(* Performs one of the matrix-vector operations *)
(* *)
(* y = alpha*a *x + beta*y, or *)
(* y = alpha*a'*x + beta*y, or *)
(* y = alpha*conjg(a')*x + beta*y *)
(* *)
(* where Alpha and Beta are scalars, X and Y are vectors *)
(* and A is an M by N matrix. *)
(* *)
(* A, X and Y are declared as single VAR elements; the inline *)
(* comments in the parameter list give the intended array shape. *)
(* *)
(* NOTE(review): parameter roles are not spelled out in this file. *)
(* In the reference BLAS, Trans selects the operation, lda is the *)
(* leading dimension of A and IncX/IncY are the element strides *)
(* of X and Y - confirm against the BLAS library actually linked. *)
(*----------------------------------------------------------------*)
PROCEDURE sgemm_(VAR TA : CHAR1;
VAR TB : CHAR1;
VAR M,N,K : INTEGER4;
VAR Alpha : REAL4;
VAR A : (* ARRAY OF ARRAY OF *) REAL4;
VAR lda : INTEGER4;
VAR B : (* ARRAY OF ARRAY OF *) REAL4;
VAR ldb : INTEGER4;
VAR Beta : REAL4;
VAR C : (* ARRAY OF ARRAY OF *) REAL4;
VAR ldc : INTEGER4);
(*----------------------------------------------------------------*)
(* Matrix matrix multiplication, single precision version; *)
(* see dgemm in LibDBlas.def for details. *)
(* *)
(* A, B and C are declared as single VAR elements; the inline *)
(* comments in the parameter list give the intended array shape. *)
(* *)
(* NOTE(review): in the reference BLAS, sgemm computes *)
(* C := alpha*op(A)*op(B) + beta*C with op() selected by TA and TB *)
(* - confirm against LibDBlas.def and the BLAS actually linked. *)
(*----------------------------------------------------------------*)
PROCEDURE dgemm_(VAR TA : CHAR1;
VAR TB : CHAR1;
VAR M,N,K : INTEGER4;
VAR Alpha : DOUBLEPRECISION;
VAR A : (* ARRAY OF ARRAY OF *) DOUBLEPRECISION;
VAR lda : INTEGER4;
VAR B : (* ARRAY OF ARRAY OF *) DOUBLEPRECISION;
VAR ldb : INTEGER4;
VAR Beta : DOUBLEPRECISION;
VAR C : (* ARRAY OF ARRAY OF *) DOUBLEPRECISION;
VAR ldc : INTEGER4);
(*----------------------------------------------------------------*)
(* Matrix matrix multiplication, double precision version; *)
(* see dgemm in LibDBlas.def for details. *)
(* *)
(* A, B and C are declared as single VAR elements; the inline *)
(* comments in the parameter list give the intended array shape. *)
(* *)
(* Hint: There are Fortran versions with unrolled loops which *)
(* perform far better than the subroutines provided in the *)
(* standard source. If you use an optimized BLAS level 3 library *)
(* (e.g. ATLAS) there is no need to take that into consideration. *)
(* *)
(* NOTE(review): in the reference BLAS, dgemm computes *)
(* C := alpha*op(A)*op(B) + beta*C with op() selected by TA and TB *)
(* - confirm against LibDBlas.def and the BLAS actually linked. *)
(*----------------------------------------------------------------*)
PROCEDURE dgemmomp(VAR TA : CHAR1;
VAR TB : CHAR1;
VAR M,N,K : INTEGER4;
VAR Alpha : DOUBLEPRECISION;
VAR A : (* ARRAY OF ARRAY OF *) DOUBLEPRECISION;
VAR lda : INTEGER4;
VAR B : (* ARRAY OF ARRAY OF *) DOUBLEPRECISION;
VAR ldb : INTEGER4;
VAR Beta : DOUBLEPRECISION;
VAR C : (* ARRAY OF ARRAY OF *) DOUBLEPRECISION;
VAR ldc : INTEGER4);
(*----------------------------------------------------------------*)
(* Matrix matrix multiplication, double precision version; *)
(* see dgemm in LibDBlas.def for details. The parameter list is *)
(* identical to that of dgemm_. *)
(* *)
(* dgemmomp is an OpenMP based version of level 3 BLAS dgemm. *)
(* dgemmomp is far from being optimal - please test whether it *)
(* really improves the performance within your specific *)
(* environment. On 32 bit systems the communication overhead *)
(* outweighs the potential gain in speed from using more than *)
(* one thread in many cases. If you set the number of threads to *)
(* one, a version of dgemm using unrolled loops and blocking is *)
(* used if the dimensions of the matrices involved exceed a *)
(* specific limit (see dgemmCbind.f90), so it still might be *)
(* worth calling dgemmomp. dgemmomp has an explicit Fortran 2003 *)
(* "C" interface, so there is no need for "_" in the function *)
(* name. *)
(* *)
(* HINT: The module OpenMPF77 permits controlling the number of *)
(* OpenMP threads used. *)
(*----------------------------------------------------------------*)
PROCEDURE dgemmomp2(VAR TA : CHAR1;
VAR TB : CHAR1;
VAR M,N,K : INTEGER4;
VAR Alpha : DOUBLEPRECISION;
VAR A : (* ARRAY OF ARRAY OF *) DOUBLEPRECISION;
VAR lda : INTEGER4;
VAR B : (* ARRAY OF ARRAY OF *) DOUBLEPRECISION;
VAR ldb : INTEGER4;
VAR Beta : DOUBLEPRECISION;
VAR C : (* ARRAY OF ARRAY OF *) DOUBLEPRECISION;
VAR ldc : INTEGER4);
(*----------------------------------------------------------------*)
(* Another OpenMP parallel version of dgemm (experimental). The *)
(* parameter list is identical to that of dgemm_. *)
(* *)
(* NOTE(review): the name carries no trailing "_", which suggests *)
(* an explicit Fortran 2003 "C" interface on the Fortran side - *)
(* confirm against the Fortran source before relying on it. *)
(*----------------------------------------------------------------*)
PROCEDURE zgemm_(VAR TA : CHAR1;
VAR TB : CHAR1;
VAR M,N,K : INTEGER4;
VAR Alpha : DOUBLECOMPLEX;
VAR A : (* ARRAY OF ARRAY OF *) DOUBLECOMPLEX;
VAR lda : INTEGER4;
VAR B : (* ARRAY OF ARRAY OF *) DOUBLECOMPLEX;
VAR ldb : INTEGER4;
VAR Beta : DOUBLECOMPLEX;
VAR C : (* ARRAY OF ARRAY OF *) DOUBLECOMPLEX;
VAR ldc : INTEGER4);
(*----------------------------------------------------------------*)
(* Matrix matrix multiplication, double complex version; *)
(* see dgemm in LibDBlas.def for details. *)
(* *)
(* A, B and C are declared as single VAR elements; the inline *)
(* comments in the parameter list give the intended array shape. *)
(* *)
(* NOTE(review): in the reference BLAS, zgemm computes *)
(* C := alpha*op(A)*op(B) + beta*C with op() selected by TA and TB *)
(* - confirm against LibDBlas.def and the BLAS actually linked. *)
(*----------------------------------------------------------------*)
END LibDBlasLxF77.