Switch to side-by-side view

--- a/LibDBlasLxF77.def.m2pp
+++ b/LibDBlasLxF77.def.m2pp
@@ -22,6 +22,9 @@
   (*   MOCKA  : Definitionsmodul im Stil des Mocka  Compilers               *)
   (*                                                                        *)
   (* ansonsten gibt es keine Aenderungen am Quellcode                       *)
+  (*                                                                        *)
+  (* There are three versions contained, one for the GNU, XDS and Mocka     *)
+  (* compilers, which can be produced by the m2pp command line given above  *)
   (*------------------------------------------------------------------------*)
   (* Schnittstelle zu dblas level 2 & 3 FORTRAN 77 Subroutinen.             *)
   (* Low level interface to dblas level 2 & 3 FORTRAN 77 subroutines.       *)
@@ -33,6 +36,7 @@
   (* Last change:                                                           *)
   (*                                                                        *)
   (* 29.10.17, MRi: Erstellen der ersten Version nur mit dgemm              *)
+  (* 23.06.18, MRi: Hinzufuegen von zgemm                                   *)
   (*------------------------------------------------------------------------*)
   (* Offene Punkte                                                          *)
   (*                                                                        *)
@@ -43,7 +47,7 @@
 
   (* $Id: LibDBlasLxF77.def.m2pp,v 1.2 2018/01/16 09:19:51 mriedl Exp mriedl $ *)
 
-FROM LibDBlasL1F77 IMPORT CHAR1,INTEGER4,REAL4,DOUBLEPRECISION;
+FROM LibDBlasL1F77 IMPORT CHAR1,INTEGER4,REAL4,DOUBLEPRECISION,DOUBLECOMPLEX;
 
 <* IF (__XDS__) THEN *>
 CONST Version = "LibDBlasLxF77 for XDS Modula-2";
@@ -67,6 +71,11 @@
                  VAR C     : (* ARRAY OF ARRAY OF *) REAL4;
                  VAR ldc   : INTEGER4);
 
+          (*---------------------------------------------------------------*)
+          (* Matrix Matrix Multiplikation / matrix matrix multiplication,  *)
+          (* single precision version, see LibDBlas.def dgemm for details  *)
+          (*---------------------------------------------------------------*)
+
 PROCEDURE dgemm_(VAR TA    : CHAR1;
                  VAR TB    : CHAR1;
                  VAR M,N,K : INTEGER4;
@@ -79,4 +88,79 @@
                  VAR C     : (* ARRAY OF ARRAY OF *) DOUBLEPRECISION;
                  VAR ldc   : INTEGER4);
 
+          (*---------------------------------------------------------------*)
+          (* Matrix Matrix Multiplikation / matrix matrix multiplication,  *)
+          (* double precision version, see LibDBlas.def dgemm for details  *)
+          (* See LibDBlas.def for details                                  *)
+          (*                                                               *)
+          (* Hint: There are Fortran versions with unrolled loops which    *)
+          (* perform far better than the subroutines provided in the       *)
+          (* standard source. If you use an optimized BLAS level3 library  *)
+          (* (e.g. ATLAS) there is no need to take that into consideration *)
+          (*---------------------------------------------------------------*)
+
+PROCEDURE dgemmomp(VAR TA    : CHAR1;
+                   VAR TB    : CHAR1;
+                   VAR M,N,K : INTEGER4;
+                   VAR Alpha : DOUBLEPRECISION;
+                   VAR A     : (* ARRAY OF ARRAY OF *) DOUBLEPRECISION;
+                   VAR lda   : INTEGER4;
+                   VAR B     : (* ARRAY OF ARRAY OF *) DOUBLEPRECISION;
+                   VAR ldb   : INTEGER4;
+                   VAR Beta  : DOUBLEPRECISION;
+                   VAR C     : (* ARRAY OF ARRAY OF *) DOUBLEPRECISION;
+                   VAR ldc   : INTEGER4);
+
+          (*---------------------------------------------------------------*)
+          (* Matrix-matrix multiplication,                                 *)
+          (* double precision version, see LibDBlas.def dgemm for details  *)
+          (*                                                               *)
+          (* dgemmomp is an OpenMP based version of level 3 BLAS dgemm.    *)
+          (* dgemmomp is far from being optimal - please test if it is     *)
+          (* really improving the performance within your specific         *)
+          (* environment. On 32 bit systems the communication overhead     *)
+          (* outperforms the potential gain in speed by using more than    *)
+          (* one thread in many cases. If you set the number of threads to *)
+          (* one a version of dgemm using unrolled loops and blocking is   *)
+          (* used if the dimension of the matrices involved exceeds a      *)
+          (* specific limit (see dgemmCbind.f90) so it still might be      *)
+          (* worth calling dgemmomp. dgemmomp has an explicit Fortran 2003 *)
+          (* "C" interface so no need for "_" in the function name.        *)
+          (*                                                               *)
+          (* HINT: The module OpenMPF77 permits controlling the number of  *)
+          (*       OpenMP threads used.                                    *)
+          (*---------------------------------------------------------------*)
+
+PROCEDURE dgemmomp2(VAR TA    : CHAR1;
+                    VAR TB    : CHAR1;
+                    VAR M,N,K : INTEGER4;
+                    VAR Alpha : DOUBLEPRECISION;
+                    VAR A     : (* ARRAY OF ARRAY OF *) DOUBLEPRECISION;
+                    VAR lda   : INTEGER4;
+                    VAR B     : (* ARRAY OF ARRAY OF *) DOUBLEPRECISION;
+                    VAR ldb   : INTEGER4;
+                    VAR Beta  : DOUBLEPRECISION;
+                    VAR C     : (* ARRAY OF ARRAY OF *) DOUBLEPRECISION;
+                    VAR ldc   : INTEGER4);
+          (*---------------------------------------------------------------*)
+          (* Another OMP parallel version of dgemm (experimental)          *)
+          (*---------------------------------------------------------------*)
+
+PROCEDURE zgemm_(VAR TA    : CHAR1;
+                 VAR TB    : CHAR1;
+                 VAR M,N,K : INTEGER4;
+                 VAR Alpha : DOUBLECOMPLEX;
+                 VAR A     : (* ARRAY OF ARRAY OF *) DOUBLECOMPLEX;
+                 VAR lda   : INTEGER4;
+                 VAR B     : (* ARRAY OF ARRAY OF *) DOUBLECOMPLEX;
+                 VAR ldb   : INTEGER4;
+                 VAR Beta  : DOUBLECOMPLEX;
+                 VAR C     : (* ARRAY OF ARRAY OF *) DOUBLECOMPLEX;
+                 VAR ldc   : INTEGER4);
+
+          (*---------------------------------------------------------------*)
+          (* Matrix-matrix multiplication,                                 *)
+          (* double complex version, see LibDBlas.def dgemm for details    *)
+          (*---------------------------------------------------------------*)
+
 END LibDBlasLxF77.