Switch to side-by-side view

--- a
+++ b/src/kde/recoll_applet/admin/nmcheck
@@ -0,0 +1,371 @@
+#!/usr/bin/perl -w
+
+# Check namespace cleanness of a library.
+# Allowed symbols are passed as arguments.
+# They may have trailing * = wildcard.
+# Wildcards may be also specified as *::* (e.g. K*::* for all KDE classes)
+# Symbols are listed as full function unmangled names without arguments,
+# e.g. 'foo bar* nspace::*' allows foo(), foo(int), bar(), barbar()
+# and all symbols in namespace/class nspace.
+# If an argument has comma in it, it's a filename of a file containing
+# allowed symbols, one per line.
+
+
+$thisProg   = "$0";     # This programs name
+
+$library = "";
+$allowed_symbols = "";
+$debug = 0;
+$allowed_weak = "";
+$weak_specified = 0;
+
+while( defined( $ARGV[ 0 ] ))
+{
+    $_ = shift;
+    if( /^--verbose$|^-v$/ )
+    {
+	$debug = 1;
+    }
+    elsif( /^--help$|^-h$/ )
+    {
+        print STDOUT "Usage $thisProg [OPTION] ... library [allowed symbols] ...\n",
+                "\n",
+		"Check if the given library has only allowed public symbols.\n",
+                "\n",
+		"  --allowweak=[symbol] allow only these weak symbols\n",
+                "  -v, --verbose        verbosely list files processed\n",
+                "  -h, --help           print this help, then exit\n";
+        exit 0;
+    }
+    elsif( /^--allowweak=(.*)$/ )
+    {
+	$allowed_weak .= " " . $1;
+	$weak_specified = 1;
+    }
+    elsif( /^--allowweak$/ ) # simply list all weak
+    {
+	$allowed_weak .= " ";
+	$weak_specified = 1;
+    }
+    elsif( /^--*/ )
+    {
+	die "Invalid argument!\n";
+    }
+    else
+    {
+	if( ! $library )
+	{
+	    $library = $_;
+	}
+	else
+	{
+	    $allowed_symbols .= " " . $_;
+	}
+    }
+}
+
+if( ! $weak_specified )
+{
+    $allowed_weak = "*"; 
+    # allow all weak symbols by default
+    # instances of templates and similar stuff - unfortunately includes also things from other libraries,
+    # so it cannot be on by default
+}
+
+print STDERR "library:" . $library . "\n" if $debug;
+print STDERR "allowed_symbols:" . $allowed_symbols . "\n" if $debug;
+print STDERR "allowed_weak:" . $allowed_weak . "\n" if $debug;
+
+$default_symbols = "_fini _init";  # system symbols
+# on my system, every .so has :
+# A _DYNAMIC
+# A _GLOBAL_OFFSET_TABLE_
+# A __bss_start
+# A _edata
+# A _end
+# T _fini
+# T _init
+# no need to list A symbols in $default_symbols
+
+print STDERR "default_symbols: " . $default_symbols . "\n" if $debug;
+
+print STDOUT "Namespace cleanness check for " . $library . " :\n";
+
+$lib_file = "";
+if( $library =~ /\.la$/ )
+{
+    # get the real library file from .la
+    open( FILEIN, $library ) || die "Couldn't open $! !\n";
+    while( $line = <FILEIN> )
+    {
+	if( $line =~ /library_names=\'([^ ]*).*/o )
+	{
+	    $lib_file = $1;
+	}
+    }
+    close( FILEIN );
+    if( ! $lib_file )
+    {
+	print STDERR "Library file not found in .la file!\n";
+	exit 1;
+    }
+    my $libpath = $library;
+    $libpath =~ s%[^/]*$%%;
+    if(  -e $libpath . ".libs/" . $lib_file )
+    {
+	$lib_file = $libpath . ".libs/" . $lib_file;
+    }
+    else
+    {
+	$lib_file = $libpath . $lib_file;
+    }
+}
+else
+{
+    $lib_file = $library;
+}
+
+print STDERR "libfile: ". $lib_file . "\n" if $debug;
+
+$allowed_symbols .= " " . $default_symbols;
+
+sub process_symbols($\@\%\@);
+
+@wildcards = ();
+%exacts = ();
+@regwildcards = ();
+process_symbols( $allowed_symbols, @wildcards, %exacts, @regwildcards );
+@weak_wildcards = ();
+%weak_exacts = ();
+@weak_regwildcards = ();
+process_symbols( $allowed_weak, @weak_wildcards, %weak_exacts, @weak_regwildcards );
+
+# grep is for stripping not exported symbols, which don't have address (=first column)
+$nm_command = "nm -BDCg " . $lib_file . " | grep -v '^ ' |";
+
+# TODO how portable is this nmcheck stuff?
+
+print STDERR "nm command:" . $nm_command . "\n" if $debug;
+
+open( FILEIN, $nm_command ) || die "nm command failed\n";
+
+my $exit_code = 0;
+
+while( $line = <FILEIN> )
+{
+    my $type;
+    my $symbol;
+    if( $line =~ /^[^ ]* (.) (.*)$/o )
+    {
+	$type = $1;
+	$symbol = $2;
+    }
+    else
+    {
+	die "Invalid line: " . $line . "\n";
+    }
+    
+    print STDERR "Type: " . $type . " , symbol: " . $symbol . "\n" if $debug;
+    if( $type eq "A" )
+    { # these should be system symbols, so ignore them
+	next;
+    }
+
+    my $orig_symbol = $symbol;
+
+    if( $symbol =~ /\(anonymous namespace\)/o )
+    { # TODO tell to prefer named namespaces? (shorter symbols)
+	next;
+    }
+
+    # strip prefixes
+    # the :: appending is to make "CLASS::*" work also for "vtable for CLASS"
+    $symbol =~ s/^typeinfo for (.*)$/$1::/o;
+    $symbol =~ s/^typeinfo fn for (.*)$/$1::/o;
+    $symbol =~ s/^typeinfo name for (.*)$/$1::/o;
+    $symbol =~ s/^vtable for (.*)$/$1::/o;
+    $symbol =~ s/^guard variable for (.*)$/$1::/o;
+    $symbol =~ s/^reference temporary for (.*)$/$1::/o;
+    $symbol =~ s/^VTT for (.*)$/$1::/o;
+    $symbol =~ s/^virtual thunk \[[^\]]*\] to (.*)$/$1::/o;
+    $symbol =~ s/^non-virtual thunk \[[^\]]*\] to (.*)$/$1::/o;
+    $symbol =~ s/^covariant return thunk \[[^\]]*\] to (.*)$/$1::/o;
+    $symbol =~ s/^construction vtable thunk for (.*)$/$1::/o;
+    $symbol =~ s/^construction vtable for .*-in-(.*) [0-9]*$/$1::/o;
+
+    # templates seem to have also return types mangled in their name, and nm prints it too
+    # they have also template arguments in the symbol
+    # get rid of both of those
+    while( $symbol =~ /<.*>/o )
+    {
+        $symbol =~ s/<[^<>]*>//o; # strip innermost <>
+    }
+    if( $symbol !~ /operator\(\)/o )
+    {
+	$symbol =~ s/ ?\(.*\).*$//o;  # strip () and all after it
+    }
+    else
+    {
+	$symbol =~ s/(^|:| )operator\(\) ?\(.*\).*$//o;  # strip () and all after it
+    }
+    $symbol =~ s/\[.*\] *$//o;   # strip [in-charge] etc.
+    if( $symbol =~ /(^|:| )operator /o )
+    {
+	$symbol =~ s/.* ([^\s]*)operator /$1/o; # strip everything before 'X::operator blah'
+    }
+    else
+    {
+	$symbol =~ s/.* ([^\s]+) *$/$1/o;  # get last word (strip return type)
+    }
+
+    # print STDERR "Processed symbol: " . $symbol . "\n" if $debug;
+    
+    my $found = 0;
+    if( $exacts{ $symbol } )
+    {
+	$found = 1;
+    }
+    if( ! $found )
+    {
+	for my $wild ( @wildcards )
+	{
+	    if( index( $symbol, $wild ) == 0 )
+	    {
+		$found = 1;
+		last;
+	    }
+	}
+    }
+    if( ! $found )
+    {
+	for my $wild ( @regwildcards )
+	{
+	    if( $symbol =~ /^$wild$/ )
+	    {
+		$found = 1;
+		last;
+	    }
+	}
+    }
+    if( ( ! $found ) && ( $type eq "W" || $type eq "V" ))
+    {
+	if( $weak_exacts{ $symbol } )
+	{
+	    $found = 1;
+	}
+	if( ! $found )
+	{
+	    for my $wild ( @weak_wildcards )
+	    {
+	        if( index( $symbol, $wild ) == 0 )
+	        {
+	    	    $found = 1;
+		    last;
+		}
+	    }
+	}
+	if( ! $found )
+	{
+	    for my $wild ( @weak_regwildcards )
+	    {
+	        if( $symbol =~ /^$wild$/ )
+	        {
+		    $found = 1;
+		    last;
+	        }
+	    }
+	}
+    }
+
+    if( ! $found )
+    {
+	print STDERR "Public symbol " . $orig_symbol . " is not allowed!\n";
+	$exit_code = 1;
+    }
+}
+
+close( FILEIN );
+
+print STDOUT $exit_code == 0 ? "OK\n" : "FAILED\n";
+
+exit $exit_code;
+
+sub process_symbols($\@\%\@)
+{
+    my $allowed_symbols = $_[ 0 ];
+    my $wildcards_ref = $_[ 1 ];
+    my $exacts_ref = $_[ 2 ];
+    my $regwildcards_ref = $_[ 3 ];
+    
+    $allowed_symbols =~ s/^ *//o;  # strip whitespace
+    $allowed_symbols =~ s/ *$//o;
+
+    if( $allowed_symbols eq "NONE" )
+    {
+	$allowed_symbols = "";
+    }
+
+    my @symbols1 = split( ' ', $allowed_symbols );
+    my $i = 0;
+    my @symbols2 = ();
+    while( defined( $symbols1[ $i ] ))
+    {
+	my $symbol = $symbols1[ $i ];
+	if( $symbol =~ /\./ )  # dot in name -> file
+	{
+	    open( SYMIN, $symbol ) || die ( "Cannot open file " . $symbol . "!" );
+	    while( $line = <SYMIN> )
+	    {
+		$line =~ s/^\s*//o;  # strip whitespace
+		$line =~ s/\s*$//o;
+		if( $line !~ /^$/o  # empty line
+		    &&  $line !~ /^\s*#/ ) # comment line starting with #
+		{
+		    $symbols2[ $#symbols2 + 1 ] = $line;
+		}
+	    }
+	    close( SYMIN );
+	}
+	else
+	{
+	    $symbols2[ $#symbols2 + 1 ] = $symbol;
+	}
+	$i++;
+    }
+    $i = 0;
+    while( defined( $symbols2[ $i ] ))
+    {
+	my $symbol = $symbols2[ $i ];
+	if( $symbol =~ /__/
+	    || $symbol =~ /^_[A-Z]/ )
+	{ # ISO C++ 2.10.2
+	    die "Symbols containing a double underscore or beginning with an underscore and an upper-case letter are reserved!\n";
+	}
+	elsif( $symbol eq "main"
+	    || $symbol eq "main*" )
+	{
+	    die "Symbol main is not allowed!\n";
+	}
+	if( $symbol =~ /^([^\*]*)\*$/o   # trailing * without any * before it
+	    && $symbol !~ /operator\*$/o )
+        {
+	    print STDERR "wildcard:" . $symbol . "\n" if $debug;
+    	    $wildcards_ref->[ $#{$wildcards_ref} + 1 ] = $1;
+	}
+	elsif( $symbol =~ /\*$/o
+	    && ( $symbol =~ /\*::/o || $symbol =~ /::\*/o )
+	    && $symbol !~ /^\*/o
+	    && $symbol !~ /operator\*$/o )
+	{
+	    print STDERR "regwildcard:" . $symbol . "\n" if $debug;
+	    $symbol =~ s/\*/\.\*/go;  # change * to .* (regexp)
+	    $regwildcards_ref->[ $#{$regwildcards_ref} + 1 ] = $symbol;
+	}
+	else
+	{
+	    print STDERR "exact:" . $symbol . "\n" if $debug;
+	    $exacts_ref->{ $symbol } = 1;
+	}
+	$i++;
+    }
+}