#! /usr/bin/python

# mklibs.py: An automated way to create a minimal /lib/ directory.
#
# Copyright 2001 by Falk Hueffner <falk@debian.org>
#                 & Goswin Brederlow <goswin.brederlow@student.uni-tuebingen.de>
#
# mklibs.sh by Marcus Brinkmann <Marcus.Brinkmann@ruhr-uni-bochum.de>
# used as template
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

# HOW IT WORKS
#
# - Gather all unresolved symbols and libraries needed by the programs
#   and reduced libraries
# - Gather all symbols provided by the already reduced libraries
#   (none on the first pass)
# - If all symbols are provided we are done
# - go through all libraries and remember what symbols they provide
# - go through all unresolved/needed symbols and mark them as used
# - for each library:
#   - find pic file (if not present copy and strip the so)
#   - compile in only used symbols
#   - strip
# - back to the top

# TODO
# * complete argument parsing as given as comment in main

import commands
import string
import re
import sys
import os
import glob
import getopt
from stat import *

# Verbosity thresholds, passed as the first argument to debug(): a
# message is printed when the current debuglevel is at least its level.
DEBUG_NORMAL  = 1
DEBUG_VERBOSE = 2
DEBUG_SPAM    = 3

# Current verbosity; the -v option raises it to DEBUG_VERBOSE.
debuglevel = DEBUG_NORMAL

def debug(level, *msg):
    if debuglevel >= level:
        print string.join(msg)

# A simple set class.
class Set:
    """Minimal set of hashable objects, backed by a dictionary.

    Only the dictionary keys carry information; the stored values are
    dummies.
    """

    def __init__(self):
        self.__dict = {}

    def add(self, obj):
        """Insert obj; adding an element that is already present is a no-op."""
        self.__dict[obj] = 1

    def contains(self, obj):
        """Return true if obj has been added to the set."""
        # The 'in' operator replaces the deprecated dict.has_key().
        return obj in self.__dict

    def merge(self, s):
        """Add every element of the Set s to this set."""
        for e in s.elems():
            self.add(e)

    def elems(self):
        """Return the elements as a new list, in arbitrary order."""
        return list(self.__dict)

    def size(self):
        """Return the number of distinct elements."""
        return len(self.__dict)

    def __str__(self):
        # repr() replaces the deprecated backtick syntax.
        return repr(self.elems())

# return a list of lines of output of the command
def command(command, *args):
    debug(DEBUG_SPAM, "calling", command, string.join(args))
    (status, output) = commands.getstatusoutput(command + ' ' + string.join(args))
    if os.WEXITSTATUS(status) != 0:
        print "Command failed with status", os.WEXITSTATUS(status),  ":", \
               command, string.join(args)
	print "With output:", output
        sys.exit(1)
    return string.split(output, '\n')

# Filter a list according to a regexp containing a () group. Return
# a Set.
def regexpfilter(list, regexp, groupnr = 1):
    """Collect one regexp group from every string that matches.

    list    -- sequence of strings to test
    regexp  -- pattern containing at least one () group; it is applied
               with re.match, i.e. anchored at the start of each string
    groupnr -- number of the group to extract (default 1)

    Returns a Set of the extracted group values.  Strings that do not
    match are skipped.
    """
    match_at_start = re.compile(regexp).match
    found = Set()
    for candidate in list:
        hit = match_at_start(candidate)
        if hit is None:
            continue
        found.add(hit.group(groupnr))
    return found

# Return a Set of the libraries the passed object depends on.
def library_depends(obj):
    """Return a Set of the library names listed as NEEDED by obj.

    obj -- path of the program or shared library to inspect

    The names are taken from the NEEDED lines of
    "objdump --private-headers".

    Raises IOError if obj does not exist.
    """
    if not os.access(obj, os.F_OK):
        # A string exception is rejected with TypeError by Python >= 2.6,
        # so raise a real exception class.
        raise IOError("Cannot find lib: " + obj)
    output = command("objdump", "--private-headers", obj)
    return regexpfilter(output, r".*NEEDED\s*(\S+)$")

# Return the libraries the passed object depends on as a single string
# of "-lfoo" options suitable for passing to gcc ("" if there are none).
# HACK: exclude libm, since we don't need it for anything
def library_depends_gcc_libnames(obj):
    """Return the NEEDED libraries of obj as a string of gcc -l options.

    obj -- path of the program or shared library to inspect

    Every NEEDED entry of the form "libNAME.so..." in the output of
    "objdump --private-headers" contributes "-lNAME"; the options are
    separated by single spaces.  Lines mentioning "libm.so" are dropped
    first (see the HACK note above).  Returns "" if nothing is left.

    Raises IOError if obj does not exist.
    """
    if not os.access(obj, os.F_OK):
        # A string exception is rejected with TypeError by Python >= 2.6,
        # so raise a real exception class.
        raise IOError("Cannot find lib: " + obj)
    output = command("objdump", "--private-headers", obj)
    output = filter(lambda x: string.find(x, "libm.so") == -1, output)
    names = regexpfilter(output, r".*NEEDED\s*lib(\S+)\.so.*$").elems()
    if not names:
        return ""
    return "-l" + string.join(names, " -l")

# Return a Set of undefined symbols in an object
def undefined_symbols(obj):
    """Return a Set of the undefined symbols of an ELF object.

    obj -- path of the program or shared library to inspect

    The symbol table is read with "readelf -s".  Lines containing
    NOTYPE, SECTION, ABS or LOCAL anywhere are discarded; from the
    remaining lines marked UND the symbol name is taken, without any
    "@version" suffix.

    Raises IOError if obj does not exist.
    """
    if not os.access(obj, os.F_OK):
        # A string exception is rejected with TypeError by Python >= 2.6;
        # the message also lacked the ": " separator before the path.
        raise IOError("Cannot find lib: " + obj)
    output = command("readelf", "-s", obj)
    for marker in ("NOTYPE", "SECTION", "ABS", "LOCAL"):
        output = [line for line in output if line.find(marker) == -1]
    return regexpfilter(output, r".*\sUND\s([^@\s]+)(@.+)?$")

# Return a Set of symbols provided by a library
def provided_symbols(obj):
    """Return a Set of the symbols an ELF object defines.

    obj -- path of the program or shared library to inspect

    The symbol table is read with "readelf -s".  Lines containing UND,
    NOTYPE, SECTION, ABS or LOCAL anywhere are discarded; from the
    remaining symbol lines the name is taken, without any "@version"
    suffix.

    Raises IOError if obj does not exist.
    """
    if not os.access(obj, os.F_OK):
        # A string exception is rejected with TypeError by Python >= 2.6;
        # the message also lacked the ": " separator before the path.
        raise IOError("Cannot find lib: " + obj)
    output = command("readelf", "-s", obj)
    # NOTE(review): these are plain substring tests on the whole line, so
    # a symbol whose own name contains one of the markers is dropped too.
    for marker in ("UND", "NOTYPE", "SECTION", "ABS", "LOCAL"):
        output = [line for line in output if line.find(marker) == -1]
    # The value column is hexadecimal: the class is [\da-fA-F] (the former
    # "A-f" range also admitted G-Z and some punctuation).
    return regexpfilter(output, r".*\d+:\s[\da-fA-F]+\s.+\s\d+\s([^@]+)(@\S+)?$")
    
# Return real target of a symlink
def resolve_link(file):
    """Follow a chain of symbolic links and return the final path.

    file -- path that may be a symbolic link

    A relative link target is interpreted relative to the directory that
    contains the link.  A path that is not a symbolic link is returned
    unchanged.
    """
    debug(DEBUG_SPAM, "resolving", file)
    while 1:
        if not S_ISLNK(os.lstat(file)[ST_MODE]):
            break
        target = os.readlink(file)
        # os.path.join() returns an absolute target unchanged and
        # otherwise prefixes it with the directory holding the link.
        file = os.path.join(os.path.dirname(file), target)
    debug(DEBUG_SPAM, "resolved to", file)
    return file

# Find complete path of a library, by searching in lib_path
def find_lib(lib):
    """Return the full path of a library, or "" if it is not found.

    lib -- file name of the library

    The directories in the global lib_path are tried in order; the first
    existing file wins and is returned with symbolic links resolved.
    """
    for directory in lib_path:
        candidate = directory + "/" + lib
        if not os.access(candidate, os.F_OK):
            continue
        return resolve_link(candidate)
    return ""

# Find a PIC archive for the library
def find_pic(lib):
    """Return the path of a "*_pic.a" archive for lib, or "" if none.

    lib -- file name of the shared library, e.g. "libfoo.so.1"

    The part of the name in front of ".so" is used as the prefix of a
    glob pattern that is tried in every directory of the global lib_path.
    The first existing hit is returned with symbolic links resolved.
    """
    match = so_pattern.match(lib)
    if not match:
        # The name has no numeric ".so.N" suffix, so no base name can be
        # derived; report "not found" instead of failing on None.
        return ""
    base_name = match.group(1)
    for path in lib_path:
        for file in glob.glob(path + "/" + base_name + "*_pic.a"):
            if os.access(file, os.F_OK):
                return resolve_link(file)
    return ""

# Find a PIC .map file for the library
def find_pic_map(lib):
    """Return the path of a "*_pic.map" file for lib, or "" if none.

    lib -- file name of the shared library, e.g. "libfoo.so.1"

    The part of the name in front of ".so" is used as the prefix of a
    glob pattern that is tried in every directory of the global lib_path.
    The first existing hit is returned with symbolic links resolved.
    """
    match = so_pattern.match(lib)
    if not match:
        # The name has no numeric ".so.N" suffix, so no base name can be
        # derived; report "not found" instead of failing on None.
        return ""
    base_name = match.group(1)
    for path in lib_path:
        for file in glob.glob(path + "/" + base_name + "*_pic.map"):
            if os.access(file, os.F_OK):
                return resolve_link(file)
    return ""

#################### main ####################
## Usage: ./mklibs.py [OPTION]... -d DEST FILE ...
## Make a set of minimal libraries for FILE ... in directory DEST.
## 
## Options:
##   -L DIRECTORY               Add DIRECTORY to library search path.
##   -D, --no-default-lib       Do not use default lib directories of /lib:/usr/lib
##   -n, --dry-run              Don't actually run any commands; just print them.
##   -v, --verbose              Print additional progress information.
##   -V, --version              Print the version number and exit.
##   -h, --help                 Print this help and exit.
## 
##   -d, --dest-dir DIRECTORY   Create libraries in DIRECTORY.
## 
## Required arguments for long options are also mandatory for the short options.

# Argument parsing
opts = "L:D:nvVhd:"
longopts = ["no-default-lib", "dry-run", "verbose", "version", "help",
            "dest-dir="]
optlist, proglist = getopt.getopt(sys.argv[1:], opts, longopts)

# some global variables
lib_path = "/lib/", "/usr/lib/", "/usr/X11R6/lib/"
dest_path = "DEST"
so_pattern = re.compile("(.*)\.so(\.\d+)+")
script_pattern = re.compile("^#!\s*/")

for opt, arg in optlist:
    if opt == "-v":
        debuglevel = DEBUG_VERBOSE
    elif opt == "-L":
        lib_path = string.split(arg, ":")
    elif opt in ("-d", "--dest-dir"):
        dest_path = arg
    else:
        print "WARNING: unknown option: " + opt + "\targ: " + arg

# Collect the binaries to analyse.  Hard links to a file that is already
# listed and "#!" scripts are left out.
# NOTE(review): the key is the inode number alone, so files on different
# filesystems that share an inode number would be treated as hard links.
objects = {}  # map from inode to filename
for prog in proglist:
    inode = os.stat(prog)[ST_INO]
    if inode in objects:
        debug(DEBUG_SPAM, prog, "is a hardlink to", objects[inode])
        continue
    # is it a script perhaps?
    prog_file = open(prog)
    try:
        # Only the start of the file is needed; close the handle
        # explicitly instead of leaving it to the garbage collector.
        head = prog_file.read(256)
    finally:
        prog_file.close()
    if script_pattern.match(head):
        debug(DEBUG_SPAM, prog, "is a script")
    else:
        objects[inode] = prog

passnr = 1
while 1:
    debug(DEBUG_NORMAL, "I: library reduction pass", `passnr`)
    if debuglevel >= DEBUG_VERBOSE:
        print "Objects:",
        for obj in objects.values():
            print obj[string.rfind(obj, '/') + 1:],
        print

    passnr = passnr + 1
    # Gather all already reduced libraries and treat them as objects as well
    small_libs = []
    for lib in regexpfilter(os.listdir(dest_path), "(.*-so-stripped)$").elems():
        obj = dest_path + "/" + lib
        small_libs.append(obj)
        inode = os.stat(obj)[ST_INO]
        if objects.has_key(inode):
            debug(DEBUG_SPAM, obj, "is hardlink to", objects[inode])
        else:
            objects[inode] = obj

    # DEBUG
    for obj in objects.values():
        debug(DEBUG_VERBOSE, "Object:", obj)

    # calculate what symbols and libraries are needed
    needed_symbols = Set()
    libraries = Set()
    for obj in objects.values():
        needed_symbols.merge(undefined_symbols(obj))
        libraries.merge(library_depends(obj))

    # FIXME: on i386 this is undefined but not marked UND
    # I don't know how to detect those symbols but this seems
    # to be the only one and including in on alpha as well
    # doesn't hurt. I guess all archs can live with this.
    needed_symbols.add("sys_siglist")

    # calculate what symbols are present in small_libs
    present_symbols = Set()
    for lib in small_libs:
        present_symbols.merge(provided_symbols(lib))

    # are we finished?
    num_unresolved = 0
    present_symbols_elems = present_symbols.elems()
    for symbol in needed_symbols.elems():
        if not symbol in present_symbols_elems:
            debug(DEBUG_SPAM, "Still need:", symbol)
            num_unresolved = num_unresolved + 1

    debug (DEBUG_NORMAL, `needed_symbols.size()`, "symbols,",
           `num_unresolved`, "unresolved")

    if num_unresolved == 0:
        break
    
    library_symbols = {}
    library_symbols_used = {}
    symbol_provider = {}

    # Calculate all symbols each library provides
    for library in libraries.elems():
        path = find_lib(library)
        if not path:
            sys.exit("Library not found: " + library + " in path: "
                     + string.join(lib_path, " : "))
        symbols = provided_symbols(path)
        library_symbols[library] = Set()
        library_symbols_used[library] = Set()    
        for symbol in symbols.elems():
            if symbol_provider.has_key(symbol):
                # in doubt, prefer symbols from libc
                if re.match("^libc[\.-]", library):
                    library_symbols[library].add(symbol)
                    symbol_provider[symbol] = library
                else:
                    debug(DEBUG_SPAM, "duplicate symbol", symbol, "in", 
                          symbol_provider[symbol], "and", library)
            else:
                library_symbols[library].add(symbol)
                symbol_provider[symbol] = library

    # which symbols are actually used from each lib
    for symbol in needed_symbols.elems():
        try:
            lib = symbol_provider[symbol]
            library_symbols_used[lib].add(symbol)
        except KeyError:
            pass

    # reduce libraries
    for library in libraries.elems():
        debug(DEBUG_VERBOSE, "reducing", library)
        debug(DEBUG_SPAM, string.join(library_symbols_used[library].elems()))
        so_file = find_lib(library)
        so_file_name = os.path.basename(so_file)
        if not so_file:
            sys.exit("File not found:" + library)
        pic_file = find_pic(library)
        if not pic_file:
            # No pic file, so we have to use the .so file, no reduction
            debug(DEBUG_VERBOSE, "No pic file found for", so_file, "; copying")
            command("objcopy", "--strip-unneeded -R .note -R .comment",
                    so_file, dest_path + "/" + so_file_name + "-so-stripped")
        else:
            # we have a pic file, recompile
            debug(DEBUG_SPAM, "extracting from:", pic_file, "so_file:", so_file)
            soname = regexpfilter(command("readelf", "--all", so_file),
                                  ".*SONAME.*\[(.*)\].*").elems()[0]
            debug(DEBUG_SPAM, "soname:", soname)
            base_name = so_pattern.match(library).group(1)
            # libc needs its soinit.o and sofini.o as well as the pic
            if base_name == "libc":
                extra_flags = find_lib("ld-linux.so.2")
                extra_pre_obj = "/usr/lib/libc_pic/soinit.o"
                extra_post_obj = "/usr/lib/libc_pic/sofini.o"
            else:
                extra_flags = ""
                extra_pre_obj = ""
                extra_post_obj = ""
            map_file = find_pic_map(library)
            if map_file:
                extra_flags = extra_flags + " -Wl,--version-script=" + map_file
            if library_symbols_used[library].elems():
                joined_symbols = "-u" + string.join(library_symbols_used[library].elems(), " -u")
            else:
                joined_symbols = ""
            # compile in only used symbols
            command("gcc",
                "-nostdlib -nostartfiles -shared -Wl,-soname=" + soname,\
                joined_symbols, \
                "-o", dest_path + "/" + so_file_name + "-so", \
                extra_pre_obj, \
                pic_file, \
                extra_post_obj, \
                extra_flags, \
                "-lgcc -L", dest_path, \
                "-L" + string.join(lib_path, " -L"), \
                library_depends_gcc_libnames(so_file))
            # strip result
            command("objcopy", "--strip-unneeded -R .note -R .comment",
                      dest_path + "/" + so_file_name + "-so",
                      dest_path + "/" + so_file_name + "-so-stripped")
            ## DEBUG
            debug(DEBUG_VERBOSE, so_file, "\t", `os.stat(so_file)[ST_SIZE]`)
            debug(DEBUG_VERBOSE, dest_path + "/" + so_file_name + "-so", "\t",
                  `os.stat(dest_path + "/" + so_file_name + "-so")[ST_SIZE]`)
            debug(DEBUG_VERBOSE, dest_path + "/" + so_file_name + "-so-stripped",
                  "\t", `os.stat(dest_path + "/" + so_file_name + "-so-stripped")[ST_SIZE]`)

# Finalising libs and cleaning up
for lib in regexpfilter(os.listdir(dest_path), "(.*)-so-stripped$").elems():
    os.rename(dest_path + "/" + lib + "-so-stripped", dest_path + "/" + lib)
for lib in regexpfilter(os.listdir(dest_path), "(.*-so)$").elems():
    os.remove(dest_path + "/" + lib)

# Make the dynamic linker executable
ld_file = find_lib("ld-linux.so.2")
ld_file_name = os.path.basename(ld_file)
os.chmod(dest_path + "/" + ld_file_name, 0755)
