# Source code for galaxy.eggs.__init__

"""
Manage Galaxy eggs
"""

import ConfigParser
import glob
import HTMLParser
import os
import pkg_resources
import shutil
import sys
import urllib
import urllib2
import zipfile
import zipimport

import logging
log = logging.getLogger( __name__ )
# keep "no handlers could be found" noise away when the application has not configured logging
log.addHandler( logging.NullHandler() )


# three directory levels above the directory that contains this file
galaxy_dir = os.path.abspath( os.path.join( os.path.dirname( __file__ ), '..', '..', '..' ) )
# eggs live in <galaxy_dir>/eggs unless GALAXY_EGGS_PATH points elsewhere
eggs_dir = os.environ.get( 'GALAXY_EGGS_PATH', os.path.join( galaxy_dir, 'eggs' ) )
# "pyX.Y" tag of the running interpreter.  Built from version_info rather than
# by slicing sys.version, which truncates two-digit minor versions ("3.10" -> "3.1").
py = 'py%d.%d' % sys.version_info[:2]


class EggNotFetchable( Exception ):
    """
    Raised when one or more eggs cannot be located or downloaded.

    ``eggs`` may be a single item or a list/tuple of items; ``self.eggs`` is
    always a sequence.  Items may be plain names (strings) or objects carrying
    a ``name`` attribute.
    """
    def __init__( self, eggs ):
        if isinstance( eggs, ( list, tuple ) ):
            self.eggs = eggs
        else:
            self.eggs = [ eggs ]

    def __str__( self ):
        # Items are not guaranteed to be strings, so a bare ' '.join() would
        # raise TypeError while the exception is being reported.  Prefer an
        # item's ``name`` attribute when it has one.
        return ' '.join( '%s' % ( getattr( egg, 'name', egg ), ) for egg in self.eggs )


# need the options to remain case sensitive
class CaseSensitiveConfigParser( ConfigParser.SafeConfigParser ):
    """
    SafeConfigParser whose option names keep the case they have in the file.
    """
    def optionxform( self, optionstr ):
        """
        Return the option name unchanged (the stock implementation
        lower-cases it).
        """
        return optionstr


# so we can actually detect failures
class URLRetriever( urllib.FancyURLopener ):
    """
    FancyURLopener that uses the plain URLopener handling for HTTP error
    codes without a dedicated handler, so callers can detect a failed
    download.
    """
    def http_error_default( *args ):
        # ``self`` arrives as args[0]; everything is passed straight through to
        # URLopener's handler (the caller in this module catches IOError).
        urllib.URLopener.http_error_default( *args )


class Egg( object ):
    """
    Contains information about locating and downloading eggs.

    An Egg ties a name/version/tag/platform to a pkg_resources Distribution
    located in the eggs directory, and knows how to download itself from
    ``url`` and how to resolve itself through the pkg_resources working set.
    """
    def __init__( self, name=None, version=None, tag=None, url=None, platform=None, crate=None ):
        """
        ``crate`` is the Crate this egg belongs to: it supplies the
        ``enable_egg_fetch`` switch and is used to look up sibling eggs while
        resolving.  The Distribution is only built when both ``name`` and
        ``version`` are given.
        """
        self.name = name
        self.version = version
        self.tag = tag
        self.url = url
        self.platform = platform
        self.crate = crate
        self.distribution = None
        self.dir = None
        # set by version_conflict() while a conflicting path entry is removed
        self.removed_location = None
        # The signature allows crate=None, so don't dereference it blindly;
        # fetching stays enabled in that case.
        if crate is None:
            self.enable_fetch = True
        else:
            self.enable_fetch = crate.enable_egg_fetch
        if self.name is not None and self.version is not None:
            self.set_distribution()

    def set_dir( self ):
        """
        Point this egg at the module-wide eggs directory.
        """
        self.dir = eggs_dir

    def set_distribution( self ):
        """
        Stores a pkg_resources Distribution object for reference later
        """
        if self.dir is None:
            self.set_dir()
        tag = self.tag or ''
        # egg file name: <name>-<version><tag>-<platform>.egg
        self.distribution = pkg_resources.Distribution.from_filename(
            os.path.join( self.dir, '-'.join( ( self.name, self.version + tag, self.platform ) ) + '.egg' ) )

    @property
    def path( self ):
        """
        Return the path of the egg, if it exists, or None
        """
        if env[self.distribution.project_name]:
            return env[self.distribution.project_name][0].location
        return None

    def fetch( self, requirement ):
        """
        fetch() serves as the install method to pkg_resources.working_set.resolve()

        Returns this egg's Distribution when the download succeeded, otherwise
        None.  ``requirement`` is part of the installer callback signature and
        is not used here.
        """
        def find_alternative():
            """
            Some platforms (e.g. Solaris) support eggs compiled on older platforms

            Returns the file name of a compatible egg listed at ``self.url``,
            or None when there is none.
            """
            class LinkParser( HTMLParser.HTMLParser ):
                """
                Finds links in what should be an Apache-style directory index
                """
                def __init__( self ):
                    HTMLParser.HTMLParser.__init__( self )
                    self.links = []

                def handle_starttag( self, tag, attrs ):
                    if tag == 'a' and 'href' in dict( attrs ):
                        self.links.append( dict( attrs )['href'] )

            parser = LinkParser()
            try:
                parser.feed( urllib2.urlopen( self.url + '/' ).read() )
            except urllib2.HTTPError as e:
                # Without an index there are no links to inspect, so there is
                # no alternative whatever the status code was.
                if e.code != 404:
                    log.debug( "Unable to read egg index %s/: %s", self.url, e )
                return None
            parser.close()
            for link in parser.links:
                filename = urllib.unquote( link ).rsplit( '/', 1 )[-1]
                tmp_dist = pkg_resources.Distribution.from_filename( filename )
                if tmp_dist.platform is not None and \
                        self.distribution.project_name == tmp_dist.project_name and \
                        self.distribution.version == tmp_dist.version and \
                        self.distribution.py_version == tmp_dist.py_version and \
                        pkg_resources.compatible_platforms( tmp_dist.platform, pkg_resources.get_platform() ):
                    return filename
            return None

        def _fetch():
            """
            Download the egg (or a compatible alternative) into self.dir.
            Returns True on success and False when nothing could be fetched.
            """
            if self.url is None:
                return False
            if not os.path.exists( self.dir ):
                os.makedirs( self.dir )
            alternative = None
            try:
                url = self.url + '/' + self.distribution.egg_name() + '.egg'
                URLRetriever().retrieve( url, self.distribution.location )
                log.debug( "Fetched %s" % url )
            except IOError as e:
                # HTTP failures raised through URLRetriever carry the status
                # code as the second argument; other IOErrors may not have one.
                not_found = len( e.args ) > 1 and e.args[1] == 404
                if not_found and self.distribution.platform != py:
                    alternative = find_alternative()
                    if alternative is None:
                        return False
                else:
                    return False
            if alternative is not None:
                try:
                    url = '/'.join( ( self.url, alternative ) )
                    URLRetriever().retrieve( url, os.path.join( self.dir, alternative ) )
                    log.debug( "Fetched %s" % url )
                except IOError:
                    return False
                # adopt the platform of the egg that was actually downloaded
                self.platform = alternative.split( '-', 2 )[-1].rsplit( '.egg', 1 )[0]
                self.set_distribution()
            self.unpack_if_needed()
            self.remove_doppelgangers()
            return True
        # If being called from a version conflict, that code has removed a
        # directory from sys.path.  That directory could be Python's main lib
        # directory (e.g. in the case of wsgiref under 2.6).  Temporarily put
        # it back during the execution of this code.
        if self.removed_location:
            sys.path.append( self.removed_location )
        rval = None
        try:
            # Explicit check instead of ``assert``: assert statements are
            # removed under ``python -O``, which would skip the download
            # entirely and still report success.
            if _fetch():
                rval = self.distribution
        except Exception as e:
            # Any failure while fetching means "not available" to the resolver.
            log.debug( "Unable to fetch %s: %s", self.name, e )
        finally:
            if self.removed_location:
                sys.path.remove( self.removed_location )
        if rval is not None:
            global env
            env = get_env()  # reset the global Environment object now that we've obtained a new egg
        return rval

    def unpack_if_needed( self ):
        """
        Replace a downloaded egg flagged ``not-zip-safe`` with an unpacked
        directory of the same name.
        """
        meta = pkg_resources.EggMetadata( zipimport.zipimporter( self.distribution.location ) )
        if meta.has_metadata( 'not-zip-safe' ):
            unpack_zipfile( self.distribution.location, self.distribution.location + "-tmp" )
            os.remove( self.distribution.location )
            os.rename( self.distribution.location + "-tmp", self.distribution.location )

    def remove_doppelgangers( self ):
        """
        Delete other eggs in self.dir that share this egg's name and platform
        (i.e. differ in version/tag only).
        """
        doppelgangers = glob.glob( os.path.join( self.dir, "%s-*-%s.egg" % ( self.name, self.platform ) ) )
        if self.distribution.location in doppelgangers:
            doppelgangers.remove( self.distribution.location )
        for doppelganger in doppelgangers:
            remove_file_or_path( doppelganger )
            log.debug( "Removed conflicting egg: %s" % doppelganger )

    def resolve( self ):
        """
        Resolve this egg and its dependencies through the working set.

        Returns the resolved distributions.  Raises EggNotFetchable when this
        egg itself cannot be found, and re-raises VersionConflict when the
        conflict involves two different projects.
        """
        try:
            rval = []
            # with fetching disabled the installer callback never provides anything
            if self.enable_fetch:
                installer = self.fetch
            else:
                installer = lambda x: None
            # resolve this egg and its dependencies
            dists = pkg_resources.working_set.resolve( ( self.distribution.as_requirement(), ), env, installer )
            for dist in dists:
                # if any of the resolved dependencies should be managed eggs but are being pulled from the wrong path, fix them
                if dist.project_name in self.crate.all_names and not os.path.realpath( dist.location ).startswith( os.path.realpath( self.dir ) ):
                    # TODO: like eggs.require(), this breaks requirement versioning
                    subdists = self.version_conflict( dist, dist.as_requirement() )
                    if isinstance( subdists, list ):
                        rval.extend( subdists )
                    else:
                        rval.append( subdists )
                else:
                    rval.append( dist )
            return rval
        except pkg_resources.DistributionNotFound as e:
            # If this statement is true, it means we do have the requested egg,
            # just not one (or more) of its deps.
            if e.args[0].project_name != self.distribution.project_name:
                log.warning( "Warning: %s (a dependent egg of %s) cannot be fetched" % ( e.args[0].project_name, self.distribution.project_name ) )
                return ( self.distribution, )
            else:
                raise EggNotFetchable( self )
        except pkg_resources.VersionConflict as e:
            if e.args[1].key != e.args[0].key:
                log.error( "One of Galaxy's managed eggs depends on something which is missing, this is almost certainly a bug in the egg distribution." )
                log.error( 'Dependency "%s" requires "%s"' % ( e.args[0].project_name, str( e.args[1] ) ) )
                raise
            # there's a conflicting egg on the path, remove it
            return self.version_conflict( e.args[0], e.args[1] )

    def version_conflict( self, conflict_dist, conflict_req ):
        """
        Remove ``conflict_dist`` from the working set and sys.path, then
        resolve the managed replacement in its place.  Returns whatever
        working_set.resolve() returns for the replacement.
        """
        # since this conflict may be for a dependent egg, find the correct egg from the crate
        if conflict_dist.project_name == self.distribution.project_name:
            egg = self
            dist = egg.distribution
        elif conflict_dist.project_name in self.crate.all_names:
            egg = self.crate[conflict_dist.project_name]
            dist = egg.distribution
        else:
            # should not happen, but just in case
            egg = None
            dist = conflict_dist
        # use the canonical path to locate and remove the conflict from the working set
        location = os.path.realpath( conflict_dist.location )
        for entry in pkg_resources.working_set.entries:
            if os.path.realpath( entry ) == location:
                # safe to mutate the list here because the loop stops right away
                pkg_resources.working_set.entries.remove( entry )
                break
        else:
            location = entry = None
        del pkg_resources.working_set.by_key[conflict_dist.key]
        # remove the conflict from sys.path
        if entry is not None:
            pkg_resources.working_set.entry_keys[entry] = []
            if entry in sys.path:
                sys.path.remove( entry )
        # if the conflict is a dependent egg, fetch that specific egg
        if egg:
            # Store the removed path so the fetch method can use it
            egg.removed_location = location
            if self.enable_fetch:
                installer = egg.fetch
            else:
                installer = lambda x: None
            r = pkg_resources.working_set.resolve( ( dist.as_requirement(), ), env, installer )
            egg.removed_location = None
        else:
            r = pkg_resources.working_set.resolve( ( dist.as_requirement(), ), env )
        # re-add the path if it's a non-egg dir, in case more deps live there
        if location is not None and not location.endswith( '.egg' ):
            pkg_resources.working_set.entries.append( location )
            sys.path.append( location )
        return r

    def require( self ):
        """
        Resolve this egg and add every resolved distribution whose location is
        not yet a working-set entry.  Returns the resolved distributions;
        errors from resolve() propagate unchanged.
        """
        dists = self.resolve()
        for dist in dists:
            if dist.location not in pkg_resources.working_set.entries:
                pkg_resources.working_set.add( dist )
        return dists


class Crate( object ):
    """
    Reads the eggs.ini file for use with checking and fetching.
    """
    config_file = os.path.join( galaxy_dir, 'eggs.ini' )

    def __init__( self, galaxy_config_file=None, platform=None ):
        """
        Read switches from the environment, load the (optional) Galaxy config
        and parse eggs.ini into Egg objects.
        """
        self.eggs = {}
        self.config = CaseSensitiveConfigParser()
        self.repo = None
        self.no_auto = []
        self.platform = platform
        # the part of the platform string before the first dash, when a platform is given
        self.py_platform = platform.split( '-' )[0] if platform is not None else None
        environ = os.environ
        self.enable_egg_fetch = string_as_bool( environ.get( 'GALAXY_ENABLE_EGG_FETCH', True ) )
        self.enable_eggs = string_as_bool( environ.get( 'GALAXY_ENABLE_EGGS', True ) )
        self.try_dependencies_from_env = string_as_bool( environ.get( 'GALAXY_TRY_DEPENDENCIES_FROM_ENV', False ) )
        self.galaxy_config = GalaxyConfig( galaxy_config_file )
        self.parse()

    def parse( self ):
        """
        Load the eggs config file and create an Egg for every entry in its
        two egg sections.
        """
        config = self.config
        config.read( Crate.config_file )
        self.repo = config.get( 'general', 'repository' )
        self.no_auto = config.get( 'general', 'no_auto' ).split()
        # platform-specific eggs first, then the pure-python ones
        for section, full_platform in ( ( 'eggs:platform', True ), ( 'eggs:noplatform', False ) ):
            self.parse_egg_section( config.items( section ), config.items( 'tags' ), full_platform )

    def parse_egg_section( self, eggs, tags, full_platform=False, egg_class=Egg ):
        """
        Register one ``egg_class`` instance per (name, version) pair in
        ``eggs``; ``tags`` supplies the optional per-egg version tag.
        """
        for egg_name, egg_version in eggs:
            egg_tag = dict( tags ).get( egg_name, '' )
            egg_url = '/'.join( ( self.repo, egg_name ) )
            if not full_platform:
                egg_platform = self.py_platform or py
            else:
                egg_platform = self.platform or '-'.join( ( py, pkg_resources.get_platform() ) )
            self.eggs[egg_name] = egg_class( name=egg_name, version=egg_version, tag=egg_tag,
                                             url=egg_url, platform=egg_platform, crate=self )

    @property
    def config_missing( self ):
        """
        True when at least one egg required by the Galaxy config has no path.
        """
        return any( not egg.path for egg in self.config_eggs )

    @property
    def all_missing( self ):
        """
        True when at least one egg from the eggs config file is not on disk.
        """
        return any( not os.path.exists( egg.distribution.location ) for egg in self.all_eggs )

    @property
    def config_names( self ):
        """
        Names of the eggs needed according to the Galaxy config file.
        """
        return [ needed.name for needed in self.config_eggs ]

    @property
    def all_names( self ):
        """
        Names of all eggs in the crate.
        """
        return [ known.name for known in self.all_eggs ]

    @property
    def config_eggs( self ):
        """
        Eggs whose conditional check against the Galaxy config passes.
        """
        wanted = self.galaxy_config.check_conditional
        return [ egg for egg in self.eggs.values() if wanted( egg.name ) ]

    @property
    def all_eggs( self ):
        """
        All eggs in the crate, leaving out the always-conditional ones whose
        check fails.
        """
        galaxy_config = self.galaxy_config
        return [ egg for egg in self.eggs.values()
                 if egg.name not in galaxy_config.always_conditional or galaxy_config.check_conditional( egg.name ) ]

    def __getitem__( self, name ):
        """
        Look an egg up by name; dashes are treated as underscores.
        """
        return self.eggs[ name.replace( '-', '_' ) ]

    def resolve( self, all=False ):
        """
        Try to resolve (e.g. fetch) the crate's eggs: every egg when ``all``
        is true, otherwise only those needed by the Galaxy config.  Eggs named
        in ``no_auto`` are skipped.  Raises EggNotFetchable listing the eggs
        that could not be resolved.
        """
        if not self.enable_eggs or self.try_dependencies_from_env:
            log.info('Dependencies will attempt to be loaded from the environment')
            return
        candidates = self.all_eggs if all else self.config_eggs
        missing = []
        for egg in [ candidate for candidate in candidates if candidate.name not in self.no_auto ]:
            try:
                egg.resolve()
            except EggNotFetchable:
                missing.append( egg )
        if missing:
            raise EggNotFetchable( missing )


class GalaxyConfig( object ):
    """
    Answers "is this egg needed?" from the options in a Galaxy config file.
    """
    # eggs that are only ever included when their conditional check passes
    always_conditional = ( 'pysam', 'ctypes', 'python_daemon' )

    def __init__( self, config_file ):
        """
        Load ``config_file``; ``None`` means "no config" and makes every
        conditional check pass.  Raises Exception when the file cannot be read.
        """
        if config_file is None:
            self.config = None
        else:
            self.config = ConfigParser.ConfigParser()
            # read() returns the list of files it managed to parse
            if not self.config.read( config_file ):
                raise Exception( "error: unable to read Galaxy config from %s" % config_file )

    def check_conditional( self, egg_name ):
        """
        Return a true value when ``egg_name`` is needed under the loaded
        config.  Eggs without a rule are always needed.  A rule that cannot be
        evaluated (e.g. the option is missing) counts as "not needed", except
        for pysqlite which defaults to needed.
        """
        def check_pysam():
            # can't build pysam on solaris < 10
            plat = pkg_resources.get_platform().split( '-' )
            if plat[0] == 'solaris':
                minor = plat[1].split('.')[1]
                if int( minor ) < 10:
                    return False
            return True

        def option( name ):
            return self.config.get( "app:main", name )

        def job_runners():
            # tolerate whitespace around the commas ("pbs, drmaa")
            return [ runner.strip() for runner in option( "start_job_runners" ).split( "," ) ]

        # If we're using require() we may not have a Galaxy config file, but if
        # we're using require(), we don't care about conditionals.
        if self.config is None:
            return True
        if egg_name == "pysqlite":
            # SQLite is different since it can be specified in two config vars and defaults to True
            try:
                return option( "database_connection" ).startswith( "sqlite://" )
            except Exception:
                return True
        checks = { "psycopg2":        lambda: option( "database_connection" ).startswith( "postgres" ),
                   "MySQL_python":    lambda: option( "database_connection" ).startswith( "mysql://" ),
                   "DRMAA_python":    lambda: "sge" in job_runners(),
                   "drmaa":           lambda: "drmaa" in job_runners(),
                   "pbs_python":      lambda: "pbs" in job_runners(),
                   # parse the value as a boolean: the raw string "False" is truthy
                   "python_openid":   lambda: self.config.getboolean( "app:main", "enable_openid" ),
                   "python_daemon":   lambda: sys.version_info[:2] >= ( 2, 5 ),
                   "pysam":           lambda: check_pysam(),
                   "PyRods":          lambda: option( "object_store" ) == "irods"
                   }
        try:
            return checks.get( egg_name, lambda: True )()
        except Exception:
            return False



def string_as_bool( string ):
    """
    Interpret ``string`` as a boolean: only 'true', 'yes' and 'on' (compared
    case-insensitively, after conversion with str()) count as True.
    """
    return str( string ).lower() in ( 'true', 'yes', 'on' )

def get_env():
    """
    Build a pkg_resources Environment populated with the distributions that
    pkg_resources.find_distributions() reports for the eggs directory.
    """
    environment = pkg_resources.Environment( search_path='', platform=pkg_resources.get_platform() )
    for found in pkg_resources.find_distributions( eggs_dir, False ):
        environment.add( found )
    return environment


# module-wide Environment of the managed eggs
env = get_env()


def require( req_str ):
    """
    Replacement for pkg_resources.require() that prefers Galaxy's managed eggs.

    When eggs are disabled, or dependencies should be tried from the
    environment first, the requirement is looked up in the regular working
    set.  With eggs disabled a failure there is final; otherwise the Galaxy
    egg is tried next.  Requirements that are not Galaxy eggs fall back to the
    regular working set.

    Raises EggNotFetchable (carrying the egg names) when a managed egg cannot
    be obtained.
    """
    c = Crate( None )
    req = pkg_resources.Requirement.parse( req_str )
    if c.try_dependencies_from_env or not c.enable_eggs:
        try:
            return pkg_resources.working_set.require( req_str )
        except Exception as exc:
            if not c.enable_eggs:
                raise
            # pass the values as separate logging arguments: a single tuple
            # cannot fill two %s placeholders
            log.info( "%s not found in local environment, will try Galaxy egg: %s", req_str, exc )
    # TODO: This breaks egg version requirements.  Not currently a problem, but
    # it could become one.
    try:
        return c[req.project_name].require()
    except KeyError:
        # not a galaxy-owned dependency
        return pkg_resources.working_set.require( req_str )
    except EggNotFetchable as e:
        raise EggNotFetchable( str( [ egg.name for egg in e.eggs ] ) )
# route pkg_resources.require() through the egg-aware implementation above
pkg_resources.require = require


def unpack_zipfile( filename, extract_dir, ignores=() ):
    """
    Extract the zip archive ``filename`` below ``extract_dir``.

    Entries with an absolute path or with ``..`` in their name are skipped, as
    is any entry whose name contains one of the substrings in ``ignores``.
    Permission bits recorded in the archive are applied to extracted files.
    """
    z = zipfile.ZipFile( filename )
    try:
        for info in z.infolist():
            name = info.filename
            # permission bits are taken from the upper half of external_attr;
            # the value is 0 when the archive recorded none
            mode = ( info.external_attr >> 16 ) & 0o777
            # don't extract absolute paths or ones with .. in them
            if name.startswith( '/' ) or '..' in name:
                continue
            target = os.path.join( extract_dir, *name.split( '/' ) )
            if not target:
                continue
            # skip the whole entry; a ``continue`` inside a loop over
            # ``ignores`` would only advance that inner loop
            if any( ignore in name for ignore in ignores ):
                continue
            if name.endswith( '/' ):
                # directory
                pkg_resources.ensure_directory( target )
            else:
                # file
                pkg_resources.ensure_directory( target )
                data = z.read( info.filename )
                f = open( target, 'wb' )
                try:
                    f.write( data )
                finally:
                    f.close()
                    del data
                # Best effort only.  A zero mode means "nothing recorded" and
                # applying it would leave the file inaccessible.
                if mode and not os.path.islink( target ):
                    try:
                        os.chmod( target, mode )
                    except OSError:
                        pass
    finally:
        z.close()


def remove_file_or_path( f ):
    """
    Delete ``f``: recursively when it is a directory, as a single file
    otherwise.
    """
    if os.path.isdir( f ):
        shutil.rmtree( f )
    else:
        os.remove( f )