# Source code for galaxy.eggs

"""
Manage Galaxy eggs
"""

import ConfigParser
import glob
import HTMLParser
import os
import pkg_resources
import shutil
import sys
import urllib
import urllib2
import zipfile
import zipimport

import logging
# Module-level logger, named after this module.
log = logging.getLogger( __name__ )
# Attach a do-nothing handler so that records emitted here have a handler even
# when the embedding application has not configured logging.
log.addHandler( logging.NullHandler() )


# Directory three levels above the directory that contains this file.
galaxy_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..'))
# Where eggs are stored; the GALAXY_EGGS_PATH environment variable overrides
# the default "<galaxy_dir>/eggs".
eggs_dir = os.environ.get('GALAXY_EGGS_PATH', os.path.join(galaxy_dir, 'eggs'))
# Python version tag used in egg platform strings, e.g. "py2.7".  Built from
# version_info rather than slicing sys.version, because sys.version[:3]
# truncates two-digit minor versions ("3.10" -> "3.1").
py = 'py%d.%d' % (sys.version_info[0], sys.version_info[1])


class EggNotFetchable(Exception):
    """
    Raised when one or more eggs cannot be fetched.

    ``eggs`` may be a single item or a list/tuple of items; it is always
    stored on ``self.eggs`` as a sequence.  Items may be plain strings or
    objects exposing a ``name`` attribute (such as ``Egg`` instances).
    """

    def __init__(self, eggs):
        if isinstance(eggs, (list, tuple)):
            self.eggs = eggs
        else:
            self.eggs = [eggs]

    def __str__(self):
        # Items are not necessarily strings (Egg objects are stored here too),
        # so use each item's ``name`` when it has one instead of joining the
        # raw objects, which would raise TypeError.
        return ' '.join(str(getattr(egg, 'name', egg)) for egg in self.eggs)


# need the options to remain case sensitive
class CaseSensitiveConfigParser(ConfigParser.SafeConfigParser):
    """
    SafeConfigParser variant whose option names keep their original case.
    """

    def optionxform(self, optionstr):
        """
        Return ``optionstr`` untouched instead of applying any case
        conversion to it.
        """
        return optionstr


# so we can actually detect failures
class URLRetriever(urllib.FancyURLopener):
    """
    FancyURLopener whose default HTTP error handling is delegated to the
    plain ``urllib.URLopener`` implementation.
    """

    def http_error_default(self, *args):
        """
        Forward all arguments unchanged to ``URLopener.http_error_default``.
        """
        urllib.URLopener.http_error_default(self, *args)
class Egg(object):
    """
    Contains information about locating and downloading eggs.

    An Egg is described by ``name``, ``version``, optional ``tag``, the
    repository ``url`` to download from, and a ``platform`` string.  ``crate``
    is the owning crate; its ``enable_egg_fetch`` attribute is read in the
    constructor, so despite the ``None`` default a crate must be supplied.
    """

    def __init__(self, name=None, version=None, tag=None, url=None, platform=None, crate=None):
        self.name = name
        self.version = version
        self.tag = tag
        self.url = url
        self.platform = platform
        self.crate = crate
        self.distribution = None
        self.dir = None
        # Set by version_conflict() while it resolves, so fetch() can restore
        # the path entry that was removed.
        self.removed_location = None
        self.enable_fetch = crate.enable_egg_fetch
        if self.name is not None and self.version is not None:
            self.set_distribution()

    def set_dir(self):
        """
        Point this egg at the module-wide eggs directory.
        """
        self.dir = eggs_dir

    def set_distribution(self):
        """
        Stores a pkg_resources Distribution object for reference later
        """
        if self.dir is None:
            self.set_dir()
        tag = self.tag or ''
        egg_filename = '-'.join((self.name, self.version + tag, self.platform)) + '.egg'
        self.distribution = pkg_resources.Distribution.from_filename(
            os.path.join(self.dir, egg_filename))

    @property
    def path(self):
        """
        Return the path of the egg, if it exists, or None
        """
        found = env[self.distribution.project_name]
        if found:
            return found[0].location
        return None

    def fetch(self, requirement):
        """
        fetch() serves as the install method to pkg_resources.working_set.resolve()

        Returns this egg's Distribution when the download succeeded, or None
        when it did not.  ``requirement`` is accepted to satisfy the installer
        callback signature and is not otherwise used.
        """
        global env

        def find_alternative():
            """
            Some platforms (e.g. Solaris) support eggs compiled on older platforms
            """
            class LinkParser(HTMLParser.HTMLParser):
                """
                Finds links in what should be an Apache-style directory index
                """
                def __init__(self):
                    HTMLParser.HTMLParser.__init__(self)
                    self.links = []

                def handle_starttag(self, tag, attrs):
                    attributes = dict(attrs)
                    if tag == 'a' and 'href' in attributes:
                        self.links.append(attributes['href'])

            parser = LinkParser()
            try:
                parser.feed(urllib2.urlopen(self.url + '/').read())
            except urllib2.HTTPError as e:
                if e.code == 404:
                    return None
                # Any other HTTP error leaves the parser without links, so the
                # loop below finds nothing and None is returned.
            parser.close()
            for link in parser.links:
                egg_file = urllib.unquote(link).rsplit('/', 1)[-1]
                tmp_dist = pkg_resources.Distribution.from_filename(egg_file)
                if tmp_dist.platform is not None and \
                        self.distribution.project_name == tmp_dist.project_name and \
                        self.distribution.version == tmp_dist.version and \
                        self.distribution.py_version == tmp_dist.py_version and \
                        pkg_resources.compatible_platforms(tmp_dist.platform, pkg_resources.get_platform()):
                    return egg_file
            return None

        def _fetch():
            """
            Download the egg (or a compatible alternative); True on success.
            """
            if self.url is None:
                return False
            if not os.path.exists(self.dir):
                os.makedirs(self.dir)
            alternative = None
            try:
                url = self.url + '/' + self.distribution.egg_name() + '.egg'
                URLRetriever().retrieve(url, self.distribution.location)
                log.debug("Fetched %s" % url)
            except IOError as e:
                # The second exception argument is compared against the HTTP
                # status; only a 404 on a platform-specific egg triggers the
                # search for a compatible alternative.
                not_found = len(e.args) > 1 and e.args[1] == 404
                if not_found and self.distribution.platform != py:
                    alternative = find_alternative()
                    if alternative is None:
                        return False
                else:
                    return False
            if alternative is not None:
                try:
                    url = '/'.join((self.url, alternative))
                    URLRetriever().retrieve(url, os.path.join(self.dir, alternative))
                    log.debug("Fetched %s" % url)
                except IOError:
                    return False
                # Adopt the platform string embedded in the alternative's
                # file name and rebuild the Distribution accordingly.
                self.platform = alternative.split('-', 2)[-1].rsplit('.egg', 1)[0]
                self.set_distribution()
            self.unpack_if_needed()
            self.remove_doppelgangers()
            return True

        # If being called from a version conflict, that code has removed a
        # directory from sys.path.  That directory could be Python's main lib
        # directory (e.g. in the case of wsgiref under 2.6).  Temporarily put
        # it back during the execution of this code.
        if self.removed_location:
            sys.path.append(self.removed_location)
        try:
            # Call _fetch() explicitly: wrapping it in ``assert`` would skip
            # the download entirely when Python runs with -O.
            if _fetch():
                rval = self.distribution
            else:
                rval = None
        except Exception as exc:
            log.debug("Failed to fetch %s: %s", self.name, exc)
            rval = None
        finally:
            if self.removed_location:
                sys.path.remove(self.removed_location)
        if rval is not None:
            env = get_env()  # reset the global Environment object now that we've obtained a new egg
        return rval

    def unpack_if_needed(self):
        """
        Replace the egg zip file with an unpacked directory of the same name
        when the egg's metadata contains a 'not-zip-safe' entry.
        """
        location = self.distribution.location
        meta = pkg_resources.EggMetadata(zipimport.zipimporter(location))
        if meta.has_metadata('not-zip-safe'):
            unpack_zipfile(location, location + "-tmp")
            os.remove(location)
            os.rename(location + "-tmp", location)

    def remove_doppelgangers(self):
        """
        Remove every other egg in the eggs directory that has the same name
        and platform as this one (i.e. differs only in its version part).
        """
        doppelgangers = glob.glob(os.path.join(self.dir, "%s-*-%s.egg" % (self.name, self.platform)))
        if self.distribution.location in doppelgangers:
            doppelgangers.remove(self.distribution.location)
        for doppelganger in doppelgangers:
            remove_file_or_path(doppelganger)
            log.debug("Removed conflicting egg: %s" % doppelganger)

    def resolve(self):
        """
        Resolve this egg and its dependencies against the working set.

        Returns the resolved distributions.  Raises EggNotFetchable when this
        egg itself cannot be found, and re-raises VersionConflict when the
        conflict is between two different projects.
        """
        try:
            rval = []
            # resolve this egg and its dependencies
            if self.enable_fetch:
                installer = self.fetch
            else:
                installer = lambda x: None
            dists = pkg_resources.working_set.resolve(
                (self.distribution.as_requirement(),), env, installer)
            for dist in dists:
                # if any of the resolved dependencies should be managed eggs but are being pulled from the wrong path, fix them
                if dist.project_name in self.crate.all_names and \
                        not os.path.realpath(dist.location).startswith(os.path.realpath(self.dir)):
                    # TODO: like eggs.require(), this breaks requirement versioning
                    subdists = self.version_conflict(dist, dist.as_requirement())
                    if isinstance(subdists, list):
                        rval.extend(subdists)
                    else:
                        rval.append(subdists)
                else:
                    rval.append(dist)
            return rval
        except pkg_resources.DistributionNotFound as e:
            # If this statement is true, it means we do have the requested egg,
            # just not one (or more) of its deps.
            if e.args[0].project_name != self.distribution.project_name:
                log.warning("Warning: %s (a dependent egg of %s) cannot be fetched" % (e.args[0].project_name, self.distribution.project_name))
                return (self.distribution,)
            else:
                raise EggNotFetchable(self)
        except pkg_resources.VersionConflict as e:
            if e.args[1].key != e.args[0].key:
                log.error("One of Galaxy's managed eggs depends on something which is missing, this is almost certainly a bug in the egg distribution.")
                log.error('Dependency "%s" requires "%s"' % (e.args[0].project_name, str(e.args[1])))
                raise
            # there's a conflicting egg on the path, remove it
            return self.version_conflict(e.args[0], e.args[1])

    def version_conflict(self, conflict_dist, conflict_req):
        """
        Remove ``conflict_dist`` from the working set and sys.path, then
        resolve the managed egg that should take its place.

        Returns whatever ``pkg_resources.working_set.resolve`` returns.
        ``conflict_req`` is accepted but not used by this method.
        """
        # since this conflict may be for a dependent egg, find the correct egg from the crate
        if conflict_dist.project_name == self.distribution.project_name:
            egg = self
            dist = egg.distribution
        elif conflict_dist.project_name in self.crate.all_names:
            egg = self.crate[conflict_dist.project_name]
            dist = egg.distribution
        else:
            # should not happen, but just in case
            egg = None
            dist = conflict_dist
        # use the canonical path to locate and remove the conflict from the working set
        location = os.path.realpath(conflict_dist.location)
        for entry in pkg_resources.working_set.entries:
            if os.path.realpath(entry) == location:
                pkg_resources.working_set.entries.remove(entry)
                break
        else:
            # no working set entry matched the conflicting location
            location = entry = None
        del pkg_resources.working_set.by_key[conflict_dist.key]
        # remove the conflict from sys.path
        if entry is not None:
            pkg_resources.working_set.entry_keys[entry] = []
            if entry in sys.path:
                sys.path.remove(entry)
        # if the conflict is a dependent egg, fetch that specific egg
        if egg:
            # Store the removed path so the fetch method can use it
            egg.removed_location = location
            if self.enable_fetch:
                installer = egg.fetch
            else:
                installer = lambda x: None
            r = pkg_resources.working_set.resolve((dist.as_requirement(),), env, installer)
            egg.removed_location = None
        else:
            r = pkg_resources.working_set.resolve((dist.as_requirement(),), env)
        # re-add the path if it's a non-egg dir, in case more deps live there
        if location is not None and not location.endswith('.egg'):
            pkg_resources.working_set.entries.append(location)
            sys.path.append(location)
        return r

    def require(self):
        """
        Resolve this egg and add every resolved distribution whose location
        is not yet a working set entry to the working set.

        Returns the resolved distributions; exceptions from resolve()
        propagate unchanged.
        """
        dists = self.resolve()
        for dist in dists:
            if dist.location not in pkg_resources.working_set.entries:
                pkg_resources.working_set.add(dist)
        return dists
class Crate(object):
    """
    Reads the eggs.ini file for use with checking and fetching.
    """
    config_file = os.path.join(galaxy_dir, 'eggs.ini')

    def __init__(self, galaxy_config_file=None, platform=None):
        self.eggs = {}
        self.config = CaseSensitiveConfigParser()
        self.repo = None
        self.no_auto = []
        self.platform = platform
        # The Python-only part of the platform is everything before the first '-'.
        self.py_platform = platform.split('-')[0] if platform is not None else None
        # Behaviour switches are read from the environment.
        environ = os.environ
        self.enable_egg_fetch = string_as_bool(environ.get('GALAXY_ENABLE_EGG_FETCH', True))
        self.enable_eggs = string_as_bool(environ.get('GALAXY_ENABLE_EGGS', True))
        self.try_dependencies_from_env = string_as_bool(environ.get('GALAXY_TRY_DEPENDENCIES_FROM_ENV', False))
        self.galaxy_config = GalaxyConfig(galaxy_config_file)
        self.parse()

    def parse(self):
        """
        Read the eggs config file and populate the repository URL, the
        no_auto list and the eggs of both the platform and noplatform sections.
        """
        config = self.config
        config.read(Crate.config_file)
        self.repo = config.get('general', 'repository')
        self.no_auto = config.get('general', 'no_auto').split()
        platform_eggs = config.items('eggs:platform')
        tags = config.items('tags')
        self.parse_egg_section(platform_eggs, tags, True)
        self.parse_egg_section(config.items('eggs:noplatform'), tags)

    def parse_egg_section(self, eggs, tags, full_platform=False, egg_class=Egg):
        """
        Create one ``egg_class`` instance per (name, version) pair in ``eggs``
        and store it in ``self.eggs`` under its name.  ``tags`` is a sequence
        of (name, tag) pairs; eggs without an entry get an empty tag.
        """
        tag_by_name = dict(tags)
        for name, version in eggs:
            if full_platform:
                platform = self.platform or '-'.join((py, pkg_resources.get_platform()))
            else:
                platform = self.py_platform or py
            self.eggs[name] = egg_class(
                name=name,
                version=version,
                tag=tag_by_name.get(name, ''),
                url='/'.join((self.repo, name)),
                platform=platform,
                crate=self)

    @property
    def config_missing(self):
        """
        Return true if any eggs are missing, conditional on options set in the Galaxy config file.
        """
        return any(not egg.path for egg in self.config_eggs)

    @property
    def all_missing(self):
        """
        Return true if any eggs in the eggs config file are missing.
        """
        return any(not os.path.exists(egg.distribution.location) for egg in self.all_eggs)

    @property
    def config_names(self):
        """
        Return a list of names of all eggs in the crate that are needed based on the options set in the Galaxy config file.
        """
        return [egg.name for egg in self.config_eggs]

    @property
    def all_names(self):
        """
        Return a list of names of all eggs in the crate.
        """
        return [egg.name for egg in self.all_eggs]

    @property
    def config_eggs(self):
        """
        Return a list of all eggs in the crate that are needed based on the options set in the Galaxy config file.
        """
        needed = self.galaxy_config.check_conditional
        return [egg for egg in self.eggs.values() if needed(egg.name)]

    @property
    def all_eggs(self):
        """
        Return a list of all eggs in the crate.
        """
        galaxy_config = self.galaxy_config
        # Eggs listed as always conditional are kept only when their
        # conditional check passes; every other egg is always kept.
        return [egg for egg in self.eggs.values()
                if egg.name not in galaxy_config.always_conditional
                or galaxy_config.check_conditional(egg.name)]

    def __getitem__(self, name):
        """
        Return a specific egg.
        """
        return self.eggs[name.replace('-', '_')]

    def resolve(self, all=False):
        """
        Try to resolve (e.g. fetch) all eggs in the crate.
        """
        if not self.enable_eggs or self.try_dependencies_from_env:
            log.info('Dependencies will attempt to be loaded from the environment')
            return
        candidates = self.all_eggs if all else self.config_eggs
        missing = []
        for egg in candidates:
            # eggs named in no_auto are never resolved automatically
            if egg.name in self.no_auto:
                continue
            try:
                egg.resolve()
            except EggNotFetchable:
                missing.append(egg)
        if missing:
            raise EggNotFetchable(missing)
class GalaxyConfig(object):
    """
    Wraps the Galaxy config file to decide which conditional eggs are needed.

    ``config_file`` may be None, in which case no config is loaded and every
    egg is treated as needed.  Raises Exception when a config file is given
    but cannot be read.
    """

    # Eggs whose conditional check is applied even when listing "all" eggs.
    always_conditional = ('pysam', 'ctypes', 'python_daemon')

    def __init__(self, config_file):
        if config_file is None:
            self.config = None
        else:
            self.config = ConfigParser.ConfigParser()
            # read() returns the list of files successfully parsed
            if not self.config.read(config_file):
                raise Exception("error: unable to read Galaxy config from %s" % config_file)

    def check_conditional(self, egg_name):
        """
        Return a true value when the egg named ``egg_name`` is needed under
        the loaded Galaxy config.

        Eggs without a specific rule are always needed.  When a rule cannot
        be evaluated (for example the option is missing from the config) the
        egg is treated as not needed, except for "pysqlite" which defaults to
        needed.
        """
        def check_pysam():
            # can't build pysam on solaris < 10
            plat = pkg_resources.get_platform().split('-')
            if plat[0] == 'solaris':
                minor = plat[1].split('.')[1]
                if int(minor) < 10:
                    return False
            return True

        # If we're using require() we may not have a Galaxy config file, but if
        # we're using require(), we don't care about conditionals.
        if self.config is None:
            return True

        def setting(option):
            return self.config.get("app:main", option)

        if egg_name == "pysqlite":
            # SQLite is different since it can be specified in two config vars and defaults to True
            try:
                return setting("database_connection").startswith("sqlite://")
            except Exception:
                return True

        checks = {
            "psycopg2": lambda: setting("database_connection").startswith("postgres"),
            "MySQL_python": lambda: setting("database_connection").startswith("mysql://"),
            "DRMAA_python": lambda: "sge" in setting("start_job_runners").split(","),
            "drmaa": lambda: "drmaa" in setting("start_job_runners").split(","),
            "pbs_python": lambda: "pbs" in setting("start_job_runners").split(","),
            # Parse the option as a boolean: the raw string "False" is truthy
            # and would otherwise mark the egg as needed.
            "python_openid": lambda: self.config.getboolean("app:main", "enable_openid"),
            "python_daemon": lambda: sys.version_info[:2] >= (2, 5),
            "pysam": check_pysam,
            "PyRods": lambda: setting("object_store") == "irods",
        }
        try:
            return checks.get(egg_name, lambda: True)()
        except Exception:
            return False
def string_as_bool(string):
    """
    Return True when the string form of ``string`` is, ignoring case, one of
    'true', 'yes' or 'on'; return False for anything else.
    """
    return str(string).lower() in ('true', 'yes', 'on')
def get_env():
    """
    Build a pkg_resources Environment for the current platform that holds
    exactly the distributions found in the eggs directory.
    """
    # Start from an empty search path so nothing outside eggs_dir is picked up.
    environment = pkg_resources.Environment(
        search_path='',
        platform=pkg_resources.get_platform())
    for distribution in pkg_resources.find_distributions(eggs_dir, False):
        environment.add(distribution)
    return environment


# Module-wide Environment; replaced by Egg.fetch() after a new egg is obtained.
env = get_env()
def require(req_str):
    """
    Replacement for ``pkg_resources.require`` that prefers Galaxy's eggs.

    When eggs are disabled, or dependencies should be tried from the
    environment first, the requirement is looked up in the regular working
    set; with eggs disabled a failure there is re-raised.  Otherwise the
    matching egg from the crate is required, falling back to the regular
    working set for projects the crate does not know about.

    Raises EggNotFetchable (carrying the string form of the list of egg
    names) when a crate egg cannot be fetched.
    """
    c = Crate(None)
    req = pkg_resources.Requirement.parse(req_str)
    if c.try_dependencies_from_env or not c.enable_eggs:
        try:
            return pkg_resources.working_set.require(req_str)
        except Exception as exc:
            if not c.enable_eggs:
                raise
            # Arguments are passed individually: a single tuple would be
            # treated as one argument for two placeholders.
            log.info("%s not found in local environment, will try Galaxy egg: %s", req_str, exc)
    # TODO: This breaks egg version requirements.  Not currently a problem, but
    # it could become one.
    try:
        return c[req.project_name].require()
    except KeyError:
        # not a galaxy-owned dependency
        return pkg_resources.working_set.require(req_str)
    except EggNotFetchable as e:
        raise EggNotFetchable(str([egg.name for egg in e.eggs]))


# Route every pkg_resources.require() call through the egg-aware version.
pkg_resources.require = require
def unpack_zipfile(filename, extract_dir, ignores=()):
    """
    Extract the zip archive ``filename`` into ``extract_dir``.

    Members with an absolute path or with '..' anywhere in their name are
    skipped, as are members whose name contains any of the substrings in
    ``ignores``.  Permission bits recorded in the archive are applied to
    extracted files on a best-effort basis.
    """
    z = zipfile.ZipFile(filename)
    try:
        for info in z.infolist():
            name = info.filename
            # don't extract absolute paths or ones with .. in them
            if name.startswith('/') or '..' in name:
                continue
            # Skip ignored members entirely (the check must continue the
            # member loop, not a loop over the ignore patterns).
            if any(ignore in name for ignore in ignores):
                continue
            target = os.path.join(extract_dir, *name.split('/'))
            if not target:
                continue
            # Make sure the directory that will contain the target exists.
            # For a directory member the target ends with a separator, so
            # this creates the directory itself.
            parent = os.path.dirname(target)
            if parent and not os.path.isdir(parent):
                os.makedirs(parent)
            if name.endswith('/'):
                # directory
                continue
            # file
            data = z.read(info.filename)
            with open(target, 'wb') as f:
                f.write(data)
            del data
            # The high 16 bits of external_attr carry the Unix mode.  A mode
            # of 0 means the archive recorded no permissions; applying it
            # would make the file inaccessible, so it is skipped.
            mode = (info.external_attr >> 16) & 0o777
            if mode and not os.path.islink(target):
                try:
                    os.chmod(target, mode)
                except OSError:
                    # permissions are best-effort only
                    pass
    finally:
        z.close()
def remove_file_or_path(f):
    """
    Delete ``f``: recursively when it is a directory, as a single file
    otherwise.
    """
    remover = shutil.rmtree if os.path.isdir(f) else os.remove
    remover(f)