"""
velvet datatypes
James E Johnson - University of Minnesota
for velvet assembler tool in galaxy
"""
import data
import logging
import os
import re
import sys
from galaxy.datatypes import sequence
from galaxy.datatypes.images import Html
from galaxy.datatypes.metadata import MetadataElement
log = logging.getLogger(__name__)
class Amos( data.Text ):
    """Class describing the AMOS assembly file """
    file_ext = 'afg'

    def sniff( self, filename ):
        """
        Determines whether the file is an amos assembly file format

        Returns True as soon as any line, once stripped of surrounding
        whitespace, is exactly an opening ``{RED``, ``{CTG`` or ``{TLE``
        record marker; returns False if no such line exists or the file
        cannot be read.

        Example::

          {CTG
          iid:1
          eid:1
          seq:
          CCTCTCCTGTAGAGTTCAACCGA-GCCGGTAGAGTTTTATCA
          .
          qlt:
          DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD
          .
          {TLE
          src:1027
          off:0
          clr:618,0
          gap:
          250 612
          .
          }
          }
        """
        # FIXME: this method will read the entire file when no marker is found.
        # It should call get_headers() like other sniff methods.
        try:
            # The context manager closes the handle even if reading fails
            # part-way through.
            with open( filename ) as fh:
                for line in fh:
                    # The pattern is anchored at both ends, so it already
                    # implies a non-empty line that starts with '{'.
                    if re.match( r'{(RED|CTG|TLE)$', line.strip() ):
                        return True
        except Exception as e:
            # Sniffing is best-effort: an unreadable file is simply "not AMOS".
            # Unlike a bare except, this lets KeyboardInterrupt/SystemExit through.
            log.debug( "Amos sniff could not read %s: %s", filename, e )
        return False
class Sequences( sequence.Fasta ):
    """Class describing the Sequences file generated by velveth """

    def sniff( self, filename ):
        """
        Determines whether the file is a velveth produced fasta format

        The id line has 3 fields separated by tabs: sequence_name sequence_index category::

          >SEQUENCE_0_length_35 1 1
          GGATATAGGGCCAACCCAACTCAACGGCCTGTCTT
          >SEQUENCE_1_length_35 2 1
          CGACGAATGACAGGTCACGAATTTGGCGGGGATTA

        Leading blank lines are skipped. Only the first non-empty line and
        the line that follows it are examined. Returns False when the file
        cannot be read.
        """
        try:
            # The context manager closes the handle on every exit path,
            # including the early returns below.
            with open( filename ) as fh:
                # Iterate with readline() rather than the file iterator: the
                # loop body calls readline() itself, and Python 2 forbids
                # mixing the two on one file object.
                for line in iter( fh.readline, '' ):
                    line = line.strip()
                    if not line:
                        continue  # still looking for the first non-empty line
                    # The pattern starts with '>', so this also rejects a
                    # first line that is not a fasta header at all.
                    if not re.match( r'>[^\t]+\t\d+\t\d+$', line ):
                        return False
                    # The line after the header must hold sequence data: it
                    # may be neither empty nor another header.
                    following = fh.readline().strip()
                    return following != '' and not following.startswith( '>' )
        except Exception as e:
            # Sniffing is best-effort: an unreadable file is simply not a match.
            log.debug( "Sequences sniff could not read %s: %s", filename, e )
        return False
class Roadmaps( data.Text ):
    """Class describing the Roadmaps file generated by velveth """

    def sniff( self, filename ):
        """
        Determines whether the file is a velveth produced RoadMap::

          142858 21 1
          ROADMAP 1
          ROADMAP 2
          ...

        The very first line must consist of three tab-separated integers and
        the second line must be ``ROADMAP <number>``. A blank or missing
        first line disqualifies the file (leading blank lines are not
        skipped). Returns False when the file cannot be read.
        """
        try:
            # The context manager closes the handle on every exit path,
            # including the early returns below.
            with open( filename ) as fh:
                # An empty file or a blank first line yields '' here, which
                # fails the header pattern.
                header = fh.readline().strip()
                if not re.match( r'\d+\t\d+\t\d+$', header ):
                    return False
                # The next line.strip() should be 'ROADMAP 1'
                first_entry = fh.readline().strip()
                return re.match( r'ROADMAP \d+$', first_entry ) is not None
        except Exception as e:
            # Sniffing is best-effort: an unreadable file is simply not a match.
            log.debug( "Roadmaps sniff could not read %s: %s", filename, e )
        return False
class Velvet( Html ):
    """
    Composite dataset produced by velveth: an auto-generated HTML primary
    file plus the 'Sequences', 'Roadmaps' and (optional) 'Log' component files.
    """
    # NOTE(review): the three read-type defaults below are the *string* "False",
    # which is truthy in Python - confirm consumers do not test them as booleans
    # before regenerate_primary_file() has stored real bool values.
    MetadataElement( name="base_name", desc="base name for velveth dataset", default="velvet", readonly=True, set_in_upload=True)
    MetadataElement( name="paired_end_reads", desc="has paired-end reads", default="False", readonly=False, set_in_upload=True)
    MetadataElement( name="long_reads", desc="has long reads", default="False", readonly=False, set_in_upload=True)
    MetadataElement( name="short2_reads", desc="has 2nd short reads", default="False", readonly=False, set_in_upload=True)

    composite_type = 'auto_primary_file'
    allow_datatype_change = False
    file_ext = 'html'

    def __init__( self, **kwd ):
        """Register the component files that make up the composite dataset."""
        Html.__init__( self, **kwd )
        self.add_composite_file( 'Sequences', mimetype = 'text/html', description = 'Sequences', substitute_name_with_metadata = None, is_binary = False )
        self.add_composite_file( 'Roadmaps', mimetype = 'text/html', description = 'Roadmaps', substitute_name_with_metadata = None, is_binary = False )
        # NOTE(review): 'optional' is passed as the string 'True' rather than a bool; kept as-is.
        self.add_composite_file( 'Log', mimetype = 'text/html', description = 'Log', optional = 'True', substitute_name_with_metadata = None, is_binary = False )

    def _composite_file_items( self, dataset, caller, skip_log = False ):
        """
        Build one ``<li>`` link line per composite file of `dataset`.

        `caller` is only used to label the debug log output. When `skip_log`
        is true, entries whose name contains 'Log' are logged but not listed.
        Returns a list of HTML strings.
        """
        items = []
        for fn, composite_file in self.get_composite_files( dataset = dataset ).items():
            log.debug( "Velvet log info %s %s %s", caller, fn, composite_file )
            if skip_log and 'Log' in fn:
                continue
            opt_text = ' (optional)' if composite_file.optional else ''
            description = composite_file.get( 'description' )
            if description:
                items.append( '<li><a href="%s" type="text/plain">%s (%s)</a>%s</li>' % ( fn, fn, description, opt_text ) )
            else:
                items.append( '<li><a href="%s" type="text/plain">%s</a>%s</li>' % ( fn, fn, opt_text ) )
        return items

    def generate_primary_file( self, dataset = None ):
        """Return the HTML text of a primary file that links to every composite file."""
        log.debug( "Velvet log info %s %s", 'JJ generate_primary_file', dataset )
        rval = ['<html><head><title>Velvet Galaxy Composite Dataset </title></head><p/>']
        rval.append('<div>This composite dataset is composed of the following files:<p/><ul>')
        rval.extend( self._composite_file_items( dataset, 'JJ generate_primary_file' ) )
        rval.append( '</ul></div></html>' )
        return "\n".join( rval )

    def regenerate_primary_file(self,dataset):
        """
        cannot do this until we are setting metadata

        Reads up to the first 1000 characters of the 'Log' file in the
        dataset's extra files path, derives the paired_end_reads, long_reads
        and short2_reads metadata and ``dataset.info`` from the velveth
        options found there, then rewrites ``dataset.file_name`` as an HTML
        summary listing the non-Log composite files. If the Log file cannot
        be processed, the metadata is left untouched and the summary is
        written without a 'Uses:' message.
        """
        log.debug( "Velvet log info %s", 'JJ regenerate_primary_file' )
        gen_msg = ''
        # Bound before the try so the except handler can always report it,
        # even when reading extra_files_path itself is what failed.
        efp = None
        try:
            efp = dataset.extra_files_path
            log_path = os.path.join(efp,'Log')
            with open(log_path,'r') as f:
                log_content = f.read(1000)
            # Remove slash-delimited runs of non-whitespace, i.e. the
            # directory part of any path in the log text.
            log_msg = re.sub(r'/\S*/','',log_content)
            log.debug( "Velveth log info %s", log_msg )
            paired_end_reads = re.search(r'-(short|long)Paired', log_msg) is not None
            dataset.metadata.paired_end_reads = paired_end_reads
            # '-long' also matches '-longPaired'.
            long_reads = re.search(r'-long', log_msg) is not None
            dataset.metadata.long_reads = long_reads
            short2_reads = re.search(r'-short(Paired)?2', log_msg) is not None
            dataset.metadata.short2_reads = short2_reads
            dataset.info = re.sub(r'.*velveth \S+','hash_length',log_msg.replace('\n',' '))
            if paired_end_reads:
                gen_msg = gen_msg + ' Paired-End Reads'
            if long_reads:
                gen_msg = gen_msg + ' Long Reads'
            if len(gen_msg) > 0:
                gen_msg = 'Uses: ' + gen_msg
        except Exception:
            # Best-effort: a missing or unreadable Log must not prevent the
            # primary file from being regenerated.
            log.debug( "Velveth could not read Log file in %s", efp )
        log.debug( "Velveth log info %s", gen_msg )
        rval = ['<html><head><title>Velvet Galaxy Composite Dataset </title></head><p/>']
        rval.append('<div>Generated:<p/> %s </div>' %(gen_msg))
        rval.append('<div>Velveth dataset:<p/><ul>')
        rval.extend( self._composite_file_items( dataset, 'JJ regenerate_primary_file', skip_log = True ) )
        rval.append( '</ul></div></html>' )
        # open() replaces the Python-2-only file() builtin; the context
        # manager closes the handle even if a write fails.
        with open(dataset.file_name,'w') as f:
            f.write("\n".join( rval ))
            f.write('\n')
if __name__ == '__main__':
    # When executed as a script, run the doctests embedded in this module.
    import doctest
    this_module = sys.modules[__name__]
    doctest.testmod(this_module)