sequence Package

fasta Module

class galaxy_utils.sequence.fasta.fastaNamedReader(fh)[source]

Bases: object

close()[source]
get(sequence_id)[source]
has_data()[source]
class galaxy_utils.sequence.fasta.fastaReader(fh)[source]

Bases: object

close()[source]
next()[source]
class galaxy_utils.sequence.fasta.fastaSequence[source]

Bases: object

class galaxy_utils.sequence.fasta.fastaWriter(fh)[source]

Bases: object

close()[source]
write(fastq_read)[source]

fastq Module

class galaxy_utils.sequence.fastq.ReadlineCountFile(f)[source]

Bases: object

readline(*args, **kwds)[source]
class galaxy_utils.sequence.fastq.fastqAggregator[source]

Bases: object

VALID_FORMATS = ['solexa', 'sanger', 'cssanger', 'illumina']
consume_read(fastq_read)[source]
get_ascii_range()[source]
get_base_counts_for_column(column)[source]
get_decimal_range()[source]
get_length_counts()[source]
get_max_read_length()[source]
get_read_count()[source]
get_read_count_for_column(column)[source]
get_score_at_position_for_column(column, position)[source]
get_score_list_for_column(column)[source]
get_score_max_for_column(column)[source]
get_score_min_for_column(column)[source]
get_score_sum_for_column(column)[source]
get_summary_statistics_for_column(i)[source]
get_valid_formats(check_list=None)[source]
class galaxy_utils.sequence.fastq.fastqCSSangerRead[source]

Bases: galaxy_utils.sequence.fastq.fastqSequencingRead

apply_galaxy_conventions()[source]
ascii_max = 126
ascii_min = 33
assert_sequence_quality_lengths()[source]
change_adapter(new_adapter, clone=True)[source]
complement(clone=True)[source]
format = 'cssanger'
get_sequence()[source]
has_adapter_base()[source]
insufficient_quality_length()[source]
quality_max = 93
quality_min = 0
reverse(clone=True)[source]
score_system = 'phred'
sequence_space = 'color'
valid_sequence_list = ['0', '1', '2', '3', '4', '5', '6', '.']
class galaxy_utils.sequence.fastq.fastqCombiner(format)[source]

Bases: object

combine(fasta_seq, quality_seq)[source]
class galaxy_utils.sequence.fastq.fastqFakeFastaScoreReader(format='sanger', quality_encoding=None)[source]

Bases: object

close()[source]
get(sequence)[source]
has_data()[source]
class galaxy_utils.sequence.fastq.fastqIlluminaRead[source]

Bases: galaxy_utils.sequence.fastq.fastqSequencingRead

ascii_max = 126
ascii_min = 64
format = 'illumina'
quality_max = 62
quality_min = 0
score_system = 'phred'
sequence_space = 'base'
class galaxy_utils.sequence.fastq.fastqJoiner(format, force_quality_encoding=None)[source]

Bases: object

get_paired_identifier(fastq_read)[source]
is_first_mate(sequence_id)[source]
join(read1, read2)[source]
class galaxy_utils.sequence.fastq.fastqNamedReader(fh, format='sanger', apply_galaxy_conventions=False)[source]

Bases: object

close()[source]
get(sequence_identifier)[source]
has_data()[source]
class galaxy_utils.sequence.fastq.fastqReader(fh, format='sanger', apply_galaxy_conventions=False)[source]

Bases: object

close()[source]
next()[source]
class galaxy_utils.sequence.fastq.fastqSangerRead[source]

Bases: galaxy_utils.sequence.fastq.fastqSequencingRead

ascii_max = 126
ascii_min = 33
format = 'sanger'
quality_max = 93
quality_min = 0
score_system = 'phred'
sequence_space = 'base'
class galaxy_utils.sequence.fastq.fastqSequencingRead[source]

Bases: galaxy_utils.sequence.sequence.SequencingRead

apply_galaxy_conventions()[source]
ascii_max = 126
ascii_min = 33
assert_sequence_quality_lengths()[source]
classmethod convert_base_to_color_space(sequence)[source]
classmethod convert_color_to_base_space(sequence)[source]
convert_read_to_format(format, force_quality_encoding=None)[source]
classmethod convert_score_phred_to_solexa(decimal_score_list)[source]
classmethod convert_score_solexa_to_phred(decimal_score_list)[source]
format = 'sanger'
get_ascii_quality_scores()[source]
get_ascii_quality_scores_len()[source]

Compute the length of the ASCII quality scores without generating the relatively expensive quality score array.

classmethod get_class_by_format(format)[source]
get_decimal_quality_scores()[source]
get_sequence()[source]
insufficient_quality_length()[source]
is_ascii_encoded()[source]
is_valid_format()[source]
is_valid_sequence()[source]
quality_max = 93
quality_min = 0
classmethod restrict_scores_to_valid_range(decimal_score_list)[source]
reverse(clone=True)[source]
score_system = 'phred'
sequence_space = 'base'
slice(left_column_offset, right_column_offset)[source]
classmethod transform_scores_to_valid_range(decimal_score_list)[source]
classmethod transform_scores_to_valid_range_ascii(decimal_score_list)[source]
class galaxy_utils.sequence.fastq.fastqSolexaRead[source]

Bases: galaxy_utils.sequence.fastq.fastqSequencingRead

ascii_max = 126
ascii_min = 59
format = 'solexa'
quality_max = 62
quality_min = -5
score_system = 'solexa'
sequence_space = 'base'
class galaxy_utils.sequence.fastq.fastqSplitter[source]

Bases: object

split(fastq_read)[source]
class galaxy_utils.sequence.fastq.fastqVerboseErrorReader(fh, **kwds)[source]

Bases: galaxy_utils.sequence.fastq.fastqReader

MAX_PRINT_ERROR_BYTES = 1024
next()[source]
class galaxy_utils.sequence.fastq.fastqWriter(fh, format=None, force_quality_encoding=None)[source]

Bases: object

close()[source]
write(fastq_read)[source]
galaxy_utils.sequence.fastq.format

alias of fastqCSSangerRead

sequence Module

class galaxy_utils.sequence.sequence.SequencingRead[source]

Bases: object

append_quality(quality)[source]
append_sequence(sequence)[source]
clone()[source]
color_space_converter = <galaxy_utils.sequence.transform.ColorSpaceConverter object>
complement(clone=True)[source]
is_DNA()[source]
reverse(clone=True)[source]
reverse_complement(clone=True)[source]
sequence_as_DNA(clone=True)[source]
sequence_as_RNA(clone=True)[source]
valid_sequence_list = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'

transform Module

class galaxy_utils.sequence.transform.ColorSpaceConverter(fake_adapter_base='G')[source]

Bases: object

base = 'N'
base_to_color_dict = {'A': {'A': '0', 'C': '1', 'T': '3', 'G': '2', 'N': '4'}, 'C': {'A': '1', 'C': '0', 'T': '2', 'G': '3', 'N': '4'}, 'T': {'A': '3', 'C': '2', 'T': '0', 'G': '1', 'N': '4'}, 'G': {'A': '2', 'C': '3', 'T': '1', 'G': '0', 'N': '4'}, 'N': {'A': '5', 'C': '5', 'T': '5', 'G': '5', 'N': '6'}}
color_dict = {'1': 'N', '0': 'N', '3': 'N', '2': 'N', '5': 'N', '4': 'N', '6': 'N', '.': 'N'}
color_to_base_dict = {'A': {'1': 'C', '0': 'A', '3': 'T', '2': 'G', '5': 'N', '4': 'N', '6': 'N', '.': 'N'}, 'C': {'1': 'A', '0': 'C', '3': 'G', '2': 'T', '5': 'N', '4': 'N', '6': 'N', '.': 'N'}, 'T': {'1': 'G', '0': 'T', '3': 'A', '2': 'C', '5': 'N', '4': 'N', '6': 'N', '.': 'N'}, 'G': {'1': 'T', '0': 'G', '3': 'C', '2': 'A', '5': 'N', '4': 'N', '6': 'N', '.': 'N'}, 'N': {'1': 'N', '0': 'N', '3': 'N', '2': 'N', '5': 'N', '4': 'N', '6': 'N', '.': 'N'}}
key = '.'
to_base_space(sequence)[source]
to_color_space(sequence, adapter_base=None)[source]
unknown_base = 'N'
unknown_color = '.'
value = 'N'
galaxy_utils.sequence.transform.DNA_complement(sequence)[source]
galaxy_utils.sequence.transform.DNA_reverse_complement(sequence)[source]
galaxy_utils.sequence.transform.RNA_complement(sequence)[source]
galaxy_utils.sequence.transform.RNA_reverse_complement(sequence)[source]
galaxy_utils.sequence.transform.reverse(sequence)[source]
galaxy_utils.sequence.transform.to_DNA(sequence)[source]
galaxy_utils.sequence.transform.to_RNA(sequence)[source]

vcf Module

class galaxy_utils.sequence.vcf.Reader(fh)[source]

Bases: object

next()[source]
class galaxy_utils.sequence.vcf.VariantCall(vcf_line, metadata, sample_names)[source]

Bases: object

classmethod get_class_by_format(format)[source]
header_startswith = None
required_header_fields = None
required_header_length = None
version = None
class galaxy_utils.sequence.vcf.VariantCall33(vcf_line, metadata, sample_names)[source]

Bases: galaxy_utils.sequence.vcf.VariantCall

header_startswith = '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO'
required_header_fields = ['#CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO']
required_header_length = 8
version = 'VCFv3.3'
class galaxy_utils.sequence.vcf.VariantCall40(vcf_line, metadata, sample_names)[source]

Bases: galaxy_utils.sequence.vcf.VariantCall33

version = 'VCFv4.0'
class galaxy_utils.sequence.vcf.VariantCall41(vcf_line, metadata, sample_names)[source]

Bases: galaxy_utils.sequence.vcf.VariantCall40

version = 'VCFv4.1'
galaxy_utils.sequence.vcf.format

alias of VariantCall41