

Utility and Misc. Functions

# NextGenSeqUtils.print_fasta — Function.

print_fasta(seqs, names)

Prints the given sequences and names in FASTA format to the terminal, for copy-pasting into alignment tools, BLAST, etc.

source

# NextGenSeqUtils.degap — Function.

degap(s::String)

Returns given string without '-' gap symbols.

source

degap(s::DNASequence)

Returns given sequence without '-' gap symbols.

source

# NextGenSeqUtils.dash_count — Function.

dash_count(inStr::String)

Counts number of gap symbols '-' in given string.

source

# NextGenSeqUtils.single_gap — Function.

single_gap(str::String, ind::Int)

True if str has a single gap '-' at index ind, else false.

source

# NextGenSeqUtils.single_mod_three_gap — Function.

single_mod_three_gap(str::String, ind::Int)

True if str has a gap at index ind whose length is congruent to 1 modulo 3, else false.

source

# NextGenSeqUtils.seq_details — Function.

seq_details(fasta_path)

Gives names, sequences, error rates, and lengths from given filepath, which may end in '.fasta' or '.fastq'.

source

# NextGenSeqUtils.print_rgb — Function.

print_rgb(r, g, b, t)

Prints t to the terminal in the color given by r, g, b.

source

# BioSequences.reverse_complement — Function.

reverse_complement(seq)

Make a reversed complement sequence of seq.

Ambiguous nucleotides are left as-is.

source

reverse_complement(kmer::Kmer)

Return the reverse complement of kmer.

source

reverse_complement(dna_string::String)

Returns the complement of the reverse of given nucleotide sequence.

source

# NextGenSeqUtils.print_diffs — Function.

print_diffs(s1, s2; width=5, prefix="")

Prints two already aligned sequences with differences in color to terminal.

source

# NextGenSeqUtils.trim_ends_indices — Function.

trim_ends_indices(seq, ref; edge_reduction=0.1)

Align seq to ref with default low penalties for gaps on ends, and trim insertions on the ends of seq. Returns (start, stop) indices.

source

# NextGenSeqUtils.translate_to_aa — Function.

translate_to_aa(s::String)

Return amino acid string translation of nucleotide sequence using BioSequences conversion.

source

# NextGenSeqUtils.generate_aa_seqs — Function.

generate_aa_seqs(str::String)

Return sequence translated to amino acids in each reading frame (returns three amino acid sequences).

source

# NextGenSeqUtils.filter_by_length — Function.

filter_by_length(args...)

Deprecated. See length_filter.

source

# NextGenSeqUtils.length_filter — Function.

length_filter(seqs::Array{String, 1}, phreds::Union{Array{Vector{Phred},1},Void}, names::Union{Array{String,1},Void},
              minlength::Int, maxlength::Int)

Filter sequences and corresponding names and phreds (which may be nothing) by length.

source

length_filter(seqs::Array{String, 1}, minlength::Int64, maxlength::Int64)

Filter sequences by length.

source

# NextGenSeqUtils.concat_fastas — Function.

concat_fastas(filepaths::Array{String, 1}, outfile::String)

Write contents of all given files to a single .fasta file.

source

# NextGenSeqUtils.maxfreq — Function.

maxfreq(vec)

Return the frequency of the most common element in vec.

source

# NextGenSeqUtils.freq — Function.

freq(vec, elem)

Return the frequency of given element in given array; if the element is not present, return 0.0.

source

# NextGenSeqUtils.sorted_freqs — Function.

sorted_freqs(vec)

Return tuples of (freq, elem) of unique elements of vec in order of decreasing frequency.

source

# NextGenSeqUtils.freq_dict_print — Function.

freq_dict_print(dictin; thresh=0)

Prints frequency:element for each element of dictin whose frequency is above the given threshold thresh, where dictin is a proportionmap of elements (see proportionmap in StatsBase).

source