Skip to content

Utility and Misc. Functions

# NextGenSeqUtils.print_fasta — Function.

print_fasta(seqs, names)

Prints the sequences in FASTA format to the terminal, for copy-pasting into alignment tools, BLAST, etc.

source

# NextGenSeqUtils.degap — Function.

degap(s::String)

Returns given string without '-' gap symbols.

source

degap(s::DNASequence)

Returns the given sequence without '-' gap symbols.

source

# NextGenSeqUtils.dash_count — Function.

dash_count(inStr::String)

Counts number of gap symbols '-' in given string.

source

# NextGenSeqUtils.single_gap — Function.

single_gap(str::String, ind::Int)

True if str has a single gap '-' at index ind, else false.

source

# NextGenSeqUtils.single_mod_three_gap — Function.

single_mod_three_gap(str::String, ind::Int)

True if the gap in str at index ind has a length congruent to 1 modulo 3 (e.g. 1, 4, 7, ...), else false.

source

# NextGenSeqUtils.seq_details — Function.

seq_details(fasta_path)

Gives names, sequences, error rates, and lengths from given filepath, which may end in '.fasta' or '.fastq'.

source

# NextGenSeqUtils.print_rgb — Function.

print_rgb(r, g, b, t)

Prints t to the terminal in the color given by the components r, g, b.

source

# BioSequences.reverse_complement — Function.

reverse_complement(seq)

Make a reversed complement sequence of seq.

Ambiguous nucleotides are left as-is.

source

reverse_complement(kmer::Kmer)

Return the reverse complement of kmer.

source

reverse_complement(dna_string::String)

Returns the complement of the reverse of the given nucleotide sequence.

source

# NextGenSeqUtils.print_diffs — Function.

print_diffs(s1, s2; width=5, prefix="")

Prints two already aligned sequences with differences in color to terminal.

source

# NextGenSeqUtils.trim_ends_indices — Function.

trim_ends_indices(seq, ref; edge_reduction=0.1)

Align seq to ref with default low penalties for gaps on ends, and trim insertions on the ends of seq. Returns (start, stop) indices.

source

# NextGenSeqUtils.translate_to_aa — Function.

translate_to_aa(s::String)

Return amino acid string translation of nucleotide sequence using BioSequences conversion.

source

# NextGenSeqUtils.generate_aa_seqs — Function.

generate_aa_seqs(str::String)

Return sequence translated to amino acids in each reading frame (returns three amino acid sequences).

source

# NextGenSeqUtils.filter_by_length — Function.

filter_by_length(args...)

Deprecated. See length_filter.

source

# NextGenSeqUtils.length_filter — Function.

length_filter(seqs::Array{String, 1}, phreds::Union{Array{Vector{Phred},1},Void}, names::Union{Array{String,1},Void},
              minlength::Int, maxlength::Int)

Filter sequences and corresponding names and phreds (which may be nothing) by length.

source

length_filter(seqs::Array{String, 1}, minlength::Int64, maxlength::Int64)

Filter sequences by length.

source

# NextGenSeqUtils.concat_fastas — Function.

concat_fastas(filepaths::Array{String, 1}, outfile::String)

Write contents of all given files to a single .fasta file.

source

# NextGenSeqUtils.maxfreq — Function.

maxfreq(vec)

Return the frequency of the most common element in vec.

source

# NextGenSeqUtils.freq — Function.

freq(vec, elem)

Return the frequency of given element in given array; if the element is not present, return 0.0.

source

# NextGenSeqUtils.sorted_freqs — Function.

sorted_freqs(vec)

Return tuples of (freq, elem) of unique elements of vec in order of decreasing frequency.

source

# NextGenSeqUtils.freq_dict_print — Function.

freq_dict_print(dictin; thresh=0)

Prints frequency:element for each element of dictin whose frequency is above the given threshold thresh, where dictin is a proportion map of elements (see proportionmap in StatsBase).

source