Struct LdBG

Source
pub struct LdBG {
    pub name: String,
    pub kmer_size: usize,
    pub kmers: HashMap<Vec<u8>, Record>,
    pub scores: HashMap<Vec<u8>, f32>,
    pub links: HashMap<Vec<u8>, HashMap<Link, u16>>,
    pub sources: HashMap<Vec<u8>, Vec<usize>>,
    pub noise: HashSet<Vec<u8>>,
    pub verbose: bool,
}
Expand description

Represents a linked de Bruijn graph with a k-mer size specified at construction time.

Fields§

§name: String
§kmer_size: usize
§kmers: HashMap<Vec<u8>, Record>
§scores: HashMap<Vec<u8>, f32>
§links: HashMap<Vec<u8>, HashMap<Link, u16>>
§sources: HashMap<Vec<u8>, Vec<usize>>
§noise: HashSet<Vec<u8>>
§verbose: bool

Implementations§

Source§

impl LdBG

Source

pub fn new(name: String, kmer_size: usize) -> Self

Source

pub fn from_file(name: String, kmer_size: usize, seq_path: &PathBuf) -> Self

Create a de Bruijn graph (and optional links) from a file path.

§Arguments
  • name - A string representing the name of the graph.
  • kmer_size - The k-mer size.
  • seq_path - A path to the sequence file.
§Returns

A new instance of LdBG.

§Panics

This function will panic if it cannot open a file.

Source

pub fn from_files( name: String, kmer_size: usize, seq_paths: &Vec<PathBuf>, ) -> Self

Create a de Bruijn graph (and optional links) from many file paths.

§Arguments
  • name - A string representing the name of the graph.
  • kmer_size - The k-mer size.
  • seq_paths - Paths to sequence files.
§Returns

A new instance of LdBG.

§Panics

This function will panic if it cannot open a file.

Source

pub fn from_sequence(name: String, kmer_size: usize, fwd_seq: &Vec<u8>) -> Self

Create a de Bruijn graph (and optional links) from a sequence.

§Arguments
  • name - A string representing the name of the graph.
  • kmer_size - The k-mer size.
  • fwd_seq - A forward sequence.
§Returns

A new instance of LdBG.

Source

pub fn from_sequences( name: String, kmer_size: usize, fwd_seqs: &Vec<Vec<u8>>, ) -> Self

Create a de Bruijn graph (and optional links) from a list of sequences.

§Arguments
  • name - A string representing the name of the graph.
  • kmer_size - The k-mer size.
  • fwd_seqs - A vector of forward sequences.
§Returns

A new instance of LdBG.

Source

pub fn name(&self) -> &String

Get the name of the graph.

§Returns

A reference to the name of the graph.

Source

pub fn verbose(self, verbose: bool) -> Self

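Set the verbose flag of the graph, consuming the graph and returning it (builder-style). Presumably this controls whether diagnostic output is produced; confirm against the implementation.

§Arguments
  • verbose - The value to assign to the graph's verbose field.
§Returns

The graph itself, with the verbose flag updated.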

Source

pub fn remove(&mut self, kmer: &[u8]) -> Option<Record>

Source

pub fn score_kmers(self, model_path: &PathBuf) -> Self

Score k-mers using a Gradient Boosting Decision Tree model.

§Arguments
  • model_path - A path to the model file.
§Returns

A new instance of LdBG with updated k-mer scores.

§Panics

This function will panic if it cannot load the model from the specified path.

Source

pub fn infer_edges(&mut self)

Source

pub fn correct_seqs(&self, seqs: &Vec<Vec<u8>>) -> Vec<Vec<u8>>

Source

pub fn correct_seq(&self, g: &DiGraph<String, f32>, seq: &[u8]) -> Vec<u8>

Source

pub fn correct_seq_old(&self, seq: &[u8]) -> Vec<Vec<u8>>

Source

pub fn assemble(&self, kmer: &[u8]) -> Vec<u8>

Starting at a given k-mer, assemble a contig.

§Arguments
  • kmer - A slice representing the starting k-mer.
§Returns

A vector containing the assembled contig.

§Panics
  • This function will panic if the k-mer length does not match the expected length.
Source

pub fn assemble_all(&self) -> Vec<Vec<u8>>

Assemble all contigs from the linked de Bruijn graph.

§Returns

A vector of contigs.

§Panics
  • This function will panic if the node weight for a unique node cannot be retrieved.
  • This function will panic if self.kmers.get(cn_kmer) returns None, because the result is unwrapped.
Source

pub fn assemble_at_bubbles(&self) -> Vec<Vec<u8>>

Assemble contigs at superbubbles in the graph.

This function traverses all k-mers in the graph, identifies superbubbles, and assembles contigs from unique nodes within these superbubbles.

§Returns

A vector of contigs, where each contig is represented as a vector of bytes.

§Panics
  • This function will panic if the node weight for a unique node cannot be retrieved.
  • Specifically, g.node_weight(*unique_node) is unwrapped before as_bytes() is called, so a None result causes the panic.
Source

pub fn clean(self, threshold: f32) -> Self

Source

pub fn clean_color_specific_paths(self, color: usize, min_score: f32) -> Self

Clean color-specific paths from the graph based on a minimum score threshold.

This function removes paths that are specific to a given color and have a score below the specified threshold.

§Arguments
  • color - The color index to filter paths by.
  • min_score - The minimum score threshold for paths to be retained.
§Returns

A new instance of the graph with the specified paths removed.

§Panics

This function will panic if the assemble_forward or assemble_backward methods fail to assemble a contig.

Source

pub fn clean_branches(self, min_score: f32) -> Self

This function removes tips from the graph. A tip is defined as a k-mer with an in-degree or out-degree of 0.

§Arguments
  • min_score - The minimum score threshold for k-mers to be considered part of a tip.
§Returns

The modified de Bruijn graph with tips removed.

§Panics
  • This function will panic if self.kmers.get(cn_kmer) returns None (the canonical k-mer is not present in the graph), because the result is unwrapped before in_degree() is called.
Source

pub fn clean_tangles(self, color: usize, limit: usize, min_score: f32) -> Self

Clean tangles from the de Bruijn graph.

This function identifies and removes tangles from the de Bruijn graph based on a specified color, traversal limit, and minimum score threshold. A tangle is defined as a region in the graph where the in-degree and out-degree of a k-mer sum to 4 or more, indicating a complex branching structure.

§Arguments
  • color - The color to filter k-mers by.
  • limit - The maximum number of nodes to traverse before giving up.
  • min_score - The minimum score threshold for k-mers to be considered part of a tangle.
§Returns

The modified de Bruijn graph with tangles removed.

§Panics
  • This function will panic if g.node_weight(node) returns None, meaning the node does not have an associated weight.
  • This function could panic if crate::utils::canonicalize_kmer(current_kmer) encounters an unexpected input that it cannot process.
  • This function could panic if crate::utils::canonicalize_kmer(kmer) encounters an unexpected input that it cannot process.
Source

pub fn clean_hairballs(self) -> Self

Source

pub fn clean_tips(self, limit: usize, min_score: f32) -> Self

This method will remove tips that have a score below the specified minimum score. A tip is defined as a region of the graph where there is only one path from the source to the sink.

§Arguments
  • limit - The maximum number of nodes to traverse before giving up.
  • min_score - The minimum score for a tip to be kept.
§Returns

A new LdBG with the specified tips removed.

§Panics

If self.kmers.get(cn_kmer) returns None, the call to unwrap() will cause a panic.

Source

pub fn clean_superbubbles(self, color: usize, min_score: f32) -> Self

This method will remove bubbles that have a score below the specified minimum score. A bubble is defined as a region of the graph where there are two paths from the same source to the same sink.

§Arguments
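  • color - A color index; presumably restricts the search to k-mers of that color, as in clean_tangles (confirm against the implementation).
  • min_score - The minimum score for a bubble to be kept.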
§Returns

A new LdBG with the specified bubbles removed.

§Panics
  • When calling unwrap on the result of g.node_weight(*node). If the node does not exist in the graph, this will cause a panic.
  • Note: looking up a score does not panic. The result of self.scores.get(&cn_kmer) is passed to unwrap_or, so if the canonical k-mer is not found in the scores, the default value 1.0 is used instead.
Source

pub fn clean_contigs(self, min_contig_length: usize) -> Self

This method will remove contigs that are shorter than the specified minimum length and that are not connected to the rest of the graph.

§Arguments
  • min_contig_length - The minimum length of a contig to keep.
§Returns

A new LdBG with the specified contigs removed.

§Panics

This method will panic if the k-mer size is not set.

Source

pub fn traverse_kmers_until_condition<F>( &self, start_kmer: &[u8], color: usize, limit: usize, stopping_condition: F, ) -> DiGraph<String, f32>
where F: Fn(&[u8], usize, &Self) -> bool,

Source

pub fn traverse_kmers(&self, start_kmer: &[u8]) -> DiGraph<String, f32>

Traverse kmers starting from a given kmer and build a graph.

Source

pub fn traverse_contigs(&self, start_kmer: &[u8]) -> DiGraph<String, f32>

The traverse_contigs function traverses kmers starting from a given kmer and builds a directed graph of contigs. It marks all kmers in the start contig as visited, then traverses forward and backward to build the graph, ensuring that each kmer is only visited once. The function returns the constructed graph.

§Arguments
  • start_kmer - A slice of bytes representing the start kmer.
§Returns

A directed graph of contigs.

§Panics
  1. Unwrapping Option values:

    • If graph.node_weight(node) returns None, the call to unwrap() will panic.
    • If self.last_kmer(this_contig) or self.first_kmer(this_contig) returns None, the call to unwrap() will panic.
    • If visited.get(&canonical_kmer) returns None, the call to unwrap() will panic.
  2. Indexing operations:

    • If contig.windows(self.kmer_size) is called with a kmer_size of 0, it will panic. (A kmer_size larger than the length of contig does not panic; it simply yields no windows.)
Source

pub fn traverse_all_kmers(&self) -> DiGraph<String, f32>

Traverse all kmers in the graph and return a new graph with all kmers merged. This function is useful for collapsing kmers that are separated by bubbles. The new graph will contain all kmers as nodes and edges between kmers as weights.

§Returns

A new graph with all kmers merged.

§Panics

Panics if the graph is not a directed graph.

Source

pub fn traverse_all_contigs(&self) -> DiGraph<String, f32>

Traverse all contigs in the graph and return a new graph with all contigs merged. This function is useful for collapsing contigs that are separated by bubbles. The new graph will contain all contigs as nodes and edges between contigs as weights.

§Returns

A new graph with all contigs merged.

§Panics

Panics if the graph is not a directed graph.

Trait Implementations§

Source§

impl Clone for LdBG

Source§

fn clone(&self) -> LdBG

Returns a copy of the value. Read more
1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
Source§

impl Debug for LdBG

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more

Auto Trait Implementations§

§

impl Freeze for LdBG

§

impl RefUnwindSafe for LdBG

§

impl Send for LdBG

§

impl Sync for LdBG

§

impl Unpin for LdBG

§

impl UnwindSafe for LdBG

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> CloneToUninit for T
where T: Clone,

Source§

unsafe fn clone_to_uninit(&self, dst: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dst. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

§

impl<T> Instrument for T

§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided [Span], returning an Instrumented wrapper. Read more
§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more
Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> IntoEither for T

Source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
§

impl<T> Pointable for T

§

const ALIGN: usize

The alignment of pointer.
§

type Init = T

The type for initializers.
§

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes a with the given initializer. Read more
§

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more
§

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more
§

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more
Source§

impl<T> Same for T

Source§

type Output = T

Should always be Self
§

impl<SS, SP> SupersetOf<SS> for SP
where SS: SubsetOf<SP>,

§

fn to_subset(&self) -> Option<SS>

The inverse inclusion map: attempts to construct self from the equivalent element of its superset. Read more
§

fn is_in_subset(&self) -> bool

Checks if self is actually part of its subset T (and can be converted to it).
§

fn to_subset_unchecked(&self) -> SS

Use with care! Same as self.to_subset but without any property checks. Always succeeds.
§

fn from_subset(element: &SS) -> SP

The inclusion map: converts self to the equivalent element of its superset.
Source§

impl<T> ToOwned for T
where T: Clone,

Source§

type Owned = T

The resulting type after obtaining ownership.
Source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
Source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

§

fn vzip(self) -> V

§

impl<T> WithSubscriber for T

§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a [WithDispatch] wrapper. Read more
§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a [WithDispatch] wrapper. Read more
§

impl<T> Allocation for T
where T: RefUnwindSafe + Send + Sync,