skydive/
utils.rs

1use std::borrow::Cow;
2
3use std::collections::HashMap;
4use std::fs::File;
5use std::io::{BufRead, BufReader};
6use std::path::{Path, PathBuf};
7
8use needletail::Sequence;
9use parquet::data_type::AsBytes;
10use petgraph::graph::DiGraph;
11use petgraph::visit::{EdgeRef, NodeIndexable, NodeRef};
12
13/// This function takes a sequence URL and a list of possible extensions, and returns the base name of the file
14/// without any of the provided extensions. It does this by first extracting the last segment of the URL path,
15/// and then iteratively removing any of the specified extensions from the end of the base name.
16///
17/// # Arguments
18///
19/// * `seq_url` - A reference to a URL object representing the sequence file URL.
20/// * `extensions` - A slice of string slices representing the possible file extensions to be removed.
21///
22/// # Returns
23///
24/// * A `String` containing the base name of the file without any of the specified extensions.
25///
26/// # Panics
27/// This function will panic:
28/// 1. If `seq_url.path_segments()` returns `None`, indicating that the URL does not have a path.
29/// 2. If `seq_url.path_segments().map(|c| c.collect::<Vec<_>>()).unwrap().last()` returns `None`,
30/// indicating that the path does not have any segments.
31#[must_use]
32pub fn basename_without_extension(seq_url: &url::Url, extensions: &[&str]) -> String {
33    let mut basename = seq_url
34        .path_segments()
35        .map(|c| c.collect::<Vec<_>>())
36        .unwrap()
37        .last()
38        .unwrap()
39        .to_string();
40
41    let mut sorted_extensions = extensions.to_vec();
42    sorted_extensions.sort_by_key(|b| std::cmp::Reverse(b.len()));
43
44    for ext in sorted_extensions {
45        basename = basename.trim_end_matches(ext).to_string();
46    }
47
48    basename
49}
50
51/// Given fasta files this function will read and return a list of lists containing the contents
52/// of the fasta files
53///
54/// # Arguments
55/// * `path`: paths to fasta files
56///
57/// # Returns
58/// A list of lists containing the contents of the fasta files
59///
60/// # Panics
61/// This function will panic if it cannot read a given file path
62#[must_use]
63pub fn read_fasta(paths: &Vec<PathBuf>) -> Vec<Vec<u8>> {
64    paths
65        .iter()
66        .map(|p| {
67            let reader = bio::io::fasta::Reader::from_file(p).expect("Failed to open file");
68            reader
69                .records()
70                .filter_map(|r| r.ok())
71                .map(|r| r.seq().to_vec())
72                .collect::<Vec<Vec<u8>>>()
73        })
74        .flatten()
75        .collect::<Vec<Vec<u8>>>()
76}
77
78#[must_use]
79pub fn default_hidden_progress_bar() -> indicatif::ProgressBar {
80    indicatif::ProgressBar::hidden()
81}
82
83/// Create a new bounded progress bar with the specified message and length.
84/// The progress bar will be a bar with a spinner.
85/// The progress bar will display the elapsed time, the progress bar, the current position,
86/// the total length, and the estimated time remaining.
87///
88/// # Arguments
89/// * `msg`: The message to display on the progress bar.
90/// * `len`: The total length of the progress bar.
91///
92/// # Returns
93/// A new bounded progress bar.
94///
95/// This will create a new bounded progress bar with the message "Processing sequences" and a total length of 100.
96/// The progress bar will be a bar with a spinner.
97///
98/// # Panics
99/// This function will panic if the progress bar style cannot be created.
100pub fn default_bounded_progress_bar(
101    msg: impl Into<Cow<'static, str>>,
102    len: u64,
103) -> indicatif::ProgressBar {
104    let progress_bar_style = indicatif::ProgressStyle::default_bar()
105        .template(
106            "{msg} ... [{elapsed_precise}] [{bar:40.white/white}] {human_pos}/{human_len} ({eta})",
107        )
108        .unwrap()
109        .progress_chars("#>-");
110
111    let progress_bar = indicatif::ProgressBar::new(len);
112    progress_bar.set_style(progress_bar_style);
113    progress_bar.set_message(msg);
114
115    progress_bar
116}
117
118/// Create a new unbounded progress bar with the specified message.
119///
120/// # Arguments
121/// `msg`: The message to display on the progress bar.
122///
123/// # Returns
124/// A new unbounded progress bar.
125///
126/// This will create a new unbounded progress bar with the message "Processing sequences".
127/// The progress bar will be a spinner.
128///
129/// # Panics
130/// This function will panic if the progress bar style cannot be created.
131///
132pub fn default_unbounded_progress_bar(msg: impl Into<Cow<'static, str>>) -> indicatif::ProgressBar {
133    let progress_bar_style = indicatif::ProgressStyle::default_bar()
134        .template("{msg} ... [{elapsed_precise}] {human_pos}")
135        .unwrap()
136        .progress_chars("#>-");
137
138    let progress_bar = indicatif::ProgressBar::new_spinner();
139    progress_bar.set_style(progress_bar_style);
140    progress_bar.set_message(msg);
141
142    progress_bar
143}
144
145/// Get the canonical (lexicographically-lowest) version of a k-mer.
146///
147/// # Arguments
148///
149/// * `kmer` - A slice representing the k-mer.
150///
151/// # Returns
152///
153/// A vector containing the canonical k-mer.
154#[inline(always)]
155#[must_use]
156pub fn canonicalize_kmer(kmer: &[u8]) -> Vec<u8> {
157    let rc_kmer = kmer.reverse_complement();
158    if kmer < rc_kmer.as_bytes() {
159        kmer.to_vec()
160    } else {
161        rc_kmer.as_bytes().to_vec()
162    }
163}
164
/// Collapse every run of identical consecutive bytes in `seq` into a single byte.
///
/// # Arguments
///
/// * `seq` - The sequence to compress.
///
/// # Returns
///
/// A new vector in which no two adjacent bytes are equal. The comparison is byte-exact,
/// so `A` and `a` are treated as different symbols.
#[must_use]
pub fn homopolymer_compressed(seq: &[u8]) -> Vec<u8> {
    let mut compressed = seq.to_vec();
    // `dedup` removes consecutive repeats only, which is exactly a homopolymer collapse.
    compressed.dedup();
    compressed
}
179
/// Compute the Shannon entropy, in bits, of the byte distribution of `seq`.
///
/// Symbols are counted in a fixed 256-entry table and the terms are summed in ascending
/// byte order, so the same input always yields the same bit pattern. Summing in hash-map
/// iteration order would let the floating-point rounding vary from call to call.
///
/// # Arguments
///
/// * `seq` - The sequence to measure. Bytes are compared exactly (`A` and `a` differ).
///
/// # Returns
///
/// The entropy `-sum(p * log2(p))` over the observed byte frequencies. An empty sequence
/// and a sequence made of a single repeated symbol both return `0.0`.
#[must_use]
pub fn shannon_entropy(seq: &[u8]) -> f32 {
    if seq.is_empty() {
        return 0.0;
    }

    let mut counts = [0_usize; 256];
    for &base in seq {
        counts[usize::from(base)] += 1;
    }

    let len = seq.len() as f32;

    // Subtracting each term from a +0.0 start keeps the zero-entropy result at +0.0.
    counts
        .iter()
        .filter(|&&count| count > 0)
        .fold(0.0_f32, |entropy, &count| {
            let p = count as f32 / len;
            entropy - p * p.log2()
        })
}
197
/// Compute the fraction of bases in `seq` that are `G` or `C`.
///
/// Only the upper-case bytes `b'G'` and `b'C'` are counted; lower-case bases are not.
///
/// # Arguments
///
/// * `seq` - The sequence to measure.
///
/// # Returns
///
/// A value in `[0.0, 1.0]`. An empty sequence returns `0.0` rather than the NaN that the
/// bare `0 / 0` division would produce.
#[must_use]
pub fn gc_content(seq: &[u8]) -> f32 {
    if seq.is_empty() {
        return 0.0;
    }

    let gc_count = seq
        .iter()
        .filter(|&&base| matches!(base, b'G' | b'C'))
        .count();

    gc_count as f32 / seq.len() as f32
}
206
207/// Writes a GFA file from a directed graph.
208///
209/// # Arguments
210///
211/// * `writer` - A mutable reference to an object implementing the `Write` trait.
212/// * `graph` - A reference to a directed graph where nodes are sequences and edges are links with weights.
213///
214/// # Returns
215///
216/// A `Result` which is `Ok` if the file was written successfully, or an `Err` if an I/O error occurred.
217///
218/// # Errors
219///
220/// This function will return an error if any I/O operation fails.
221///
222/// # Panics
223///
224/// This function will panic if:
225/// 1. The file cannot be opened.
226/// 2. Any line in the file cannot be read.
227pub fn write_gfa<W: std::io::Write>(
228    writer: &mut W,
229    graph: &DiGraph<String, f32>,
230) -> std::io::Result<()> {
231    // Write header
232    writeln!(writer, "H\tVN:Z:1.0")?;
233
234    // Write segments
235    for (node_index, node_label) in graph.node_indices().zip(graph.node_weights()) {
236        writeln!(writer, "S\t{}\t{}", node_index.index(), node_label)?;
237    }
238
239    // Write links
240    for edge in graph.edge_references() {
241        let (from, to) = (edge.source().index(), edge.target().index());
242        let weight = edge.weight();
243        writeln!(
244            writer,
245            "L\t{}\t+\t{}\t+\t0M\tRC:f:{}",
246            from,
247            to,
248            (100.0 * weight).round() as u8
249        )?;
250    }
251
252    Ok(())
253}
254
255/// Reads a GFA file and constructs a directed graph from it.
256///
257/// # Arguments
258///
259/// * `path` - A path to the GFA file.
260///
261/// # Returns
262///
263/// A `DiGraph` where nodes are sequences and edges are links with weights.
264///
265/// # Errors
266///
267/// This function returns an error if the file cannot be opened or read.
268///
269/// # Panics
270///
271/// This function will panic if:
272/// 1. The file cannot be opened.
273/// 2. Any line in the file cannot be read.
274pub fn read_gfa<P: AsRef<Path>>(path: P) -> std::io::Result<DiGraph<String, f32>> {
275    let file = File::open(path)?;
276    let reader = BufReader::new(file);
277    let mut graph = DiGraph::new();
278    let mut node_map = HashMap::new();
279
280    for line in reader.lines() {
281        let line = line?;
282        let fields: Vec<&str> = line.split('\t').collect();
283
284        match fields[0] {
285            "S" => {
286                let id = fields[1];
287                let sequence = fields[2].to_string();
288                let node_index = graph.add_node(sequence);
289                node_map.insert(id.to_string(), node_index);
290            }
291            "L" => {
292                if fields.len() < 6 {
293                    continue; // Skip malformed lines
294                }
295                let from_id = fields[1];
296                // let from_orient = fields[2];
297                let to_id = fields[3];
298                // let to_orient = fields[4];
299
300                let weight = fields
301                    .get(5)
302                    .and_then(|s| s.split(':').last())
303                    .and_then(|s| s.parse::<f32>().ok())
304                    .unwrap_or(1.0);
305
306                if let (Some(&from), Some(&to)) = (node_map.get(from_id), node_map.get(to_id)) {
307                    graph.add_edge(from, to, weight);
308                }
309            }
310            _ => {} // Ignore other lines
311        }
312    }
313
314    Ok(graph)
315}
316
317#[cfg(test)]
318mod tests {
319    use sdust::symmetric_dust;
320
321    use super::*;
322
323    #[test]
324    fn test_canonicalize_kmer() {
325        let kmer1 = b"CGTA";
326        let kmer2 = b"TACG";
327        let kmer3 = b"AAAA";
328        let kmer4 = b"TTTT";
329
330        // Test canonical k-mer for kmer1 and kmer2
331        assert_eq!(canonicalize_kmer(kmer1), b"CGTA".to_vec());
332        assert_eq!(canonicalize_kmer(kmer2), b"CGTA".to_vec());
333
334        // Test canonical k-mer for kmer3 and kmer4
335        assert_eq!(canonicalize_kmer(kmer3), b"AAAA".to_vec());
336        assert_eq!(canonicalize_kmer(kmer4), b"AAAA".to_vec());
337    }
338
339    #[test]
340    fn test_dust() {
341        let read = b"TGGCAGCCATAGGTTTTCCCTGGAGTTGTGGCATCTGGAACTACAGGGATGAGCATTTGAGTACATATTACAGTGAGGTGGCCACACTGTGACCCGCAGTTCTGCAGACTGGAAGGCACTGAATGCCAGGATTTTTGCAGAGTGTCACTATGAAGTCCTGACTTGGCTCAGAGACCTTCTTAGAGCAGTAATTCGGGACCAGTGGATTTCTGATAAAGTTATTCTAATTTTCTAATAATTGTTTTCTAATAAAAGCCATATGGCAGGTCCTGCTCCCTTGGTAGCATGACCAGTACCTGGCGCAGTGCTAGTGCTGAGCTGACAGGAAGTGCCTCACCTTCATCTCTCACTTGACAGTGGGTGGAAGGTTCTTGGCTCGGTATCCCTCAGTCATGACTGCACACTGTCCTGAGCTTTTCTCCCAACTTCATCCACTTCATACTATTTTAATAAAGCGGTGCTGTGTATTATAACATTGTGCAGCTGAGCATTACACTCATGGCTCCCATTATCAAGCCCCTGCTATATACAGGGCATTTCACAAAGAAGCAAACTTCCAAGCAGTCACTCAGCAACCTCCTCCTAGGAGCATTTGGGGAAGAGAATCTTGGGGCAAGTTTCCTTTACCACCTGCAGTCACCTGGGATGCTGGGAAAAATTTTGATTTCTGTTGTCTTCCCTTCCAGAAAATTATTTGAGAGTGGGGCCAACAAATCTGCACTTGAGTCCATACCTAGGATAGGTTTTTCTGTGCAGTTTTTTAAGTTTAAGAGGTTTTTAAAGTTTAAGACACACTGGTTAGGGTTTTGGGCTCTGGAGGATGAGAAACCTTGCTTGGGTTATCAGATAACAGATTCTTCTCTGGTTTCCCTCCGATGTTATCAGGGGAATTGTTGGTTGTTTCACATTTGGGTGCTCCTGGGCCTTTTAAGAGCCAGGCTGGGAGGGCTGGTGATGGCAACCCTGGCTGGCAACAGAGGCTGTTTCCACCCCTGGGTGGCTCCCCACCTGCTTTCTGCCCTGGTAGGGTTCAAGGCTCCGGGAATTGGCACTCAGTGAAAGAATTTTGATTTCCAGTGGAATTTGTGCTGTCACAAGATTTGACCCATGGGACTAGTGAATAGATAGATGGGTTAGGTGAGCATGTGACTTGGCTGGTGGCCGAGAGAGTGATAAATGTGAGAGTAGCTGGGGAAAATGGAAACGGATTAAGATAGAAGAGGGGCATTGTCCATCTGGCCGATGGCAAGGGCTGGTGGAGCAGCAGTTCTAGACTATTCTGAGGTTAGTTCAGAAACTGACCTAACAACGTGGGAAGTCTCTCCCAAATTGTTTATAGTTTCTCACAGTGGGTGCCTTTTGAAGTGATTGTATTTGACAGCCCAGAGTGTTGGGCACACAGCTTTGTGCTATCTAAGGTCACGGTCCAATTGTGATTCCTAGCAATAGCTTCAAGGCATATTTCATAGCTCTAATAGTTTTCAAGTATAAGGGTGTGAGAATGAGCTTTAAGAATATTTATGCCATGAAATCTTCCAATTGCTCTTCAACACGGGTGCACCATAGTAGGTGTGAATAGAAGTGGTGGCAACAGACCTGAATTCAGGTCTGCCACTGACTATAATACTAGCTTGAGAAGTAACTTGAACTCTGTGAGCCTCAGTTTCCTGTCTGTAGAATGAAGACAATGATACTGCCTTCATAGGATTATTATTAGGATTAAATGAAATATTATAGTGAGGCATTCAGCAAAGTGTTCTATAAATTGGGGTAGGATGTGAGGTAATTGGCATTGTTAGATGCGTCTCTGGGTAAACAACCAAATTTTCTGCTTATTTGGCTGTTTCCCTAGCTGCCTTGTTTAAAACAAAACACCTGAGTTGACCAGAACACCTCTGTTTTTAGAATCTAACTTTGCAGTTGTATTAGTCTCTTCTTGCATTGCCATAAAGAAATACCTGACACCTTCATAAAGA
AATGAGGTTTAACTGGCTCACGGTCCTGCAGGCTTTACAGGAAGCATGGTACTGACATCTACTTGGCTTCTAGAGAGGCCTCAGGAAGCTTACAGTCATGGCAGAAAGTGAAGCAGTAGCAGGCACATCACATGGTGAAAGCGGGAGCAAGAGAGACAGTTGGGAGAGGAGGCGCCACACACCTTTTAAACAACCAGATCTCCCAAGAACTCACTCACTATCGTGAAGATAGCACCAAGCCATGAGGGGTCTGTCCCCATGATCCAGACACCTCTCACCAGGCCCCACCTCCAGCACTGGGGATTACAATTGAGCATGAGATTTGGGTAGGGACAACTATCTGAACTGTATCAGCAATAGAGTGTGATTATAAGTTATGCTGTAGGAATAGAATTGTTGTCACTGAAAGATTCCCTTGGCCATGGGAGCCTCCTGGCTCTATGAAGGATCAGCCAATGCTTATCCAGGGAGGTAATGATAAGGTCGAAGTTTGACAAGAAATCTACGTTTTCTTAAGCTAAGTAGTAGGTTAACAGAAGATATGTTGTGTGTTAATAGTTCTATTTACATCTCTTTCTCCAAGGTTATACACACTCTGCATCACTAAGTCAAGACACCATTCTTTGACACTGGCTAATAGTAATAGCAATCATAGCCACTGTGCATTAGCACTTACTCCACATTCCTTGTACTGAGCACTACTTACATTATGTTGGTGTTGTCATTGTCACCATTTCATACATAAAGAAACCAAGGTTTTCAGAGATTGAATAACTTGTGCAAGATCACACATCTGGTAGGGCAGATCCAAGATCTGTTTGTCTCCAAAATCTGCTTCTGTCCTGCCTGGGAGACCTTGGGAATGACGGCAAGTGGTTGTAGGAAGGAGGGCTGATGTCAAGGTGGCTGTGGGGGCAGGAGGCTGAGGGAACTCACTGACCCTTGAGGGACTCCTTAGGTGGGGGATTCTGGGTTTCCTGTTGGCAGCTGGAGGGGGAGTGCCAGTTCCCATAAGTGGTTATTGCCCAGGTTGTGACCTTGGCTTGGCCAGTGATTGGTTCATTTTGGAATTTCATGAGTGACCCCCAGGCAGGGTTCTTACAATCACGCTGGAAGACCACCCAGGAAGTTCCTGTTGGGGTAAAATGATGCAGCAGCCTGCTTTCCTCAGGAGGTCTGAACCCTCCCCATGTACACACACACACACACACAAACACACACACACCCACCCACACACCCTCCACCCCTCTTGGTGTCTTTGGCCTTTTTTCCTAGCTTGTTTGTTTCTATGGTGTCTTCAAGTTCAACTAGAACCTATGGGAATGACTTAGTTTTGAACCTGTAAGAATGAGAAGTAAACAATTCTTGTACTGACTTTGAATTTCCTTTCTTCTGTTGTCCAAAGGTGAAGGGTGACAATGTGTCCCAGATTTTTTTGGATATTCTACAAAAAAATAGATATTTTTTTGTAGAAAAAAGCTTATTCTACAGTGTTGTCCCAATTTTAAAAGCCCTAGAAAACTGGTTAAGGCAAATTATAACCAAATCAAATCACTAATTATTACAATAAAGTGTAACTAGCTACAAAAATCCTAAATTACAATTTTAGGCTTTGAGAAAATATCACTGATGATAGAGGAAGAGTGACAGTCTTTGTTTTGGGTCTTGGGATGGCAGAAAGAAAATATTTAGTAGGGAGTAAAGATCAGTGTACCCCTTGAAGTGTGGTGAAGATGGGTGGGTTTTGATGCTCTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTATGTAGAATTTCCCTAAATTCAAAATGATACTTACATTTTGATAAACCCAGAGAGTTAATTAACATGGCATAAATGTGCTAGCTCATTGTTGTTTTTCAGGGAAAGAACTAGAGAGAGACACAAGTCACCCCGGAATAAGACGGCAGAGGGTCAGAAAAGTCTGTCACCATTCAACCTCCCACTGGAGAGCCCCTGTTGGGAAATGATTCTA
CTCGGACAGAGGAAGTTCAGGTAAGGATCAAAGGTGGTCTGTAAGCACACTGCCACTTGGCCAAGACTCCTGTCTGTAAATGTTTTCCTTAGGTTCTCTTGGATTTTTGTCTTTATTTTTCTAGCATGCTAATTAGTTTATCTATGTCCCATGGTCTCTTGTTGGTCTAAGGACAGCCTGTCCTGCCCTTTGGTAGCGTGGGGATTCTTCTGAAGTATGATTGGTTGGCCTGCTTTACATGGTGTGGAAAAGTAGCCAGCAAGGTTGACGACAGGGTTGGGAAGGGAAAAGCTGAAGTCTCCCACGACTCATTTCAAAATGGAACAGTATAAGGGGGAGAAGGAAACTCAGAAAAACCTAAGAAGTTTAAAAAACATGGGGCAGCCCAGACTGACCATTACTACTAGAGCTATGCAGGAATTGAGAGGCCGCACGCTCAGATGCCTGGTAGAGAAACGTAAGTTAATTAAGGAGGCCCTGGGTCGAAAAGAGGGGGCAAAAATATATTAACCTAGCTTTGGGTTAACAGCAATCTGTGCAGTGCCTCAGTGTCAGTGCTGTAGTGTGGTGTGAAGGAGCCTATGGCTAACTGGAGAATGCATTTCCTCTGTAAAGGAAACAGCAGCTCCGCAGCTCCAGACACCTACTGTTGCTCAGGAATGCAGGGATTCATTGTTTGAGAAAAGCTGCAAATCAGGATTTTATGTGGAATCCTAACTTTACAGTATTTTTGAAATACTGATACTTTATTAATTTTTTTGAACCATTGAGTGGGTTCTTCTCCAGGTTCCAACTGTCTGGCAGCCCACCTGATTTAAATAAACATCTGAGCTATACACAGAAACATGTCTGCATACCCTCTGCACATCCTGAAGTATATATACACATGTCCAGCCTTGCCCCTCATAAACAAAGTGGTGTATGATACACAGCTGTAAAGATAGATATAGGACTATAGATAAGCATACATCTGTACATACCTGTGCATACACACAGGTAAGCATTTATAATCAAATAGGTGGACTGAAACTGGAATTCCTCAGAGTACACAAGGTGTTCTTGGGACACCAAAACTACAATTGTGGGGTTGAACGTGGGATTCATTGAGCAATGAGCAAATGCCTTTAGTGCTGCCTGCCTTGGCTCTGGATGGCTGATGGTCGGATGGGGCCAGTCTTAGGATTGGATCACCCTGGAGTACTTGAAGGGGTCAGTTTCCTCCTGGATGTGGGTTCAGAGGTGCCAGTGGCCTACAGCAAAGGCTCTTCTTTCTCTGCATCTCCTCTGCACCTCGTAGCTGAGAACACTTTGAGAAGCTCTTGGTGTTGCCCCAGGATGATCTGGTGTGAAAAGCATTGAGATGGGTGTTTGGAGGCTGTATTTTTTAGTAGCTCTGTTACCTTGAGCAGTCACAGCCTTTGTAGGCCTCAATTTCTTTATTGAAAATCTAGGGTTTTGATGAAAGCATCTTAGGTGCTTTTTCTTCTAAGAACCTGAAGCTTAACAGGATCCTTTGTGTATCTACATGTTTTAGGCATACATGTGCACCCCAGGAAATTCTCTCATGCCCTTTCTAGTCAATCTCTGCCCCACCCTCACCTCTCCAAGGCAACCACTGTGTTGATTTCTATCACTGTAGATCAATTTTGCCTGTTTTTGAATTTAGTATAATTAGAATCATATGGTCCATCTCCTGGCCCCCACCCACCCGCCCTGCTTAGCATAATGATTTTGAGATTTATCCATGTTTTGGTATGGTTTCAACAGCTTATTCTTTTTATTTTTGCTGAGTAGTATTCCATTGTATCAGTCTACCACAATTTGTTATCCATTCTCCTAGTGGATGGACATTTGGGTTTTTTTTGTTGTTGTTTGTTTGTTTTTTGAGGCAGAGTCTTGTTCTGTCGCCCAGGCTGGAGTGCAGCGCATGATATCACCTCACTGCAGCCTCTACCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAATAGCCG
GGGATTACAGGCACGCACCACCACGCCCAGCTAATTTTTGTATTTTCAGTAGAGATGGGCTTTCACCATGTTGGCCAGGCTTGGTCTCGAACTCCTGACCTCAGGTGATTTGCCTGCCTTGGCCTCCCAAAGTGCTAGGATTACAGGCGTGAGCCACCGTGCCTGGCTGGATGTATATTTTTATTAATTTTTGGATAAAACCTGAGAGTGGAATTGCTGGGCCATATAGCTAAGTGTATATTTAGATTTATATGAAACCGCCAGAGTGGTTTTCCAGAGGCACTGTACCACGGTCCACTTCCACCAGCAGTGTTGGAGAGTCCTGGCTGCTTCTGGCCATCGTCTGAAATAGGAATTTCTCTCACTGTAGGTGATACTTCTGACTTTGCAAGTTGAAGGATTATTAGTTTATGGGATTGAGACCTTCACCACCACCACTTCTTACCATAGCCCATACATTTCATAAATCATGGTTTTTTTGGTCATTACTAGATTCGGAGTTATTTGATGATGAGCGATGTCTGTCTTGCTGATTTAGCTACTAACTGAAACTAGCTTTTTCTAAGTTGGTGTCCTAATTTCACCCCCTTTGCCACTGCATCTGACTGTTTTCTTTCCGAGTGAAAGGATACATACAAATTTCAGAGGCAGAAACCTCTTTGGCCTCCTGTGTCTTTTCAGCGCCTTGCTCTTATTGCTTCATTATTGTTGCCAGTTGGTTTTTAAACAACAAAATCCTTTAAAATTCTATCAACTGGGTTTTGCTAAGTGAATAGACTAATTGCTTTAACTAGCAACGGCCTAGAAGTTTAAAAAGAGAGGAAGCTAGAAAGTAAAAGATAACATTTTAATAATCCTGGTTGTTTCTATGCCCTTGATGTTTAGTTCCTCGTGAAAACATGTTTTAGAAAGAATTTTTAAGCCAATCTGGCCATACACGGATTCCTGGATTTGCTTAGCTTGGTCCATGAGAAATATTGTTAAAGAGTGCTTGACACTGATGCTTGTTAAGTGGATCTTGTGAACATCATAAGGAGATTTTTTTTTTTTTTTTTGAGACGGAGTCTCACTCTGTAGCCCAGGCTGGAGTACAGTGGCACGATCTCAGCTCACTGCAACCTCCGACTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCTGAGTAGCTGGGATTACAGGCATGTGCCACCACGCCTTGCTAATTTTTGTATTTTTAGTAGAGACGGGGGTTTCACCATGGTTGGCCAGGATGTTCTTGATCTCCTGACCTCCTGATCCGTCCGCCTCGGCCTTCCAAAGTACGGGGATTACAGGTGTGAGCCACCGCGCCCCAGTCCATAAAGAGATTTTAAAATGTGGGTCCTAGCTACAGGTAAGCTTGGGTTTGTGTAGTGGTGTAAGTTCCCTTGCTACGCCCTTTGCTCTTCTGGGCTGCTAGAGGGTGTAGTAATACTCCCACCTCCAAAAGTTGGACTTCGTAAGCCTTTATAACCCAGCGTGAATTGGAAAGAAGATGCAGGAGGTTTATCTCTATAGATGAGCTCTCACCAAGATTAGTCTAATACCTGGGTTGCGCATTGCAGGGCAAACAGCTCCAGGCCCTCAGAGCTGCTCAAGGCTTTTCAACCCAGGGGATGATAATCAATGTTATGTCAATGAATCAGCCAAACAGACAGAAAGATCACATTATGTTTTCTCTGTTTGAAAGGTAAATACCTCATACATTTTGAAAATTTCAATGAAAATCGTTTGAGTTAAGAAGTTCTAATATTTAAAGAGTTAAGCCTTTCATTTTCTGGAAGCCTTTGTGAATAGGGCTGGGTAGATGCAGCGGGCCCTGCATGTTCACTGCCCTTTGTAGCTTTTACAAATGACCTGTGTCATGTCATCCTCACTGTCTTCTCCCCACCAGGATGACAACTGGGGAGAGACCACCACGGCCATCACAGGCACCTCGGAGCACAGCATATCCCAAGAGGACATTGCCAGGATCAGCAA
GGACATGGAGGACAGCGTGGGGCTGGATTGCAAACGCTACCTGGGCCTCACCGTCGCCTCTTTTCTTGGACTTCTAGTTTTCCTCACCCCTATTGCCTTCATCCTTTTACCTCCGATCCTGTGGAGGATGAGCTGGAGCCTTGTGGCACAATTTGTGAGGGGCTCTTTATCTCCATGGCATTCAAACTCCTCATTCTGCTCATAGGGACCTGGGCACCTTTTTTTCCGCAAGCGGAGAGCTGACATGCCACGGGTGTTTGTGTTTCGTGCCCTTTTGTTGGTCCTCATCTTTCTCTTTGTGGTTTCCTATTGGCTTTTTTACGGGGTCCGCATTTTGGACTCTCGGGACCGGAATTACCAGGGCATTGTGCAATATGCAGTCTCCCTTGTGGATGCCCTCCTCTTCATCCATTACCTGGCATCGTCCTGCTGGAGCTCAGGCCAGCTGCAGCCCATGTTCACGCTGCAGGTGGTCCGCTTCCACCGATGGCGAGTCCCGCTTCTACAGCCTGGACACCTGAGGTAAGAGGCAACATCCAGGAGGCAGAAAGGATGGCTGATGTCTTGCTGGGAGACAGCTGCTCTGTAGCACGTGAGGGGTGGTGACAGATGCCAAGAGCTAGGACCAGAGTCTGACTCTTTTCTGGTTTTGGGGAGGAGATGCGAGGGTGGGGAGGGTTGTCCATGTTCATTGAGTTTCTGGACTTCTAGATGGTGCGGGGCAGTTGCTGGCTCTCACCCAGGTTGAGATTTTGCTGGGCTTGTTCTCAAAGTTATTGGCAGCTCCCAAAAATGATGGAGAAAGGAGATGCATAGTGATGGCTGCCTTCTTTGACTCTGAAATTGGCCAATGGACAACAGATAAAGTGACCAGCAGCTCCATTTTGTCCCAAATGTGACATCTGGTTTACCATGTTGTCCCAGTGGAATAATGAATTGTTCCTTTTTTCCCACTCTCAGAGGCCTGGTTTGGGCAGTAAATTATATGGTCATCCGAGGGACCCTTCCAATAAAGAATCAAGTGCAGGTTAGAGACTCCAAATGTGTAATCCTTGAGTGTTGTGAAAATGTATGCCGTGAGAAAAAGTTAGAAGTCAGTTGGGTTGTCATACTTACATCTTTGCATAAAATCTCATTATTTTGTGGTTAAATAAGAGTGATTACCATCATTTTATTTGCTTCAAGGTAAGCACTTTATATATAGATTGTGTATTTAGTCTTCATAGAACCCGTGACCTAGGTATTATTAATCCCTGTGTCACAGATGAAGAAACTAGGGCTTAGGGGATTTAAGTAATTTGCACAAAAACATATGGCTAGCCTCATTTAGGATTCACTCAGATGTCATGAGGCCAGGGCTGAGTGAATGCCCCCATAATGGCATCTCTCACTTTGTGGTTAGTGGCCTATTTTTCCATCTGTTTTCTTCCACAGACTATGAACTCCCTGAGGCCAGGGGCCACCCTTATACCTCATTACATCCTCAGTGCCTGGCATGGAGCATGGCTTGCACTGAGATGTTCTCTGGGTGAATGCAGAGCCTGGGACATTTGACTTCAAAGCCTTTACCCTCTCCCAGGCTCTCTGCCTCCTTAGGCAGTATATGCTGATGTGTGTAGCCTGCTTGGGGCAGGGTAGGCACTTAGTTCATTGCAGCTATTACTGCTGTGATCATGTAGCTGGCAGAGCAGCCAGAATCAGCAAGGGCACACCTTAGTGGGTATCAGAACAATCGGCTTTGTCATAGATTTGGCTGGGCTCCAGGAAGGTGGCTCAGCCTGTATTTGGAGTCAGGCCATGCTGCCAAACCATCTTCATGTTGGTGTGTACCCCCTCCTCCATTCCTCTGGCTTGGCTTGTGCTACGAGAACGGGATGATCTAGCGTTCAAGGTTGCTGCCACCCTAACTGATCCTTGGTGGAAACTGGTGTCCAAGTCACATGTCTGTGCACCAAAAATCTGGGGTTTAGAGTCCTTTCACAGATGCCTGTAGGGCTCTG
AAGACAAGTAGGTCACCGCTTTGCTGCATATTCATCTCAGAAGGCTTTCTTTTCCCATGTTTTGCATCAGGGAATGACCAGCAGTTTTGTGTTAAACATCTGCTGTGTGCAGAGCCCTTGGACACACCAGGCTGGCTGCCTTCAGAGCTCTATCTCAGCACCTGTGGCACTCACAGTCACTTGGAAAGAGACCAGTGCACCGCTGTCTGGTGGACAGGTTTCCAGGAAACAGGCCTGGGGGTATAGGTGATAGGAACACAGGAGGACAGAGAATTTCAGATTGTGGCAGCAATAAAGCCGAGCAGGGAGACAGTCTGTCTCAGAACAGGTTTTGCTGCAGTTAAAGTGGTAGAGAAAATCCGGCTGTGGTCTCAGTGGAGATGAATGATATTTGGAACTCTGTATATGTAAGTAGCCAAGACACTTGGCCAGGAGTGAGGTCTATGGTGGTTTTGTTTTTTGGCCCTTAGCCCTAGTTGGTGTGAATTCCACCTGTGTAGGTGGGAAAGGGCAGGGCATCTTCTCACCATAGGTCATGCAGGGTGGTGGGACCGACTTACCCCCATGGGCTCCCACATCGCTCCTCCCTGTACGACTGGTTGAGCTGCACACTGCATCTGAGTGGGAGTGGAGAGGGGACAAACCAAACAGCCCGAGGAAAGTATGCCTGTGGCATGTTCAGGAAAGCATGATTAGCAGCAGGCCCTCGCCTCCCACCACACAGCTCTGCTGGTCAGGGCAGAGCTGGATGGGAGAAGCCAGACTGATTGTGCTGCATGGCTCCCAGGCTTGCTGCAAACCTTTCAGTCTGCTCTTACCATGACCAACAACTGTCCAGGCTTTTAAAAAACTCAAGTCAGTCACCCCAGCTCCCCAGGGAGAACTGAAAGGTGGCAAGTGCCCATCTGCCCTGGGGAGAGCGTTTTGAGGTTGGTCCCCAGCCTCATCCTTTCGGCTTCTTTTTAGGACCATTGGTGTTCCTCCTCTCCCTGCCTTTAATAAGGCCCCCTTTGTCCCTCTCGTGGAGAGCCTGAGTTAGGAGGTGGAAAGAATGGCTGGGGAAAGAGGGACATCTTTACTGACAAATGGAGCCCTCAGGGAGAGCCAGATGCCCAAGTGTCAGCCAGTCTGCCAGAAGCTGGAGCAGGCTTGGCACCTTTCCTCCTGGCATTGTGTGGGCCTGGTCACCTGCCGATCCTTGGGCTAAATCTGGTCTGAACCCAGCAGTGGCTGGAAGAGTTACTAGGCCAGAAATACAACTTCTAAGGCCTTTTGTAAGTGTAGAAACAGACAGGAGGGAAGAGGGAGCGGGAATAGACAAAGCAAGCCTCGGAAATCAGAATAGCAGGTCTCCAATTAGACCCAGCAGAATCACAGGCTGTTGGCTCTCCCTTTATGTAAAGCCTTCACCGTGGCAGCACCCTATTGGGCTTAGGTGCCAAGCGATGGTGAGTTCTTTTTTATGTTTTCAAAGATGATTTTATCGAATTGACTGAGCTATTTTTGAGAGTTGTCTAAAGAATGTCTACTCTTTAGTTTCTTAAAGAAAATAGGCTTCTCATTAGTTCATAAAAGGTGCTTGCTGTGGCCCTGCTTGTTGGCAGGAATGAAGTTTTGGGCTTATTTGAAAACTTTCAAAAATGTAAAAAGTTGTTGCAGAAAGTAAGATACCATAAATAGATTGAGATACTTCCTAACCTCTGCCCAGTGCCCTAGGAGTTATGAAAAGCTTTTCATAGGTTTGGACTCATTTACCCCTCCTTGCTGGCCCTGTCAGAGGTCAGAGCAGTGGGGTAGAGGTGTCCCCTCTTACAGTTGAGGTCCCCAACCCCCAGGGTAAAGGGACCTGCTCAAGGTCACAGGGAATCAGCACCTTCCTGTGCATCACACCGCTTACCCCACCGCACTTTCTACAGCGTCCTGGTGTCTCACACAGTCGCTTTGTCATTTTCCATACACACCTTGCTCGTCACTTTTCTTGGCCCCGTTCTCACAATAAG
TTGCTAACTTTTCCAGGATGTTACCAGAGACTAATGACTGTTGATATGACTTTATTTGAGGAGAAACCCAGAAGAATAGAAGAGCCTAAAAATTGGCATTCAATTATCTTAATCATTTTTCAGTTTTGAAACCTCTAAAGGGAAATAAGTGTGAATACTGGTGCACAGGCACTAGTGTAATTGACTGGTTGAATGTGAAATGGTAGAACAACAGAGACAGATAAAGAAAACCTTAACAATAAACAATTACATAGTCTTACTGGGAGCCAGGTACCTTCTGATCTTTTAATGTGCAACTCAGTTTTCACAACAATCATGTGAGGTAGGTTCTGGTAGCCTCCTTTTCAGATGAGCAAACCGAGGCGTGGGAGAGTTAGGTAAGTCTAAGGCCCCACACTTTGTAATTGTGGGAACCAGGATTTGAGCCAGGCTCTCTGGTTCCAGAATGATCTTACCCATTTCACCATACTACCTCTGAATAGATAGTTGCATGTTCACGTCACTGCTTTAGGAACACATGAACAAACCCAGAAGCATTTTTTGAGTGTTTCCCATGTGCCAGCCTCGGTGCCAGATAATTTTTGTATACATTATCCTGTTTCACTTAACACAGAGCTTAGGGCTGGAACAGAGAGAATGTGCAGGATTGTCAAGATGGCTTGCACTCTGAGGTCCATTCCTTAGCTCCACTGGTTATTTTATTCACTCAGTTGACAAAGCCTGGTCCTTAAAGTCATAGCAGTAGGTTTGAAGCCTTTGGTCAGACCTTTTTAACTTCCTATACTTCACATGTCTCAAAGAAAATATATTTTATCTATTGTTGTGTAACAACTACCTCACCACTTAGTAGTTTAAAACAACAATTCAGTATTTCTCACAATTCTGCAGCATGGATTGGGCTCAGCCAGGAGGTTCTTTAGCTGATCCTGCCCATGGTCCTGTGTGATTATACTCACATGGTGAGTCAGCTGAGGCTGACTTTTTTTTCCTGTGTATTCTCTCCTCCTTCTTTCCTGTATATTCTCTCCTTCTCATGGCCTCTCCTTGTTGTTTCCTCATTAGGGTTGCTAGACTTCTTAAATGGCAGCTCAGGGCTCCCAGGAGCACAAAAGCAGAAGCTCCCAGGCCTTTTAAGGTCTGGGCCTGGAACTGTCCCAGTAGTATTTCCACTGCATTCTCTTGGTGAAAGCAAGCCACAGCCCTATCCCACATTTAAGGAGAGAGGACGACACAAGACAGAGGCTACTAGGATGTTAAGTTCAATGTCACAGACCCCCACAACCACCTCGGGCTGTCTTTCTCAGAGTCTGTTTATTCTCATTTTGCATATAGTTCTAACAAATTTAATTATTGATTTCTACATCTTAAAAAGCCCAGAAGTAATATATTTTGAGGGGGGGAAAAAGTGCTGCTTTAAGGAGGTATATGAACATCAATGGAAAAATGATAGCTGATAGTCATCAACAAGGAGGGAGACAGAGAAACCACAAAAGCAGGTATGACTCAGCACTCTGGGAAGCTTTCCACAGTGACCCATTCTATAGGATATTTATATTGCTGAAGCTCCCTTGTACCTAATTCAGCCAGCAGGTTTTAACTGTTTGGGTTTTTAAGCTTCAGGGTCAAAGTTTTGGGGTAAAAAATGCTTCATTCATTGAGACTGAGAGAGAGTAGCTTATAAATTGACACTGACCATAGACCTTGATTTTGTGTCCCCACCCAAATCTCATCTTGAATTATAGCTCCCATAATTCCCACATGTTGGTGGGAGGGACCCAGTGGGAGTTCATTGAATCACGGGGGCGATTTCCCCCATACTGTTCCTGTTGGTAGTGAGTAAGTCTCACAAGATCTGATCGTTTTATAAGAGGAAACCTTATAAAAGGTGGCTCTCATTTTCTTCGTTGTCTGTAAGATGTGCCTTTTGCCTTCTGCCATGATTGTGAGGCCTCCCCAGCCACATGGAACTGTGAGTCCATTAAACCTCTTTTTCTATATAA
ATTACCCAGTCTTGGGTATGTCTTTATCAGCAGCATAAAAATGGACTAATACAGATCTTTTAATCAAAGATGTGGTAACGAATCACAGAACCACCTGTGCTTTAGAAGAAATGATCCTTGGATTGCTTTTCAAGCAATGGAAGTTTATGATTGTCACATTGTCAATTGTGATTATATTCAAGGAAGTGTTATGGACTGAATGTTTGTGTCCCCCCAAATTCCATATGTGGAAGCCCTAACCTCCAGTGTGGCATATTGGAGATGGAGCCCCTAAGGAAGTAATTGAGGTTAAATGTAGTCATAGTGTGGGGCCCTGATCCCATAGGACTGGCATCCTTACAAGAACAGATACCAGAGAGCTTGCTCTGTCTCTGCACACACCCTTAGAAAAGGCTGTGTGAGGCCCCAGAAAGAAGGTGGCCATCTGTAAGCCCAGAAGAGAGCCCTCAATAGGAACCAGGTTGGCTAGAACGTTGATCTTGGACCCCCAACCTCCAGAACTGTAAGAAAATAAATTTCTGTTGTTTAAGCTACCTAGGCTGTAGCATTTTGATACAGCAAGCCTGAAGCTGAGACAGGAATATTACATACACTGGAGACTTGTGACCCCAAAGACTTTTGACCTGTTGAATAGAGCTCATCTTGTCTCTCTCCAGCTCATGCATGCATCCTCCCAGCTTGCAAGGGGGCCTTGCTTCTCTGGATTGCACTTTGATTTTCTAGTTTTAAGTGACAAAGGGAGAGTCTTCTAGGGATGTTAAAGTTACTCCAGTAATTCCAGGATATTTCCAGCTCCTTTTGAAATCTTATGTTTGTAATTCTGGGTCAAGTAATGTCCAAGCCAGTGATTACATTACTGGTAGGCATGTCTCTCATGCTGGGCCACGCCCTTCCATCCCATGTTCACGATGAGCACCAACGGTTCTCTGAGAGCCCAGAGCCAGTGGCTGCAACGTTGGGAAAATTCTTAAATGACCATCAGTGGTTTTGGCTCATGTTCCTACGATTGTGGGGTTCATATACCATCTCATTTTTAGAAATGTGTGTTTTTGTACTCCTGTAATTACTTTTTAATAAAGATATTTTGCCAGTCCTTAGCTCCACTCCAATAGCAAAGCAAAGGACAAGAACAAGTAAGGGCTGAAACATAGAGCGTGGAGGGTTTTGCTCAGGCCATGCTTTGCTGTGGGAGAATTTTGAAGGCGGGAGTGGAGCTGCCGTTTGTGGTTTGGTGCTGTGGTGCCTGTTAAAAGTGGCTTTAATGAGAGTGTAAGGTGCTGCACACTGAAGCCCTGTGTTTATTCAGCTGCCTCCTGCCAGCGGCTACAGCTGGGATGGCTTCCCTCGCACGGCGTCTGCCCACAGCCTTGCGCCCGGAGCCCAGAGGACTCACAGGAAAGGAGCTGGCAAAGGTGGAAGCTGGTTTTCATGGTCTCCTGAGGGCCCCTGGCCCCTGGGAGATGGGTCACACTCCCTGAATGCTGTGCTGTTGGTTTCCCTGGAGGATTCTTGCTGCAGGCCAGGTCCCGTATTCTCCACACTCACCACAAGTGGCTGGGTGTGACTTGACACGGTGTGAAAGTGGAGGGGCGCGAGCACTCAGGTGGGTGAACAGCCTGCGGGCCTCCTTTCCCTGGCTGCAAAGCCGCCACTCAACTCTGCTCCAGCCCAGGTTTCGGGGAGCCGGGATCCACTTGGGCAGGCCGGGAGCCTCAGACTCCAGACTTTTCATGGTGCGCTCCTTCCTGCTTACTCACGAGGAAGGCGAGGCAGTCCAGCATCCTGGGTGGAGTGGAGGGTGTTTCGAGTCCATATCTAAATCTTTTTCTTAGAGCACCCTAAGCAGGCTGCTGTCTTTGATCCCCATGCCTCTGCTGTTTATCTTGGTGTGATTCATCACTGTAATTTAAGACGTGGAGAGAGCAGAGTTCCCATCCCAGGCAAGGGGAGGCGCAGTGCAGGTTGGACTGTGTAAGGAAATGGCAGCATGGCGAGGTTTG
TGCCGCGGCCTACAAGCAGGGCTGCATGCTCCCCAGGCAGACTGTGGCAGAGCCAAGCCCCTCACTTGTAGGGAAGCGGTGTCTCCTAGGTGCCCCAGTAGGGGAGGTCTGCCAGCATCGCTGTGCTGTGGGGAAGGCAGCCAGAGGGCTTCATCCATAACTGGCTCAGCTCCTCAGGAGGAGACCAGCAGTGTGTCTCTGCACTCAGAACTGCCACTGGGTCGTGGTGTTAAGCCCAGGAGGGGTGCATATGTGACAACCTTGTATTGCTTAGTTGCTGAGACCAGAAGATCCAGGTAATTGCATGAGCTATTAGCTACTTCTGGTTCTCACAAACTCCTCCCAGTGTTGATAGAGAATGGTGTCCTCCGGGCATGCTCTGGGTATAGTTTTATTTGTATTATTAGGGACTCATGGAGAAGTGCTCTGGGTGTCCTGCACACTGCACTTTGGAGATCATTCTGTGATTCCCAAGTCCTGCTGATTCCACTTCCTTGGCGCTCTGGGATTAGATATCCTAGGCTGCCAATCTGCATGTTCATCTTTCAGTGGGGATACCCTGCAGGGCTTGTCCACAGCTTGAATTTCAAACCAAAAGCCAGGTACGCTTTCCAAGCCTTCCGATATTGGTTCAAAGAATTTGGCTGCCGAAGCTTTTGTGTAGCTGAGGCACCAGCAGGCCGAGGCACGAGTGAATCCATGTGGCCCGAGGAAGAGCCTTCCCATGGGCCTCAGCAGCCACACAGAGCCTCTGATCTGTTTCCCTTTGCGGGATGGTCAGTCTCCTGTGTCTCAAGACCTCAAGCAGAAACGTGTGGATCTCCCCCTCTATCTTGAAAGTCCAACCAAGTCCAGGCCTTTGTTGTGCAGTTTAAACCAGACCTGTCAGTAAACATGAGCTAATTCCAGTTTTTGTCCCTCTTTGTCCTTCTCAAGTTCCAAGGTGATCATTGCCTGTTATCTATGGGACTTGTGTAAGCTAACTTCCCAAATGCAGCTGTGAGACAAACATTTTAATTAAAAGGCAGAAGGGCCAGGAGATATAAACACTCATGTGCCTGGTTGTCAGTGAAGGCCGGGTGGCGTTCAGCGTCCAGGGGCTAATTATATTCTCTTCTCTGGGACTCACACAAATATTGCCACAAATGTACCTGACTGTCAGACTGAAGTCATTTATCTCCAAGTGTGGGGAGCAGTGAAGCCCACACGTCCAGGTAGATTTAGCTCTTACGGACTCTTCTGGGAAGCGGCAGGTGGGTAAAACTGAAAGCATCAGCTATTGCACCCTAGCTGCAGGTTTTCACAGAAAGCTGAATCAACTTGTATTGGGGATTCTGCATTTTAGAGTTCTCTCAAAGACCTAGGTTTGGGCCCTAAAATGCAGCCACCAGAGCAGGCACACCTTAAAAAGTAGGTAATGAGTGGCCTTAGTGCCTGGGCAGCTGTCAGTACTGGCCTCCTTTGGTTGTCCCTGTCCACTGACCCTCCTTCCTCCCGTTCTCTCACGTTTGCATTCATCTGCAGCCTCCATTACCATTGACCAGCTTTGCCGCTTACCTGCCTCCACCCTTCCTTCCCTAAGTTCGAGTAGTTTCCTAAGTAGCTTCCCCTTAGTTTCCTAAGGCTGAAGTAATAGAGTATAGCACAAACTGGATGGCTTAAAAGTTCTCCTTCTCAGAAAGTACAAAATCTAGGTGTTGGCACAGCTGTGCTCCCCGCAAAGCCTCTAGGGAAGAATCCTTCCTTGCCTCTTCCTAGTTTCTGGTGGCTGCTGGCAACCCTTCGTGTTCCTTTATGGCTGTGTCACTCCAGTTTCTGCCCCCATCATTACATGACCTCCTCTCTGTGGGTCTCTGTATGTCCTCTTTTTTTTTTTTTTTTTTTTTTTGAGACAGAGTCTTACTCTTTCGCCCAGGCTGGAGTGCCTTAGTGCGATCTCAGCTCTCTGCAACCTCTGTCTCCAGGTTCAAGCAATTCTCCTGCCTCAGCCTCCCTAGTAG
CTGGGATTACAGATGCATGCCACCACACCCAGCTGCTTTTTGTATTTTTAGTAGAGATGGGGTTTCACTTTGTTGGTCAGGCTGGTCTTGAACTCCTGACCTCAAATGATCTGCCTGTGTCGGCCTCCCAAAGTGCTTGGGATTACAGGCATGAGCCAAAGAGGCCTGGCCACCTCTTTTCTTATACGGATACCAGAATGACTCTACCTTAACTAAACATAACATCTGCAAAGACCCTGTGTCCAAATAAGGTCACATTCTGAAGTTCTGGGTAGATGCAAATTTTGGGAGACACTATTTGCCACTACGGGTTTCTCCAAGTTGTGCTGTCCTGTGACTGAACACTGACCCTGTTGCTCTATTGTGGATGTTTGGGAGGATTAACACCAGCCTGTTTCTGCTCACACTGCTTTCTCTGCCTGGAATTTGCTTTACAGCCTGTTGTCCCTGGTGTGACCATCATTCATCTTGTAAGACTTAACAGCCCAGCTTTGCCCCCACTGTGGATTCCTGGCGGAAACACCGTCCCTCTTCTCCTGGTGTACATGTTCTGTCACGGTCTCTGGATCACTTGACTGTACTTATTGTTTGAGTGGTACTCACCACTAATTGGAGCATAAGCCCTTGAAGGTTTCATTTACATCCGGGTCTCAGTACTAGGGACTTAGCAGGTGCTCATAATTCATAACTGCTTTTGATGGAGTTGGAGAGGCTAGTTATTAAGATTTTTCTGAATGTAGCATCCTTAGCTGGCTTTCAGGATAGTGACTGCATGCTCTAAAAGGAGATCTTTGCAGATTTTACTCTATGATGAGAAAACTTTTTAGTACCTTTTTCTGTCAGCATGTACCTAGGTAATAAGAAGAATGACGTGACATGTATTTGGGGAATTAGCATACAAGAAGTACTTTGACATTTTCCACGTGTGAAGAAAGCTGTTTTTATTGACCTAGCTGAAGGGATCAAATTCATATTTGAAAAGATGTGGCTAAAACTTGAAAAGGACTTGTCCATGGGGGATGTCTTCATCCCTTCTCCCTGCTCAGGGAGAAGTTGCAGGCCATAAAGTGTGAGGGCATCAGCCTGGATGAGCTCTAAAGCTCAATGTTAATTTCGGATTTTAATAACATGTAAGAATACAAGTTTGATTGCAGAAGCCACAATCAAGTTCAAGGGAGGGTGAATGAATGGGAAGTAGAAGGGACCATGTGTACGTCTGTGTGTGTGTGTGCATGTATGCACACATTTTTTTAGGCTTTGGTTCCTTTAGCTATAAAATAAGGGAGTTAAAATGGGGGTGGTTTATGGTCACGATTGTTTCCTTCAAGTATCTAAAAGGCTTTCACCAGCAAGGGGGACTTGGGTTCAGAACAAGGCATTGGGTGAACCAGTGGTAGAACTGAAAGGTCGTTTCAGTGTGAAAGTGAACTTTCTTTTCAAGCAAAGGCTGGATGTCATTTGGGAGCTTGTAAAAAGAATTGTTATATGGCTGCCAGGCGCAATGGCTCACACCTGTAATCCTAGCACTTTGGTAGGCTGAAATGGATGGATCCCTTAAGGCCAGAAGTTTGAGCCAGCCTGAGCAACATGGTGAAACCCCGTCTCTACTAAAATACAAAAAATGAGCCAGGCGTGTGGCATGTGCCTGTAGTCCCAGCTACTCCGGTAGGCCCGAGGCACGAGAATTGCTTGAACCTGGGAAGCAGAGGTTGCAGTTAGCTGAGATTGCACCGCTGCACTCCATCCTGGACGACCAAAGGAGACTGTCTCAAAAATAAAAATAAAAATTGGCCAGGTTTGGTGACTCACACCTGTAATCCCAGCACTTTGGGAGGCCAAGGCAGGTGGATCATCTGAGGTCAGGAGTGCAAGACCAGCCTGGCCAACATGGCGAAACCCCATCTCCACTAAAAATCAGCCAGGCATTGGTGGCGTGCGCCTGTAATCCCAGCTACTTGGGATTTGAGGCAGGAGAATTGCTCGGACCTGGGAGACAGAGG
TGCAGTGAGCTGAGATACGCCACTGCACTCCAGCTTGGGCGACAGAGCGAGACTCCATCCCTAAATAAATAAAATAATTAAAAAAAACAATTGGTATATGGATTGGGAGATTGGACTGATGGACTCTTGATTTCTCTGATTCTTGAATCTCTGCTTGGGTGAACACTAAGATTCCTTCCATTTCTAAAATTTTGAGGTTCAGTATAAATAGAAATGGGGTCATGCTTTTGTGGCTAGGAGCTGTGTGTATATTCTAGCCACCAA";
342
343        let ranges = symmetric_dust(read);
344        let expected_ranges = vec![
345            742..808,
346            3169..3223,
347            3406..3413,
348            3424..3431,
349            3437..3444,
350            3729..3764,
351            4729..4736,
352            5831..5862,
353            6449..6456,
354            7014..7031,
355            8194..8201,
356            12955..12963,
357            12971..13033,
358            13369..13376,
359            17841..17864,
360            19193..19207,
361            19221..19228,
362            19746..19763,
363            20037..20063,
364        ];
365        assert_eq!(ranges, expected_ranges);
366    }
367}