skydive/
utils.rs

1use std::borrow::Cow;
2
3use std::collections::HashMap;
4use std::fs::File;
5use std::io::{BufRead, BufReader};
6use std::path::{Path, PathBuf};
7
8use needletail::Sequence;
9use parquet::data_type::AsBytes;
10use petgraph::graph::DiGraph;
11use petgraph::visit::{EdgeRef, NodeIndexable, NodeRef};
12
13/// This function takes a sequence URL and a list of possible extensions, and returns the base name of the file
14/// without any of the provided extensions. It does this by first extracting the last segment of the URL path,
15/// and then iteratively removing any of the specified extensions from the end of the base name.
16///
17/// # Arguments
18///
19/// * `seq_url` - A reference to a URL object representing the sequence file URL.
20/// * `extensions` - A slice of string slices representing the possible file extensions to be removed.
21///
22/// # Returns
23///
24/// * A `String` containing the base name of the file without any of the specified extensions.
25///
26/// # Panics
27/// This function will panic:
28/// 1. If `seq_url.path_segments()` returns `None`, indicating that the URL does not have a path.
29/// 2. If `seq_url.path_segments().map(|c| c.collect::<Vec<_>>()).unwrap().last()` returns `None`,
30/// indicating that the path does not have any segments.
31#[must_use]
32pub fn basename_without_extension(seq_url: &url::Url, extensions: &[&str]) -> String {
33    let mut basename = seq_url
34        .path_segments()
35        .map(|c| c.collect::<Vec<_>>())
36        .unwrap()
37        .last()
38        .unwrap()
39        .to_string();
40
41    let mut sorted_extensions = extensions.to_vec();
42    sorted_extensions.sort_by_key(|b| std::cmp::Reverse(b.len()));
43
44    for ext in sorted_extensions {
45        basename = basename.trim_end_matches(ext).to_string();
46    }
47
48    basename
49}
50
51/// Given fasta files this function will read and return a list of lists containing the contents
52/// of the fasta files
53///
54/// # Arguments
55/// * `path`: paths to fasta files
56///
57/// # Returns
58/// A list of lists containing the contents of the fasta files
59///
60/// # Panics
61/// This function will panic if it cannot read a given file path
62#[must_use]
63pub fn read_fasta(paths: &Vec<PathBuf>) -> Vec<Vec<u8>> {
64    paths
65        .iter()
66        .map(|p| {
67            let reader = bio::io::fasta::Reader::from_file(p).expect("Failed to open file");
68            reader
69                .records()
70                .filter_map(|r| r.ok())
71                .map(|r| r.seq().to_vec())
72                .collect::<Vec<Vec<u8>>>()
73        })
74        .flatten()
75        .collect::<Vec<Vec<u8>>>()
76}
77
78#[must_use]
79pub fn default_hidden_progress_bar() -> indicatif::ProgressBar {
80    indicatif::ProgressBar::hidden()
81}
82
83/// Create a new bounded progress bar with the specified message and length.
84/// The progress bar will be a bar with a spinner.
85/// The progress bar will display the elapsed time, the progress bar, the current position,
86/// the total length, and the estimated time remaining.
87///
88/// # Arguments
89/// * `msg`: The message to display on the progress bar.
90/// * `len`: The total length of the progress bar.
91///
92/// # Returns
93/// A new bounded progress bar.
94///
95/// This will create a new bounded progress bar with the message "Processing sequences" and a total length of 100.
96/// The progress bar will be a bar with a spinner.
97///
98/// # Panics
99/// This function will panic if the progress bar style cannot be created.
100pub fn default_bounded_progress_bar(
101    msg: impl Into<Cow<'static, str>>,
102    len: u64,
103) -> indicatif::ProgressBar {
104    let progress_bar_style = indicatif::ProgressStyle::default_bar()
105        .template(
106            "{msg} ... [{elapsed_precise}] [{bar:40.white/white}] {human_pos}/{human_len} ({eta})",
107        )
108        .unwrap()
109        .progress_chars("#>-");
110
111    let progress_bar = indicatif::ProgressBar::new(len);
112    progress_bar.set_style(progress_bar_style);
113    progress_bar.set_message(msg);
114
115    progress_bar
116}
117
118/// Create a new unbounded progress bar with the specified message.
119///
120/// # Arguments
121/// `msg`: The message to display on the progress bar.
122///
123/// # Returns
124/// A new unbounded progress bar.
125///
126/// This will create a new unbounded progress bar with the message "Processing sequences".
127/// The progress bar will be a spinner.
128///
129/// # Panics
130/// This function will panic if the progress bar style cannot be created.
131///
132pub fn default_unbounded_progress_bar(msg: impl Into<Cow<'static, str>>) -> indicatif::ProgressBar {
133    let progress_bar_style = indicatif::ProgressStyle::default_bar()
134        .template("{msg} ... [{elapsed_precise}] {human_pos}")
135        .unwrap()
136        .progress_chars("#>-");
137
138    let progress_bar = indicatif::ProgressBar::new_spinner();
139    progress_bar.set_style(progress_bar_style);
140    progress_bar.set_message(msg);
141
142    progress_bar
143}
144
145/// Get the canonical (lexicographically-lowest) version of a k-mer.
146///
147/// # Arguments
148///
149/// * `kmer` - A slice representing the k-mer.
150///
151/// # Returns
152///
153/// A vector containing the canonical k-mer.
154#[inline(always)]
155#[must_use]
156pub fn canonicalize_kmer(kmer: &[u8]) -> Vec<u8> {
157    let rc_kmer = kmer.reverse_complement();
158    if kmer < rc_kmer.as_bytes() {
159        kmer.to_vec()
160    } else {
161        rc_kmer.as_bytes().to_vec()
162    }
163}
164
/// Collapse every run of identical consecutive bytes in `seq` into a single byte.
///
/// For example `AAACCGTTTA` becomes `ACGTA`. Bytes are compared exactly, so `A` and `a` are
/// different symbols and do not merge.
///
/// # Arguments
///
/// * `seq` - The sequence to compress.
///
/// # Returns
///
/// A new vector holding the homopolymer-compressed sequence; empty if `seq` is empty.
#[must_use]
pub fn homopolymer_compressed(seq: &[u8]) -> Vec<u8> {
    let mut collapsed = seq.to_vec();
    // `dedup` removes consecutive repeated elements, which is exactly a run collapse.
    collapsed.dedup();
    collapsed
}
179
/// Compute the Shannon entropy, in bits per symbol, of the byte composition of `seq`.
///
/// Every distinct byte value is treated as its own symbol, so upper- and lower-case bases
/// count separately. The result is `-sum(p * log2(p))` over the symbols that occur, where `p`
/// is the fraction of `seq` made up of that symbol.
///
/// # Arguments
///
/// * `seq` - The sequence to measure.
///
/// # Returns
///
/// The entropy as an `f32`. It is zero for an empty sequence and for a sequence made of a
/// single repeated symbol.
#[must_use]
pub fn shannon_entropy(seq: &[u8]) -> f32 {
    // A fixed table indexed by byte value keeps the summation order stable. Summing in hash-map
    // iteration order could change the low bits of the result from one call to the next.
    let mut counts = [0usize; 256];
    for &base in seq {
        counts[usize::from(base)] += 1;
    }

    let len = seq.len() as f32;

    -counts
        .iter()
        // Symbols that never occur contribute nothing; skipping them also avoids `0 * log2(0)`.
        .filter(|&&count| count > 0)
        .map(|&count| {
            let p = count as f32 / len;
            p * p.log2()
        })
        .sum::<f32>()
}
194
/// Compute the fraction of bases in `seq` that are `G` or `C`.
///
/// Only the upper-case bytes `G` and `C` are counted; every other byte, including lower-case
/// `g` and `c`, counts towards the length only.
///
/// # Arguments
///
/// * `seq` - The sequence to measure.
///
/// # Returns
///
/// A value between 0.0 and 1.0. An empty sequence yields 0.0.
#[must_use]
pub fn gc_content(seq: &[u8]) -> f32 {
    // Without this guard an empty input would evaluate 0.0 / 0.0 and return NaN.
    if seq.is_empty() {
        return 0.0;
    }

    let gc_count = seq
        .iter()
        .filter(|&&base| matches!(base, b'G' | b'C'))
        .count();

    gc_count as f32 / seq.len() as f32
}
200
201/// Writes a GFA file from a directed graph.
202///
203/// # Arguments
204///
205/// * `writer` - A mutable reference to an object implementing the `Write` trait.
206/// * `graph` - A reference to a directed graph where nodes are sequences and edges are links with weights.
207///
208/// # Returns
209///
210/// A `Result` which is `Ok` if the file was written successfully, or an `Err` if an I/O error occurred.
211///
212/// # Errors
213///
214/// This function will return an error if any I/O operation fails.
215///
216/// # Panics
217///
218/// This function will panic if:
219/// 1. The file cannot be opened.
220/// 2. Any line in the file cannot be read.
221pub fn write_gfa<W: std::io::Write>(writer: &mut W, graph: &DiGraph<String, f32>) -> std::io::Result<()> {
222    // Write header
223    writeln!(writer, "H\tVN:Z:1.0")?;
224
225    // Write segments
226    for (node_index, node_label) in graph.node_indices().zip(graph.node_weights()) {
227        writeln!(writer, "S\t{}\t{}", node_index.index(), node_label)?;
228    }
229
230    // Write links
231    for edge in graph.edge_references() {
232        let (from, to) = (edge.source().index(), edge.target().index());
233        let weight = edge.weight();
234        writeln!(writer, "L\t{}\t+\t{}\t+\t0M\tRC:f:{}", from, to, (100.0*weight).round() as u8)?;
235    }
236
237    Ok(())
238}
239
240/// Reads a GFA file and constructs a directed graph from it.
241///
242/// # Arguments
243///
244/// * `path` - A path to the GFA file.
245///
246/// # Returns
247///
248/// A `DiGraph` where nodes are sequences and edges are links with weights.
249///
250/// # Errors
251///
252/// This function returns an error if the file cannot be opened or read.
253///
254/// # Panics
255///
256/// This function will panic if:
257/// 1. The file cannot be opened.
258/// 2. Any line in the file cannot be read.
259pub fn read_gfa<P: AsRef<Path>>(path: P) -> std::io::Result<DiGraph<String, f32>> {
260    let file = File::open(path)?;
261    let reader = BufReader::new(file);
262    let mut graph = DiGraph::new();
263    let mut node_map = HashMap::new();
264
265    for line in reader.lines() {
266        let line = line?;
267        let fields: Vec<&str> = line.split('\t').collect();
268        
269        match fields[0] {
270            "S" => {
271                let id = fields[1];
272                let sequence = fields[2].to_string();
273                let node_index = graph.add_node(sequence);
274                node_map.insert(id.to_string(), node_index);
275            },
276            "L" => {
277                if fields.len() < 6 {
278                    continue; // Skip malformed lines
279                }
280                let from_id = fields[1];
281                // let from_orient = fields[2];
282                let to_id = fields[3];
283                // let to_orient = fields[4];
284
285                let weight = fields.get(5)
286                    .and_then(|s| s.split(':').last())
287                    .and_then(|s| s.parse::<f32>().ok())
288                    .unwrap_or(1.0);
289
290                if let (Some(&from), Some(&to)) = (node_map.get(from_id), node_map.get(to_id)) {
291                    graph.add_edge(from, to, weight);
292                }
293            },
294            _ => {} // Ignore other lines
295        }
296    }
297
298    Ok(graph)
299}
300
301#[cfg(test)]
302mod tests {
303    use sdust::symmetric_dust;
304
305    use super::*;
306
307    #[test]
308    fn test_canonicalize_kmer() {
309        let kmer1 = b"CGTA";
310        let kmer2 = b"TACG";
311        let kmer3 = b"AAAA";
312        let kmer4 = b"TTTT";
313
314        // Test canonical k-mer for kmer1 and kmer2
315        assert_eq!(canonicalize_kmer(kmer1), b"CGTA".to_vec());
316        assert_eq!(canonicalize_kmer(kmer2), b"CGTA".to_vec());
317
318        // Test canonical k-mer for kmer3 and kmer4
319        assert_eq!(canonicalize_kmer(kmer3), b"AAAA".to_vec());
320        assert_eq!(canonicalize_kmer(kmer4), b"AAAA".to_vec());
321    }
322
323    #[test]
324    fn test_dust() {
325        let read = b"TGGCAGCCATAGGTTTTCCCTGGAGTTGTGGCATCTGGAACTACAGGGATGAGCATTTGAGTACATATTACAGTGAGGTGGCCACACTGTGACCCGCAGTTCTGCAGACTGGAAGGCACTGAATGCCAGGATTTTTGCAGAGTGTCACTATGAAGTCCTGACTTGGCTCAGAGACCTTCTTAGAGCAGTAATTCGGGACCAGTGGATTTCTGATAAAGTTATTCTAATTTTCTAATAATTGTTTTCTAATAAAAGCCATATGGCAGGTCCTGCTCCCTTGGTAGCATGACCAGTACCTGGCGCAGTGCTAGTGCTGAGCTGACAGGAAGTGCCTCACCTTCATCTCTCACTTGACAGTGGGTGGAAGGTTCTTGGCTCGGTATCCCTCAGTCATGACTGCACACTGTCCTGAGCTTTTCTCCCAACTTCATCCACTTCATACTATTTTAATAAAGCGGTGCTGTGTATTATAACATTGTGCAGCTGAGCATTACACTCATGGCTCCCATTATCAAGCCCCTGCTATATACAGGGCATTTCACAAAGAAGCAAACTTCCAAGCAGTCACTCAGCAACCTCCTCCTAGGAGCATTTGGGGAAGAGAATCTTGGGGCAAGTTTCCTTTACCACCTGCAGTCACCTGGGATGCTGGGAAAAATTTTGATTTCTGTTGTCTTCCCTTCCAGAAAATTATTTGAGAGTGGGGCCAACAAATCTGCACTTGAGTCCATACCTAGGATAGGTTTTTCTGTGCAGTTTTTTAAGTTTAAGAGGTTTTTAAAGTTTAAGACACACTGGTTAGGGTTTTGGGCTCTGGAGGATGAGAAACCTTGCTTGGGTTATCAGATAACAGATTCTTCTCTGGTTTCCCTCCGATGTTATCAGGGGAATTGTTGGTTGTTTCACATTTGGGTGCTCCTGGGCCTTTTAAGAGCCAGGCTGGGAGGGCTGGTGATGGCAACCCTGGCTGGCAACAGAGGCTGTTTCCACCCCTGGGTGGCTCCCCACCTGCTTTCTGCCCTGGTAGGGTTCAAGGCTCCGGGAATTGGCACTCAGTGAAAGAATTTTGATTTCCAGTGGAATTTGTGCTGTCACAAGATTTGACCCATGGGACTAGTGAATAGATAGATGGGTTAGGTGAGCATGTGACTTGGCTGGTGGCCGAGAGAGTGATAAATGTGAGAGTAGCTGGGGAAAATGGAAACGGATTAAGATAGAAGAGGGGCATTGTCCATCTGGCCGATGGCAAGGGCTGGTGGAGCAGCAGTTCTAGACTATTCTGAGGTTAGTTCAGAAACTGACCTAACAACGTGGGAAGTCTCTCCCAAATTGTTTATAGTTTCTCACAGTGGGTGCCTTTTGAAGTGATTGTATTTGACAGCCCAGAGTGTTGGGCACACAGCTTTGTGCTATCTAAGGTCACGGTCCAATTGTGATTCCTAGCAATAGCTTCAAGGCATATTTCATAGCTCTAATAGTTTTCAAGTATAAGGGTGTGAGAATGAGCTTTAAGAATATTTATGCCATGAAATCTTCCAATTGCTCTTCAACACGGGTGCACCATAGTAGGTGTGAATAGAAGTGGTGGCAACAGACCTGAATTCAGGTCTGCCACTGACTATAATACTAGCTTGAGAAGTAACTTGAACTCTGTGAGCCTCAGTTTCCTGTCTGTAGAATGAAGACAATGATACTGCCTTCATAGGATTATTATTAGGATTAAATGAAATATTATAGTGAGGCATTCAGCAAAGTGTTCTATAAATTGGGGTAGGATGTGAGGTAATTGGCATTGTTAGATGCGTCTCTGGGTAAACAACCAAATTTTCTGCTTATTTGGCTGTTTCCCTAGCTGCCTTGTTTAAAACAAAACACCTGAGTTGACCAGAACACCTCTGTTTTTAGAATCTAACTTTGCAGTTGTATTAGTCTCTTCTTGCATTGCCATAAAGAAATACCTGACACCTTCATAAAGA
AATGAGGTTTAACTGGCTCACGGTCCTGCAGGCTTTACAGGAAGCATGGTACTGACATCTACTTGGCTTCTAGAGAGGCCTCAGGAAGCTTACAGTCATGGCAGAAAGTGAAGCAGTAGCAGGCACATCACATGGTGAAAGCGGGAGCAAGAGAGACAGTTGGGAGAGGAGGCGCCACACACCTTTTAAACAACCAGATCTCCCAAGAACTCACTCACTATCGTGAAGATAGCACCAAGCCATGAGGGGTCTGTCCCCATGATCCAGACACCTCTCACCAGGCCCCACCTCCAGCACTGGGGATTACAATTGAGCATGAGATTTGGGTAGGGACAACTATCTGAACTGTATCAGCAATAGAGTGTGATTATAAGTTATGCTGTAGGAATAGAATTGTTGTCACTGAAAGATTCCCTTGGCCATGGGAGCCTCCTGGCTCTATGAAGGATCAGCCAATGCTTATCCAGGGAGGTAATGATAAGGTCGAAGTTTGACAAGAAATCTACGTTTTCTTAAGCTAAGTAGTAGGTTAACAGAAGATATGTTGTGTGTTAATAGTTCTATTTACATCTCTTTCTCCAAGGTTATACACACTCTGCATCACTAAGTCAAGACACCATTCTTTGACACTGGCTAATAGTAATAGCAATCATAGCCACTGTGCATTAGCACTTACTCCACATTCCTTGTACTGAGCACTACTTACATTATGTTGGTGTTGTCATTGTCACCATTTCATACATAAAGAAACCAAGGTTTTCAGAGATTGAATAACTTGTGCAAGATCACACATCTGGTAGGGCAGATCCAAGATCTGTTTGTCTCCAAAATCTGCTTCTGTCCTGCCTGGGAGACCTTGGGAATGACGGCAAGTGGTTGTAGGAAGGAGGGCTGATGTCAAGGTGGCTGTGGGGGCAGGAGGCTGAGGGAACTCACTGACCCTTGAGGGACTCCTTAGGTGGGGGATTCTGGGTTTCCTGTTGGCAGCTGGAGGGGGAGTGCCAGTTCCCATAAGTGGTTATTGCCCAGGTTGTGACCTTGGCTTGGCCAGTGATTGGTTCATTTTGGAATTTCATGAGTGACCCCCAGGCAGGGTTCTTACAATCACGCTGGAAGACCACCCAGGAAGTTCCTGTTGGGGTAAAATGATGCAGCAGCCTGCTTTCCTCAGGAGGTCTGAACCCTCCCCATGTACACACACACACACACACAAACACACACACACCCACCCACACACCCTCCACCCCTCTTGGTGTCTTTGGCCTTTTTTCCTAGCTTGTTTGTTTCTATGGTGTCTTCAAGTTCAACTAGAACCTATGGGAATGACTTAGTTTTGAACCTGTAAGAATGAGAAGTAAACAATTCTTGTACTGACTTTGAATTTCCTTTCTTCTGTTGTCCAAAGGTGAAGGGTGACAATGTGTCCCAGATTTTTTTGGATATTCTACAAAAAAATAGATATTTTTTTGTAGAAAAAAGCTTATTCTACAGTGTTGTCCCAATTTTAAAAGCCCTAGAAAACTGGTTAAGGCAAATTATAACCAAATCAAATCACTAATTATTACAATAAAGTGTAACTAGCTACAAAAATCCTAAATTACAATTTTAGGCTTTGAGAAAATATCACTGATGATAGAGGAAGAGTGACAGTCTTTGTTTTGGGTCTTGGGATGGCAGAAAGAAAATATTTAGTAGGGAGTAAAGATCAGTGTACCCCTTGAAGTGTGGTGAAGATGGGTGGGTTTTGATGCTCTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTATGTAGAATTTCCCTAAATTCAAAATGATACTTACATTTTGATAAACCCAGAGAGTTAATTAACATGGCATAAATGTGCTAGCTCATTGTTGTTTTTCAGGGAAAGAACTAGAGAGAGACACAAGTCACCCCGGAATAAGACGGCAGAGGGTCAGAAAAGTCTGTCACCATTCAACCTCCCACTGGAGAGCCCCTGTTGGGAAATGATTCTA
CTCGGACAGAGGAAGTTCAGGTAAGGATCAAAGGTGGTCTGTAAGCACACTGCCACTTGGCCAAGACTCCTGTCTGTAAATGTTTTCCTTAGGTTCTCTTGGATTTTTGTCTTTATTTTTCTAGCATGCTAATTAGTTTATCTATGTCCCATGGTCTCTTGTTGGTCTAAGGACAGCCTGTCCTGCCCTTTGGTAGCGTGGGGATTCTTCTGAAGTATGATTGGTTGGCCTGCTTTACATGGTGTGGAAAAGTAGCCAGCAAGGTTGACGACAGGGTTGGGAAGGGAAAAGCTGAAGTCTCCCACGACTCATTTCAAAATGGAACAGTATAAGGGGGAGAAGGAAACTCAGAAAAACCTAAGAAGTTTAAAAAACATGGGGCAGCCCAGACTGACCATTACTACTAGAGCTATGCAGGAATTGAGAGGCCGCACGCTCAGATGCCTGGTAGAGAAACGTAAGTTAATTAAGGAGGCCCTGGGTCGAAAAGAGGGGGCAAAAATATATTAACCTAGCTTTGGGTTAACAGCAATCTGTGCAGTGCCTCAGTGTCAGTGCTGTAGTGTGGTGTGAAGGAGCCTATGGCTAACTGGAGAATGCATTTCCTCTGTAAAGGAAACAGCAGCTCCGCAGCTCCAGACACCTACTGTTGCTCAGGAATGCAGGGATTCATTGTTTGAGAAAAGCTGCAAATCAGGATTTTATGTGGAATCCTAACTTTACAGTATTTTTGAAATACTGATACTTTATTAATTTTTTTGAACCATTGAGTGGGTTCTTCTCCAGGTTCCAACTGTCTGGCAGCCCACCTGATTTAAATAAACATCTGAGCTATACACAGAAACATGTCTGCATACCCTCTGCACATCCTGAAGTATATATACACATGTCCAGCCTTGCCCCTCATAAACAAAGTGGTGTATGATACACAGCTGTAAAGATAGATATAGGACTATAGATAAGCATACATCTGTACATACCTGTGCATACACACAGGTAAGCATTTATAATCAAATAGGTGGACTGAAACTGGAATTCCTCAGAGTACACAAGGTGTTCTTGGGACACCAAAACTACAATTGTGGGGTTGAACGTGGGATTCATTGAGCAATGAGCAAATGCCTTTAGTGCTGCCTGCCTTGGCTCTGGATGGCTGATGGTCGGATGGGGCCAGTCTTAGGATTGGATCACCCTGGAGTACTTGAAGGGGTCAGTTTCCTCCTGGATGTGGGTTCAGAGGTGCCAGTGGCCTACAGCAAAGGCTCTTCTTTCTCTGCATCTCCTCTGCACCTCGTAGCTGAGAACACTTTGAGAAGCTCTTGGTGTTGCCCCAGGATGATCTGGTGTGAAAAGCATTGAGATGGGTGTTTGGAGGCTGTATTTTTTAGTAGCTCTGTTACCTTGAGCAGTCACAGCCTTTGTAGGCCTCAATTTCTTTATTGAAAATCTAGGGTTTTGATGAAAGCATCTTAGGTGCTTTTTCTTCTAAGAACCTGAAGCTTAACAGGATCCTTTGTGTATCTACATGTTTTAGGCATACATGTGCACCCCAGGAAATTCTCTCATGCCCTTTCTAGTCAATCTCTGCCCCACCCTCACCTCTCCAAGGCAACCACTGTGTTGATTTCTATCACTGTAGATCAATTTTGCCTGTTTTTGAATTTAGTATAATTAGAATCATATGGTCCATCTCCTGGCCCCCACCCACCCGCCCTGCTTAGCATAATGATTTTGAGATTTATCCATGTTTTGGTATGGTTTCAACAGCTTATTCTTTTTATTTTTGCTGAGTAGTATTCCATTGTATCAGTCTACCACAATTTGTTATCCATTCTCCTAGTGGATGGACATTTGGGTTTTTTTTGTTGTTGTTTGTTTGTTTTTTGAGGCAGAGTCTTGTTCTGTCGCCCAGGCTGGAGTGCAGCGCATGATATCACCTCACTGCAGCCTCTACCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAATAGCCG
GGGATTACAGGCACGCACCACCACGCCCAGCTAATTTTTGTATTTTCAGTAGAGATGGGCTTTCACCATGTTGGCCAGGCTTGGTCTCGAACTCCTGACCTCAGGTGATTTGCCTGCCTTGGCCTCCCAAAGTGCTAGGATTACAGGCGTGAGCCACCGTGCCTGGCTGGATGTATATTTTTATTAATTTTTGGATAAAACCTGAGAGTGGAATTGCTGGGCCATATAGCTAAGTGTATATTTAGATTTATATGAAACCGCCAGAGTGGTTTTCCAGAGGCACTGTACCACGGTCCACTTCCACCAGCAGTGTTGGAGAGTCCTGGCTGCTTCTGGCCATCGTCTGAAATAGGAATTTCTCTCACTGTAGGTGATACTTCTGACTTTGCAAGTTGAAGGATTATTAGTTTATGGGATTGAGACCTTCACCACCACCACTTCTTACCATAGCCCATACATTTCATAAATCATGGTTTTTTTGGTCATTACTAGATTCGGAGTTATTTGATGATGAGCGATGTCTGTCTTGCTGATTTAGCTACTAACTGAAACTAGCTTTTTCTAAGTTGGTGTCCTAATTTCACCCCCTTTGCCACTGCATCTGACTGTTTTCTTTCCGAGTGAAAGGATACATACAAATTTCAGAGGCAGAAACCTCTTTGGCCTCCTGTGTCTTTTCAGCGCCTTGCTCTTATTGCTTCATTATTGTTGCCAGTTGGTTTTTAAACAACAAAATCCTTTAAAATTCTATCAACTGGGTTTTGCTAAGTGAATAGACTAATTGCTTTAACTAGCAACGGCCTAGAAGTTTAAAAAGAGAGGAAGCTAGAAAGTAAAAGATAACATTTTAATAATCCTGGTTGTTTCTATGCCCTTGATGTTTAGTTCCTCGTGAAAACATGTTTTAGAAAGAATTTTTAAGCCAATCTGGCCATACACGGATTCCTGGATTTGCTTAGCTTGGTCCATGAGAAATATTGTTAAAGAGTGCTTGACACTGATGCTTGTTAAGTGGATCTTGTGAACATCATAAGGAGATTTTTTTTTTTTTTTTTGAGACGGAGTCTCACTCTGTAGCCCAGGCTGGAGTACAGTGGCACGATCTCAGCTCACTGCAACCTCCGACTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCTGAGTAGCTGGGATTACAGGCATGTGCCACCACGCCTTGCTAATTTTTGTATTTTTAGTAGAGACGGGGGTTTCACCATGGTTGGCCAGGATGTTCTTGATCTCCTGACCTCCTGATCCGTCCGCCTCGGCCTTCCAAAGTACGGGGATTACAGGTGTGAGCCACCGCGCCCCAGTCCATAAAGAGATTTTAAAATGTGGGTCCTAGCTACAGGTAAGCTTGGGTTTGTGTAGTGGTGTAAGTTCCCTTGCTACGCCCTTTGCTCTTCTGGGCTGCTAGAGGGTGTAGTAATACTCCCACCTCCAAAAGTTGGACTTCGTAAGCCTTTATAACCCAGCGTGAATTGGAAAGAAGATGCAGGAGGTTTATCTCTATAGATGAGCTCTCACCAAGATTAGTCTAATACCTGGGTTGCGCATTGCAGGGCAAACAGCTCCAGGCCCTCAGAGCTGCTCAAGGCTTTTCAACCCAGGGGATGATAATCAATGTTATGTCAATGAATCAGCCAAACAGACAGAAAGATCACATTATGTTTTCTCTGTTTGAAAGGTAAATACCTCATACATTTTGAAAATTTCAATGAAAATCGTTTGAGTTAAGAAGTTCTAATATTTAAAGAGTTAAGCCTTTCATTTTCTGGAAGCCTTTGTGAATAGGGCTGGGTAGATGCAGCGGGCCCTGCATGTTCACTGCCCTTTGTAGCTTTTACAAATGACCTGTGTCATGTCATCCTCACTGTCTTCTCCCCACCAGGATGACAACTGGGGAGAGACCACCACGGCCATCACAGGCACCTCGGAGCACAGCATATCCCAAGAGGACATTGCCAGGATCAGCAA
GGACATGGAGGACAGCGTGGGGCTGGATTGCAAACGCTACCTGGGCCTCACCGTCGCCTCTTTTCTTGGACTTCTAGTTTTCCTCACCCCTATTGCCTTCATCCTTTTACCTCCGATCCTGTGGAGGATGAGCTGGAGCCTTGTGGCACAATTTGTGAGGGGCTCTTTATCTCCATGGCATTCAAACTCCTCATTCTGCTCATAGGGACCTGGGCACCTTTTTTTCCGCAAGCGGAGAGCTGACATGCCACGGGTGTTTGTGTTTCGTGCCCTTTTGTTGGTCCTCATCTTTCTCTTTGTGGTTTCCTATTGGCTTTTTTACGGGGTCCGCATTTTGGACTCTCGGGACCGGAATTACCAGGGCATTGTGCAATATGCAGTCTCCCTTGTGGATGCCCTCCTCTTCATCCATTACCTGGCATCGTCCTGCTGGAGCTCAGGCCAGCTGCAGCCCATGTTCACGCTGCAGGTGGTCCGCTTCCACCGATGGCGAGTCCCGCTTCTACAGCCTGGACACCTGAGGTAAGAGGCAACATCCAGGAGGCAGAAAGGATGGCTGATGTCTTGCTGGGAGACAGCTGCTCTGTAGCACGTGAGGGGTGGTGACAGATGCCAAGAGCTAGGACCAGAGTCTGACTCTTTTCTGGTTTTGGGGAGGAGATGCGAGGGTGGGGAGGGTTGTCCATGTTCATTGAGTTTCTGGACTTCTAGATGGTGCGGGGCAGTTGCTGGCTCTCACCCAGGTTGAGATTTTGCTGGGCTTGTTCTCAAAGTTATTGGCAGCTCCCAAAAATGATGGAGAAAGGAGATGCATAGTGATGGCTGCCTTCTTTGACTCTGAAATTGGCCAATGGACAACAGATAAAGTGACCAGCAGCTCCATTTTGTCCCAAATGTGACATCTGGTTTACCATGTTGTCCCAGTGGAATAATGAATTGTTCCTTTTTTCCCACTCTCAGAGGCCTGGTTTGGGCAGTAAATTATATGGTCATCCGAGGGACCCTTCCAATAAAGAATCAAGTGCAGGTTAGAGACTCCAAATGTGTAATCCTTGAGTGTTGTGAAAATGTATGCCGTGAGAAAAAGTTAGAAGTCAGTTGGGTTGTCATACTTACATCTTTGCATAAAATCTCATTATTTTGTGGTTAAATAAGAGTGATTACCATCATTTTATTTGCTTCAAGGTAAGCACTTTATATATAGATTGTGTATTTAGTCTTCATAGAACCCGTGACCTAGGTATTATTAATCCCTGTGTCACAGATGAAGAAACTAGGGCTTAGGGGATTTAAGTAATTTGCACAAAAACATATGGCTAGCCTCATTTAGGATTCACTCAGATGTCATGAGGCCAGGGCTGAGTGAATGCCCCCATAATGGCATCTCTCACTTTGTGGTTAGTGGCCTATTTTTCCATCTGTTTTCTTCCACAGACTATGAACTCCCTGAGGCCAGGGGCCACCCTTATACCTCATTACATCCTCAGTGCCTGGCATGGAGCATGGCTTGCACTGAGATGTTCTCTGGGTGAATGCAGAGCCTGGGACATTTGACTTCAAAGCCTTTACCCTCTCCCAGGCTCTCTGCCTCCTTAGGCAGTATATGCTGATGTGTGTAGCCTGCTTGGGGCAGGGTAGGCACTTAGTTCATTGCAGCTATTACTGCTGTGATCATGTAGCTGGCAGAGCAGCCAGAATCAGCAAGGGCACACCTTAGTGGGTATCAGAACAATCGGCTTTGTCATAGATTTGGCTGGGCTCCAGGAAGGTGGCTCAGCCTGTATTTGGAGTCAGGCCATGCTGCCAAACCATCTTCATGTTGGTGTGTACCCCCTCCTCCATTCCTCTGGCTTGGCTTGTGCTACGAGAACGGGATGATCTAGCGTTCAAGGTTGCTGCCACCCTAACTGATCCTTGGTGGAAACTGGTGTCCAAGTCACATGTCTGTGCACCAAAAATCTGGGGTTTAGAGTCCTTTCACAGATGCCTGTAGGGCTCTG
AAGACAAGTAGGTCACCGCTTTGCTGCATATTCATCTCAGAAGGCTTTCTTTTCCCATGTTTTGCATCAGGGAATGACCAGCAGTTTTGTGTTAAACATCTGCTGTGTGCAGAGCCCTTGGACACACCAGGCTGGCTGCCTTCAGAGCTCTATCTCAGCACCTGTGGCACTCACAGTCACTTGGAAAGAGACCAGTGCACCGCTGTCTGGTGGACAGGTTTCCAGGAAACAGGCCTGGGGGTATAGGTGATAGGAACACAGGAGGACAGAGAATTTCAGATTGTGGCAGCAATAAAGCCGAGCAGGGAGACAGTCTGTCTCAGAACAGGTTTTGCTGCAGTTAAAGTGGTAGAGAAAATCCGGCTGTGGTCTCAGTGGAGATGAATGATATTTGGAACTCTGTATATGTAAGTAGCCAAGACACTTGGCCAGGAGTGAGGTCTATGGTGGTTTTGTTTTTTGGCCCTTAGCCCTAGTTGGTGTGAATTCCACCTGTGTAGGTGGGAAAGGGCAGGGCATCTTCTCACCATAGGTCATGCAGGGTGGTGGGACCGACTTACCCCCATGGGCTCCCACATCGCTCCTCCCTGTACGACTGGTTGAGCTGCACACTGCATCTGAGTGGGAGTGGAGAGGGGACAAACCAAACAGCCCGAGGAAAGTATGCCTGTGGCATGTTCAGGAAAGCATGATTAGCAGCAGGCCCTCGCCTCCCACCACACAGCTCTGCTGGTCAGGGCAGAGCTGGATGGGAGAAGCCAGACTGATTGTGCTGCATGGCTCCCAGGCTTGCTGCAAACCTTTCAGTCTGCTCTTACCATGACCAACAACTGTCCAGGCTTTTAAAAAACTCAAGTCAGTCACCCCAGCTCCCCAGGGAGAACTGAAAGGTGGCAAGTGCCCATCTGCCCTGGGGAGAGCGTTTTGAGGTTGGTCCCCAGCCTCATCCTTTCGGCTTCTTTTTAGGACCATTGGTGTTCCTCCTCTCCCTGCCTTTAATAAGGCCCCCTTTGTCCCTCTCGTGGAGAGCCTGAGTTAGGAGGTGGAAAGAATGGCTGGGGAAAGAGGGACATCTTTACTGACAAATGGAGCCCTCAGGGAGAGCCAGATGCCCAAGTGTCAGCCAGTCTGCCAGAAGCTGGAGCAGGCTTGGCACCTTTCCTCCTGGCATTGTGTGGGCCTGGTCACCTGCCGATCCTTGGGCTAAATCTGGTCTGAACCCAGCAGTGGCTGGAAGAGTTACTAGGCCAGAAATACAACTTCTAAGGCCTTTTGTAAGTGTAGAAACAGACAGGAGGGAAGAGGGAGCGGGAATAGACAAAGCAAGCCTCGGAAATCAGAATAGCAGGTCTCCAATTAGACCCAGCAGAATCACAGGCTGTTGGCTCTCCCTTTATGTAAAGCCTTCACCGTGGCAGCACCCTATTGGGCTTAGGTGCCAAGCGATGGTGAGTTCTTTTTTATGTTTTCAAAGATGATTTTATCGAATTGACTGAGCTATTTTTGAGAGTTGTCTAAAGAATGTCTACTCTTTAGTTTCTTAAAGAAAATAGGCTTCTCATTAGTTCATAAAAGGTGCTTGCTGTGGCCCTGCTTGTTGGCAGGAATGAAGTTTTGGGCTTATTTGAAAACTTTCAAAAATGTAAAAAGTTGTTGCAGAAAGTAAGATACCATAAATAGATTGAGATACTTCCTAACCTCTGCCCAGTGCCCTAGGAGTTATGAAAAGCTTTTCATAGGTTTGGACTCATTTACCCCTCCTTGCTGGCCCTGTCAGAGGTCAGAGCAGTGGGGTAGAGGTGTCCCCTCTTACAGTTGAGGTCCCCAACCCCCAGGGTAAAGGGACCTGCTCAAGGTCACAGGGAATCAGCACCTTCCTGTGCATCACACCGCTTACCCCACCGCACTTTCTACAGCGTCCTGGTGTCTCACACAGTCGCTTTGTCATTTTCCATACACACCTTGCTCGTCACTTTTCTTGGCCCCGTTCTCACAATAAG
TTGCTAACTTTTCCAGGATGTTACCAGAGACTAATGACTGTTGATATGACTTTATTTGAGGAGAAACCCAGAAGAATAGAAGAGCCTAAAAATTGGCATTCAATTATCTTAATCATTTTTCAGTTTTGAAACCTCTAAAGGGAAATAAGTGTGAATACTGGTGCACAGGCACTAGTGTAATTGACTGGTTGAATGTGAAATGGTAGAACAACAGAGACAGATAAAGAAAACCTTAACAATAAACAATTACATAGTCTTACTGGGAGCCAGGTACCTTCTGATCTTTTAATGTGCAACTCAGTTTTCACAACAATCATGTGAGGTAGGTTCTGGTAGCCTCCTTTTCAGATGAGCAAACCGAGGCGTGGGAGAGTTAGGTAAGTCTAAGGCCCCACACTTTGTAATTGTGGGAACCAGGATTTGAGCCAGGCTCTCTGGTTCCAGAATGATCTTACCCATTTCACCATACTACCTCTGAATAGATAGTTGCATGTTCACGTCACTGCTTTAGGAACACATGAACAAACCCAGAAGCATTTTTTGAGTGTTTCCCATGTGCCAGCCTCGGTGCCAGATAATTTTTGTATACATTATCCTGTTTCACTTAACACAGAGCTTAGGGCTGGAACAGAGAGAATGTGCAGGATTGTCAAGATGGCTTGCACTCTGAGGTCCATTCCTTAGCTCCACTGGTTATTTTATTCACTCAGTTGACAAAGCCTGGTCCTTAAAGTCATAGCAGTAGGTTTGAAGCCTTTGGTCAGACCTTTTTAACTTCCTATACTTCACATGTCTCAAAGAAAATATATTTTATCTATTGTTGTGTAACAACTACCTCACCACTTAGTAGTTTAAAACAACAATTCAGTATTTCTCACAATTCTGCAGCATGGATTGGGCTCAGCCAGGAGGTTCTTTAGCTGATCCTGCCCATGGTCCTGTGTGATTATACTCACATGGTGAGTCAGCTGAGGCTGACTTTTTTTTCCTGTGTATTCTCTCCTCCTTCTTTCCTGTATATTCTCTCCTTCTCATGGCCTCTCCTTGTTGTTTCCTCATTAGGGTTGCTAGACTTCTTAAATGGCAGCTCAGGGCTCCCAGGAGCACAAAAGCAGAAGCTCCCAGGCCTTTTAAGGTCTGGGCCTGGAACTGTCCCAGTAGTATTTCCACTGCATTCTCTTGGTGAAAGCAAGCCACAGCCCTATCCCACATTTAAGGAGAGAGGACGACACAAGACAGAGGCTACTAGGATGTTAAGTTCAATGTCACAGACCCCCACAACCACCTCGGGCTGTCTTTCTCAGAGTCTGTTTATTCTCATTTTGCATATAGTTCTAACAAATTTAATTATTGATTTCTACATCTTAAAAAGCCCAGAAGTAATATATTTTGAGGGGGGGAAAAAGTGCTGCTTTAAGGAGGTATATGAACATCAATGGAAAAATGATAGCTGATAGTCATCAACAAGGAGGGAGACAGAGAAACCACAAAAGCAGGTATGACTCAGCACTCTGGGAAGCTTTCCACAGTGACCCATTCTATAGGATATTTATATTGCTGAAGCTCCCTTGTACCTAATTCAGCCAGCAGGTTTTAACTGTTTGGGTTTTTAAGCTTCAGGGTCAAAGTTTTGGGGTAAAAAATGCTTCATTCATTGAGACTGAGAGAGAGTAGCTTATAAATTGACACTGACCATAGACCTTGATTTTGTGTCCCCACCCAAATCTCATCTTGAATTATAGCTCCCATAATTCCCACATGTTGGTGGGAGGGACCCAGTGGGAGTTCATTGAATCACGGGGGCGATTTCCCCCATACTGTTCCTGTTGGTAGTGAGTAAGTCTCACAAGATCTGATCGTTTTATAAGAGGAAACCTTATAAAAGGTGGCTCTCATTTTCTTCGTTGTCTGTAAGATGTGCCTTTTGCCTTCTGCCATGATTGTGAGGCCTCCCCAGCCACATGGAACTGTGAGTCCATTAAACCTCTTTTTCTATATAA
ATTACCCAGTCTTGGGTATGTCTTTATCAGCAGCATAAAAATGGACTAATACAGATCTTTTAATCAAAGATGTGGTAACGAATCACAGAACCACCTGTGCTTTAGAAGAAATGATCCTTGGATTGCTTTTCAAGCAATGGAAGTTTATGATTGTCACATTGTCAATTGTGATTATATTCAAGGAAGTGTTATGGACTGAATGTTTGTGTCCCCCCAAATTCCATATGTGGAAGCCCTAACCTCCAGTGTGGCATATTGGAGATGGAGCCCCTAAGGAAGTAATTGAGGTTAAATGTAGTCATAGTGTGGGGCCCTGATCCCATAGGACTGGCATCCTTACAAGAACAGATACCAGAGAGCTTGCTCTGTCTCTGCACACACCCTTAGAAAAGGCTGTGTGAGGCCCCAGAAAGAAGGTGGCCATCTGTAAGCCCAGAAGAGAGCCCTCAATAGGAACCAGGTTGGCTAGAACGTTGATCTTGGACCCCCAACCTCCAGAACTGTAAGAAAATAAATTTCTGTTGTTTAAGCTACCTAGGCTGTAGCATTTTGATACAGCAAGCCTGAAGCTGAGACAGGAATATTACATACACTGGAGACTTGTGACCCCAAAGACTTTTGACCTGTTGAATAGAGCTCATCTTGTCTCTCTCCAGCTCATGCATGCATCCTCCCAGCTTGCAAGGGGGCCTTGCTTCTCTGGATTGCACTTTGATTTTCTAGTTTTAAGTGACAAAGGGAGAGTCTTCTAGGGATGTTAAAGTTACTCCAGTAATTCCAGGATATTTCCAGCTCCTTTTGAAATCTTATGTTTGTAATTCTGGGTCAAGTAATGTCCAAGCCAGTGATTACATTACTGGTAGGCATGTCTCTCATGCTGGGCCACGCCCTTCCATCCCATGTTCACGATGAGCACCAACGGTTCTCTGAGAGCCCAGAGCCAGTGGCTGCAACGTTGGGAAAATTCTTAAATGACCATCAGTGGTTTTGGCTCATGTTCCTACGATTGTGGGGTTCATATACCATCTCATTTTTAGAAATGTGTGTTTTTGTACTCCTGTAATTACTTTTTAATAAAGATATTTTGCCAGTCCTTAGCTCCACTCCAATAGCAAAGCAAAGGACAAGAACAAGTAAGGGCTGAAACATAGAGCGTGGAGGGTTTTGCTCAGGCCATGCTTTGCTGTGGGAGAATTTTGAAGGCGGGAGTGGAGCTGCCGTTTGTGGTTTGGTGCTGTGGTGCCTGTTAAAAGTGGCTTTAATGAGAGTGTAAGGTGCTGCACACTGAAGCCCTGTGTTTATTCAGCTGCCTCCTGCCAGCGGCTACAGCTGGGATGGCTTCCCTCGCACGGCGTCTGCCCACAGCCTTGCGCCCGGAGCCCAGAGGACTCACAGGAAAGGAGCTGGCAAAGGTGGAAGCTGGTTTTCATGGTCTCCTGAGGGCCCCTGGCCCCTGGGAGATGGGTCACACTCCCTGAATGCTGTGCTGTTGGTTTCCCTGGAGGATTCTTGCTGCAGGCCAGGTCCCGTATTCTCCACACTCACCACAAGTGGCTGGGTGTGACTTGACACGGTGTGAAAGTGGAGGGGCGCGAGCACTCAGGTGGGTGAACAGCCTGCGGGCCTCCTTTCCCTGGCTGCAAAGCCGCCACTCAACTCTGCTCCAGCCCAGGTTTCGGGGAGCCGGGATCCACTTGGGCAGGCCGGGAGCCTCAGACTCCAGACTTTTCATGGTGCGCTCCTTCCTGCTTACTCACGAGGAAGGCGAGGCAGTCCAGCATCCTGGGTGGAGTGGAGGGTGTTTCGAGTCCATATCTAAATCTTTTTCTTAGAGCACCCTAAGCAGGCTGCTGTCTTTGATCCCCATGCCTCTGCTGTTTATCTTGGTGTGATTCATCACTGTAATTTAAGACGTGGAGAGAGCAGAGTTCCCATCCCAGGCAAGGGGAGGCGCAGTGCAGGTTGGACTGTGTAAGGAAATGGCAGCATGGCGAGGTTTG
TGCCGCGGCCTACAAGCAGGGCTGCATGCTCCCCAGGCAGACTGTGGCAGAGCCAAGCCCCTCACTTGTAGGGAAGCGGTGTCTCCTAGGTGCCCCAGTAGGGGAGGTCTGCCAGCATCGCTGTGCTGTGGGGAAGGCAGCCAGAGGGCTTCATCCATAACTGGCTCAGCTCCTCAGGAGGAGACCAGCAGTGTGTCTCTGCACTCAGAACTGCCACTGGGTCGTGGTGTTAAGCCCAGGAGGGGTGCATATGTGACAACCTTGTATTGCTTAGTTGCTGAGACCAGAAGATCCAGGTAATTGCATGAGCTATTAGCTACTTCTGGTTCTCACAAACTCCTCCCAGTGTTGATAGAGAATGGTGTCCTCCGGGCATGCTCTGGGTATAGTTTTATTTGTATTATTAGGGACTCATGGAGAAGTGCTCTGGGTGTCCTGCACACTGCACTTTGGAGATCATTCTGTGATTCCCAAGTCCTGCTGATTCCACTTCCTTGGCGCTCTGGGATTAGATATCCTAGGCTGCCAATCTGCATGTTCATCTTTCAGTGGGGATACCCTGCAGGGCTTGTCCACAGCTTGAATTTCAAACCAAAAGCCAGGTACGCTTTCCAAGCCTTCCGATATTGGTTCAAAGAATTTGGCTGCCGAAGCTTTTGTGTAGCTGAGGCACCAGCAGGCCGAGGCACGAGTGAATCCATGTGGCCCGAGGAAGAGCCTTCCCATGGGCCTCAGCAGCCACACAGAGCCTCTGATCTGTTTCCCTTTGCGGGATGGTCAGTCTCCTGTGTCTCAAGACCTCAAGCAGAAACGTGTGGATCTCCCCCTCTATCTTGAAAGTCCAACCAAGTCCAGGCCTTTGTTGTGCAGTTTAAACCAGACCTGTCAGTAAACATGAGCTAATTCCAGTTTTTGTCCCTCTTTGTCCTTCTCAAGTTCCAAGGTGATCATTGCCTGTTATCTATGGGACTTGTGTAAGCTAACTTCCCAAATGCAGCTGTGAGACAAACATTTTAATTAAAAGGCAGAAGGGCCAGGAGATATAAACACTCATGTGCCTGGTTGTCAGTGAAGGCCGGGTGGCGTTCAGCGTCCAGGGGCTAATTATATTCTCTTCTCTGGGACTCACACAAATATTGCCACAAATGTACCTGACTGTCAGACTGAAGTCATTTATCTCCAAGTGTGGGGAGCAGTGAAGCCCACACGTCCAGGTAGATTTAGCTCTTACGGACTCTTCTGGGAAGCGGCAGGTGGGTAAAACTGAAAGCATCAGCTATTGCACCCTAGCTGCAGGTTTTCACAGAAAGCTGAATCAACTTGTATTGGGGATTCTGCATTTTAGAGTTCTCTCAAAGACCTAGGTTTGGGCCCTAAAATGCAGCCACCAGAGCAGGCACACCTTAAAAAGTAGGTAATGAGTGGCCTTAGTGCCTGGGCAGCTGTCAGTACTGGCCTCCTTTGGTTGTCCCTGTCCACTGACCCTCCTTCCTCCCGTTCTCTCACGTTTGCATTCATCTGCAGCCTCCATTACCATTGACCAGCTTTGCCGCTTACCTGCCTCCACCCTTCCTTCCCTAAGTTCGAGTAGTTTCCTAAGTAGCTTCCCCTTAGTTTCCTAAGGCTGAAGTAATAGAGTATAGCACAAACTGGATGGCTTAAAAGTTCTCCTTCTCAGAAAGTACAAAATCTAGGTGTTGGCACAGCTGTGCTCCCCGCAAAGCCTCTAGGGAAGAATCCTTCCTTGCCTCTTCCTAGTTTCTGGTGGCTGCTGGCAACCCTTCGTGTTCCTTTATGGCTGTGTCACTCCAGTTTCTGCCCCCATCATTACATGACCTCCTCTCTGTGGGTCTCTGTATGTCCTCTTTTTTTTTTTTTTTTTTTTTTTGAGACAGAGTCTTACTCTTTCGCCCAGGCTGGAGTGCCTTAGTGCGATCTCAGCTCTCTGCAACCTCTGTCTCCAGGTTCAAGCAATTCTCCTGCCTCAGCCTCCCTAGTAG
CTGGGATTACAGATGCATGCCACCACACCCAGCTGCTTTTTGTATTTTTAGTAGAGATGGGGTTTCACTTTGTTGGTCAGGCTGGTCTTGAACTCCTGACCTCAAATGATCTGCCTGTGTCGGCCTCCCAAAGTGCTTGGGATTACAGGCATGAGCCAAAGAGGCCTGGCCACCTCTTTTCTTATACGGATACCAGAATGACTCTACCTTAACTAAACATAACATCTGCAAAGACCCTGTGTCCAAATAAGGTCACATTCTGAAGTTCTGGGTAGATGCAAATTTTGGGAGACACTATTTGCCACTACGGGTTTCTCCAAGTTGTGCTGTCCTGTGACTGAACACTGACCCTGTTGCTCTATTGTGGATGTTTGGGAGGATTAACACCAGCCTGTTTCTGCTCACACTGCTTTCTCTGCCTGGAATTTGCTTTACAGCCTGTTGTCCCTGGTGTGACCATCATTCATCTTGTAAGACTTAACAGCCCAGCTTTGCCCCCACTGTGGATTCCTGGCGGAAACACCGTCCCTCTTCTCCTGGTGTACATGTTCTGTCACGGTCTCTGGATCACTTGACTGTACTTATTGTTTGAGTGGTACTCACCACTAATTGGAGCATAAGCCCTTGAAGGTTTCATTTACATCCGGGTCTCAGTACTAGGGACTTAGCAGGTGCTCATAATTCATAACTGCTTTTGATGGAGTTGGAGAGGCTAGTTATTAAGATTTTTCTGAATGTAGCATCCTTAGCTGGCTTTCAGGATAGTGACTGCATGCTCTAAAAGGAGATCTTTGCAGATTTTACTCTATGATGAGAAAACTTTTTAGTACCTTTTTCTGTCAGCATGTACCTAGGTAATAAGAAGAATGACGTGACATGTATTTGGGGAATTAGCATACAAGAAGTACTTTGACATTTTCCACGTGTGAAGAAAGCTGTTTTTATTGACCTAGCTGAAGGGATCAAATTCATATTTGAAAAGATGTGGCTAAAACTTGAAAAGGACTTGTCCATGGGGGATGTCTTCATCCCTTCTCCCTGCTCAGGGAGAAGTTGCAGGCCATAAAGTGTGAGGGCATCAGCCTGGATGAGCTCTAAAGCTCAATGTTAATTTCGGATTTTAATAACATGTAAGAATACAAGTTTGATTGCAGAAGCCACAATCAAGTTCAAGGGAGGGTGAATGAATGGGAAGTAGAAGGGACCATGTGTACGTCTGTGTGTGTGTGTGCATGTATGCACACATTTTTTTAGGCTTTGGTTCCTTTAGCTATAAAATAAGGGAGTTAAAATGGGGGTGGTTTATGGTCACGATTGTTTCCTTCAAGTATCTAAAAGGCTTTCACCAGCAAGGGGGACTTGGGTTCAGAACAAGGCATTGGGTGAACCAGTGGTAGAACTGAAAGGTCGTTTCAGTGTGAAAGTGAACTTTCTTTTCAAGCAAAGGCTGGATGTCATTTGGGAGCTTGTAAAAAGAATTGTTATATGGCTGCCAGGCGCAATGGCTCACACCTGTAATCCTAGCACTTTGGTAGGCTGAAATGGATGGATCCCTTAAGGCCAGAAGTTTGAGCCAGCCTGAGCAACATGGTGAAACCCCGTCTCTACTAAAATACAAAAAATGAGCCAGGCGTGTGGCATGTGCCTGTAGTCCCAGCTACTCCGGTAGGCCCGAGGCACGAGAATTGCTTGAACCTGGGAAGCAGAGGTTGCAGTTAGCTGAGATTGCACCGCTGCACTCCATCCTGGACGACCAAAGGAGACTGTCTCAAAAATAAAAATAAAAATTGGCCAGGTTTGGTGACTCACACCTGTAATCCCAGCACTTTGGGAGGCCAAGGCAGGTGGATCATCTGAGGTCAGGAGTGCAAGACCAGCCTGGCCAACATGGCGAAACCCCATCTCCACTAAAAATCAGCCAGGCATTGGTGGCGTGCGCCTGTAATCCCAGCTACTTGGGATTTGAGGCAGGAGAATTGCTCGGACCTGGGAGACAGAGG
TGCAGTGAGCTGAGATACGCCACTGCACTCCAGCTTGGGCGACAGAGCGAGACTCCATCCCTAAATAAATAAAATAATTAAAAAAAACAATTGGTATATGGATTGGGAGATTGGACTGATGGACTCTTGATTTCTCTGATTCTTGAATCTCTGCTTGGGTGAACACTAAGATTCCTTCCATTTCTAAAATTTTGAGGTTCAGTATAAATAGAAATGGGGTCATGCTTTTGTGGCTAGGAGCTGTGTGTATATTCTAGCCACCAA";
326
327        let ranges = symmetric_dust(read);
328        let expected_ranges = vec![
329            742..808,
330            3169..3223,
331            3406..3413,
332            3424..3431,
333            3437..3444,
334            3729..3764,
335            4729..4736,
336            5831..5862,
337            6449..6456,
338            7014..7031,
339            8194..8201,
340            12955..12963,
341            12971..13033,
342            13369..13376,
343            17841..17864,
344            19193..19207,
345            19221..19228,
346            19746..19763,
347            20037..20063
348        ];
349        assert_eq!(ranges, expected_ranges);
350    }
351}