1use std::borrow::Cow;
2
3use std::collections::HashMap;
4use std::fs::File;
5use std::io::{BufRead, BufReader};
6use std::path::{Path, PathBuf};
7
8use needletail::Sequence;
9use parquet::data_type::AsBytes;
10use petgraph::graph::DiGraph;
11use petgraph::visit::{EdgeRef, NodeIndexable, NodeRef};
12
13#[must_use]
32pub fn basename_without_extension(seq_url: &url::Url, extensions: &[&str]) -> String {
33 let mut basename = seq_url
34 .path_segments()
35 .map(|c| c.collect::<Vec<_>>())
36 .unwrap()
37 .last()
38 .unwrap()
39 .to_string();
40
41 let mut sorted_extensions = extensions.to_vec();
42 sorted_extensions.sort_by_key(|b| std::cmp::Reverse(b.len()));
43
44 for ext in sorted_extensions {
45 basename = basename.trim_end_matches(ext).to_string();
46 }
47
48 basename
49}
50
51#[must_use]
63pub fn read_fasta(paths: &Vec<PathBuf>) -> Vec<Vec<u8>> {
64 paths
65 .iter()
66 .map(|p| {
67 let reader = bio::io::fasta::Reader::from_file(p).expect("Failed to open file");
68 reader
69 .records()
70 .filter_map(|r| r.ok())
71 .map(|r| r.seq().to_vec())
72 .collect::<Vec<Vec<u8>>>()
73 })
74 .flatten()
75 .collect::<Vec<Vec<u8>>>()
76}
77
78#[must_use]
79pub fn default_hidden_progress_bar() -> indicatif::ProgressBar {
80 indicatif::ProgressBar::hidden()
81}
82
83pub fn default_bounded_progress_bar(
101 msg: impl Into<Cow<'static, str>>,
102 len: u64,
103) -> indicatif::ProgressBar {
104 let progress_bar_style = indicatif::ProgressStyle::default_bar()
105 .template(
106 "{msg} ... [{elapsed_precise}] [{bar:40.white/white}] {human_pos}/{human_len} ({eta})",
107 )
108 .unwrap()
109 .progress_chars("#>-");
110
111 let progress_bar = indicatif::ProgressBar::new(len);
112 progress_bar.set_style(progress_bar_style);
113 progress_bar.set_message(msg);
114
115 progress_bar
116}
117
118pub fn default_unbounded_progress_bar(msg: impl Into<Cow<'static, str>>) -> indicatif::ProgressBar {
133 let progress_bar_style = indicatif::ProgressStyle::default_bar()
134 .template("{msg} ... [{elapsed_precise}] {human_pos}")
135 .unwrap()
136 .progress_chars("#>-");
137
138 let progress_bar = indicatif::ProgressBar::new_spinner();
139 progress_bar.set_style(progress_bar_style);
140 progress_bar.set_message(msg);
141
142 progress_bar
143}
144
145#[inline(always)]
155#[must_use]
156pub fn canonicalize_kmer(kmer: &[u8]) -> Vec<u8> {
157 let rc_kmer = kmer.reverse_complement();
158 if kmer < rc_kmer.as_bytes() {
159 kmer.to_vec()
160 } else {
161 rc_kmer.as_bytes().to_vec()
162 }
163}
164
/// Collapses every run of identical consecutive bytes in `seq` to a single
/// byte, e.g. `AAACCGTT` becomes `ACGT`.
///
/// Comparison is byte-exact, so `A` and `a` are treated as different symbols.
#[must_use]
pub fn homopolymer_compressed(seq: &[u8]) -> Vec<u8> {
    let mut collapsed = seq.to_vec();
    // `dedup` removes consecutive repeats only, which is exactly a run collapse.
    collapsed.dedup();
    collapsed
}
179
/// Computes the Shannon entropy, in bits, of the byte distribution of `seq`.
///
/// Every distinct byte value is a separate symbol (so `A` and `a` differ).
/// Returns `0.0` for an empty sequence and for a sequence made of a single
/// repeated symbol.
///
/// Symbol counts are kept in a fixed 256-entry table and summed in ascending
/// byte order, so the result is bit-for-bit reproducible between runs. Summing
/// in hash-map iteration order would let the low bits of the floating-point
/// sum vary with the map's random seed.
#[must_use]
pub fn shannon_entropy(seq: &[u8]) -> f32 {
    if seq.is_empty() {
        return 0.0;
    }

    let mut counts = [0usize; 256];
    for &base in seq {
        counts[usize::from(base)] += 1;
    }

    let len = seq.len() as f32;

    // Accumulate -p * log2(p) from +0.0 so that a zero-entropy input yields
    // +0.0 rather than -0.0.
    counts
        .iter()
        .filter(|&&count| count > 0)
        .fold(0.0_f32, |entropy, &count| {
            let p = count as f32 / len;
            entropy - p * p.log2()
        })
}
197
/// Returns the fraction of bases in `seq` that are `G` or `C`.
///
/// Only the uppercase bytes `b'G'` and `b'C'` are counted; every other byte,
/// including lowercase `g`/`c`, contributes to the length only.
///
/// Returns `0.0` for an empty sequence (the plain ratio would be `0 / 0`,
/// i.e. NaN).
#[must_use]
pub fn gc_content(seq: &[u8]) -> f32 {
    if seq.is_empty() {
        return 0.0;
    }

    let gc_count = seq
        .iter()
        .filter(|&&base| matches!(base, b'G' | b'C'))
        .count();

    gc_count as f32 / seq.len() as f32
}
206
207pub fn write_gfa<W: std::io::Write>(
228 writer: &mut W,
229 graph: &DiGraph<String, f32>,
230) -> std::io::Result<()> {
231 writeln!(writer, "H\tVN:Z:1.0")?;
233
234 for (node_index, node_label) in graph.node_indices().zip(graph.node_weights()) {
236 writeln!(writer, "S\t{}\t{}", node_index.index(), node_label)?;
237 }
238
239 for edge in graph.edge_references() {
241 let (from, to) = (edge.source().index(), edge.target().index());
242 let weight = edge.weight();
243 writeln!(
244 writer,
245 "L\t{}\t+\t{}\t+\t0M\tRC:f:{}",
246 from,
247 to,
248 (100.0 * weight).round() as u8
249 )?;
250 }
251
252 Ok(())
253}
254
255pub fn read_gfa<P: AsRef<Path>>(path: P) -> std::io::Result<DiGraph<String, f32>> {
275 let file = File::open(path)?;
276 let reader = BufReader::new(file);
277 let mut graph = DiGraph::new();
278 let mut node_map = HashMap::new();
279
280 for line in reader.lines() {
281 let line = line?;
282 let fields: Vec<&str> = line.split('\t').collect();
283
284 match fields[0] {
285 "S" => {
286 let id = fields[1];
287 let sequence = fields[2].to_string();
288 let node_index = graph.add_node(sequence);
289 node_map.insert(id.to_string(), node_index);
290 }
291 "L" => {
292 if fields.len() < 6 {
293 continue; }
295 let from_id = fields[1];
296 let to_id = fields[3];
298 let weight = fields
301 .get(5)
302 .and_then(|s| s.split(':').last())
303 .and_then(|s| s.parse::<f32>().ok())
304 .unwrap_or(1.0);
305
306 if let (Some(&from), Some(&to)) = (node_map.get(from_id), node_map.get(to_id)) {
307 graph.add_edge(from, to, weight);
308 }
309 }
310 _ => {} }
312 }
313
314 Ok(graph)
315}
316
317#[cfg(test)]
318mod tests {
319 use sdust::symmetric_dust;
320
321 use super::*;
322
323 #[test]
324 fn test_canonicalize_kmer() {
325 let kmer1 = b"CGTA";
326 let kmer2 = b"TACG";
327 let kmer3 = b"AAAA";
328 let kmer4 = b"TTTT";
329
330 assert_eq!(canonicalize_kmer(kmer1), b"CGTA".to_vec());
332 assert_eq!(canonicalize_kmer(kmer2), b"CGTA".to_vec());
333
334 assert_eq!(canonicalize_kmer(kmer3), b"AAAA".to_vec());
336 assert_eq!(canonicalize_kmer(kmer4), b"AAAA".to_vec());
337 }
338
339 #[test]
340 fn test_dust() {
341 let read = b"TGGCAGCCATAGGTTTTCCCTGGAGTTGTGGCATCTGGAACTACAGGGATGAGCATTTGAGTACATATTACAGTGAGGTGGCCACACTGTGACCCGCAGTTCTGCAGACTGGAAGGCACTGAATGCCAGGATTTTTGCAGAGTGTCACTATGAAGTCCTGACTTGGCTCAGAGACCTTCTTAGAGCAGTAATTCGGGACCAGTGGATTTCTGATAAAGTTATTCTAATTTTCTAATAATTGTTTTCTAATAAAAGCCATATGGCAGGTCCTGCTCCCTTGGTAGCATGACCAGTACCTGGCGCAGTGCTAGTGCTGAGCTGACAGGAAGTGCCTCACCTTCATCTCTCACTTGACAGTGGGTGGAAGGTTCTTGGCTCGGTATCCCTCAGTCATGACTGCACACTGTCCTGAGCTTTTCTCCCAACTTCATCCACTTCATACTATTTTAATAAAGCGGTGCTGTGTATTATAACATTGTGCAGCTGAGCATTACACTCATGGCTCCCATTATCAAGCCCCTGCTATATACAGGGCATTTCACAAAGAAGCAAACTTCCAAGCAGTCACTCAGCAACCTCCTCCTAGGAGCATTTGGGGAAGAGAATCTTGGGGCAAGTTTCCTTTACCACCTGCAGTCACCTGGGATGCTGGGAAAAATTTTGATTTCTGTTGTCTTCCCTTCCAGAAAATTATTTGAGAGTGGGGCCAACAAATCTGCACTTGAGTCCATACCTAGGATAGGTTTTTCTGTGCAGTTTTTTAAGTTTAAGAGGTTTTTAAAGTTTAAGACACACTGGTTAGGGTTTTGGGCTCTGGAGGATGAGAAACCTTGCTTGGGTTATCAGATAACAGATTCTTCTCTGGTTTCCCTCCGATGTTATCAGGGGAATTGTTGGTTGTTTCACATTTGGGTGCTCCTGGGCCTTTTAAGAGCCAGGCTGGGAGGGCTGGTGATGGCAACCCTGGCTGGCAACAGAGGCTGTTTCCACCCCTGGGTGGCTCCCCACCTGCTTTCTGCCCTGGTAGGGTTCAAGGCTCCGGGAATTGGCACTCAGTGAAAGAATTTTGATTTCCAGTGGAATTTGTGCTGTCACAAGATTTGACCCATGGGACTAGTGAATAGATAGATGGGTTAGGTGAGCATGTGACTTGGCTGGTGGCCGAGAGAGTGATAAATGTGAGAGTAGCTGGGGAAAATGGAAACGGATTAAGATAGAAGAGGGGCATTGTCCATCTGGCCGATGGCAAGGGCTGGTGGAGCAGCAGTTCTAGACTATTCTGAGGTTAGTTCAGAAACTGACCTAACAACGTGGGAAGTCTCTCCCAAATTGTTTATAGTTTCTCACAGTGGGTGCCTTTTGAAGTGATTGTATTTGACAGCCCAGAGTGTTGGGCACACAGCTTTGTGCTATCTAAGGTCACGGTCCAATTGTGATTCCTAGCAATAGCTTCAAGGCATATTTCATAGCTCTAATAGTTTTCAAGTATAAGGGTGTGAGAATGAGCTTTAAGAATATTTATGCCATGAAATCTTCCAATTGCTCTTCAACACGGGTGCACCATAGTAGGTGTGAATAGAAGTGGTGGCAACAGACCTGAATTCAGGTCTGCCACTGACTATAATACTAGCTTGAGAAGTAACTTGAACTCTGTGAGCCTCAGTTTCCTGTCTGTAGAATGAAGACAATGATACTGCCTTCATAGGATTATTATTAGGATTAAATGAAATATTATAGTGAGGCATTCAGCAAAGTGTTCTATAAATTGGGGTAGGATGTGAGGTAATTGGCATTGTTAGATGCGTCTCTGGGTAAACAACCAAATTTTCTGCTTATTTGGCTGTTTCCCTAGCTGCCTTGTTTAAAACAAAACACCTGAGTTGACCAGAACACCTCTGTTTTTAGAATCTAACTTTGCAGTTGTATTAGTCTCTTCTTGCATTGCCATAAAGAAATACCTGACACCTTCATAAAGAAATGAGG
TTTAACTGGCTCACGGTCCTGCAGGCTTTACAGGAAGCATGGTACTGACATCTACTTGGCTTCTAGAGAGGCCTCAGGAAGCTTACAGTCATGGCAGAAAGTGAAGCAGTAGCAGGCACATCACATGGTGAAAGCGGGAGCAAGAGAGACAGTTGGGAGAGGAGGCGCCACACACCTTTTAAACAACCAGATCTCCCAAGAACTCACTCACTATCGTGAAGATAGCACCAAGCCATGAGGGGTCTGTCCCCATGATCCAGACACCTCTCACCAGGCCCCACCTCCAGCACTGGGGATTACAATTGAGCATGAGATTTGGGTAGGGACAACTATCTGAACTGTATCAGCAATAGAGTGTGATTATAAGTTATGCTGTAGGAATAGAATTGTTGTCACTGAAAGATTCCCTTGGCCATGGGAGCCTCCTGGCTCTATGAAGGATCAGCCAATGCTTATCCAGGGAGGTAATGATAAGGTCGAAGTTTGACAAGAAATCTACGTTTTCTTAAGCTAAGTAGTAGGTTAACAGAAGATATGTTGTGTGTTAATAGTTCTATTTACATCTCTTTCTCCAAGGTTATACACACTCTGCATCACTAAGTCAAGACACCATTCTTTGACACTGGCTAATAGTAATAGCAATCATAGCCACTGTGCATTAGCACTTACTCCACATTCCTTGTACTGAGCACTACTTACATTATGTTGGTGTTGTCATTGTCACCATTTCATACATAAAGAAACCAAGGTTTTCAGAGATTGAATAACTTGTGCAAGATCACACATCTGGTAGGGCAGATCCAAGATCTGTTTGTCTCCAAAATCTGCTTCTGTCCTGCCTGGGAGACCTTGGGAATGACGGCAAGTGGTTGTAGGAAGGAGGGCTGATGTCAAGGTGGCTGTGGGGGCAGGAGGCTGAGGGAACTCACTGACCCTTGAGGGACTCCTTAGGTGGGGGATTCTGGGTTTCCTGTTGGCAGCTGGAGGGGGAGTGCCAGTTCCCATAAGTGGTTATTGCCCAGGTTGTGACCTTGGCTTGGCCAGTGATTGGTTCATTTTGGAATTTCATGAGTGACCCCCAGGCAGGGTTCTTACAATCACGCTGGAAGACCACCCAGGAAGTTCCTGTTGGGGTAAAATGATGCAGCAGCCTGCTTTCCTCAGGAGGTCTGAACCCTCCCCATGTACACACACACACACACACAAACACACACACACCCACCCACACACCCTCCACCCCTCTTGGTGTCTTTGGCCTTTTTTCCTAGCTTGTTTGTTTCTATGGTGTCTTCAAGTTCAACTAGAACCTATGGGAATGACTTAGTTTTGAACCTGTAAGAATGAGAAGTAAACAATTCTTGTACTGACTTTGAATTTCCTTTCTTCTGTTGTCCAAAGGTGAAGGGTGACAATGTGTCCCAGATTTTTTTGGATATTCTACAAAAAAATAGATATTTTTTTGTAGAAAAAAGCTTATTCTACAGTGTTGTCCCAATTTTAAAAGCCCTAGAAAACTGGTTAAGGCAAATTATAACCAAATCAAATCACTAATTATTACAATAAAGTGTAACTAGCTACAAAAATCCTAAATTACAATTTTAGGCTTTGAGAAAATATCACTGATGATAGAGGAAGAGTGACAGTCTTTGTTTTGGGTCTTGGGATGGCAGAAAGAAAATATTTAGTAGGGAGTAAAGATCAGTGTACCCCTTGAAGTGTGGTGAAGATGGGTGGGTTTTGATGCTCTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTATGTAGAATTTCCCTAAATTCAAAATGATACTTACATTTTGATAAACCCAGAGAGTTAATTAACATGGCATAAATGTGCTAGCTCATTGTTGTTTTTCAGGGAAAGAACTAGAGAGAGACACAAGTCACCCCGGAATAAGACGGCAGAGGGTCAGAAAAGTCTGTCACCATTCAACCTCCCACTGGAGAGCCCCTGTTGGGAAATGATTCTACTCGGAC
AGAGGAAGTTCAGGTAAGGATCAAAGGTGGTCTGTAAGCACACTGCCACTTGGCCAAGACTCCTGTCTGTAAATGTTTTCCTTAGGTTCTCTTGGATTTTTGTCTTTATTTTTCTAGCATGCTAATTAGTTTATCTATGTCCCATGGTCTCTTGTTGGTCTAAGGACAGCCTGTCCTGCCCTTTGGTAGCGTGGGGATTCTTCTGAAGTATGATTGGTTGGCCTGCTTTACATGGTGTGGAAAAGTAGCCAGCAAGGTTGACGACAGGGTTGGGAAGGGAAAAGCTGAAGTCTCCCACGACTCATTTCAAAATGGAACAGTATAAGGGGGAGAAGGAAACTCAGAAAAACCTAAGAAGTTTAAAAAACATGGGGCAGCCCAGACTGACCATTACTACTAGAGCTATGCAGGAATTGAGAGGCCGCACGCTCAGATGCCTGGTAGAGAAACGTAAGTTAATTAAGGAGGCCCTGGGTCGAAAAGAGGGGGCAAAAATATATTAACCTAGCTTTGGGTTAACAGCAATCTGTGCAGTGCCTCAGTGTCAGTGCTGTAGTGTGGTGTGAAGGAGCCTATGGCTAACTGGAGAATGCATTTCCTCTGTAAAGGAAACAGCAGCTCCGCAGCTCCAGACACCTACTGTTGCTCAGGAATGCAGGGATTCATTGTTTGAGAAAAGCTGCAAATCAGGATTTTATGTGGAATCCTAACTTTACAGTATTTTTGAAATACTGATACTTTATTAATTTTTTTGAACCATTGAGTGGGTTCTTCTCCAGGTTCCAACTGTCTGGCAGCCCACCTGATTTAAATAAACATCTGAGCTATACACAGAAACATGTCTGCATACCCTCTGCACATCCTGAAGTATATATACACATGTCCAGCCTTGCCCCTCATAAACAAAGTGGTGTATGATACACAGCTGTAAAGATAGATATAGGACTATAGATAAGCATACATCTGTACATACCTGTGCATACACACAGGTAAGCATTTATAATCAAATAGGTGGACTGAAACTGGAATTCCTCAGAGTACACAAGGTGTTCTTGGGACACCAAAACTACAATTGTGGGGTTGAACGTGGGATTCATTGAGCAATGAGCAAATGCCTTTAGTGCTGCCTGCCTTGGCTCTGGATGGCTGATGGTCGGATGGGGCCAGTCTTAGGATTGGATCACCCTGGAGTACTTGAAGGGGTCAGTTTCCTCCTGGATGTGGGTTCAGAGGTGCCAGTGGCCTACAGCAAAGGCTCTTCTTTCTCTGCATCTCCTCTGCACCTCGTAGCTGAGAACACTTTGAGAAGCTCTTGGTGTTGCCCCAGGATGATCTGGTGTGAAAAGCATTGAGATGGGTGTTTGGAGGCTGTATTTTTTAGTAGCTCTGTTACCTTGAGCAGTCACAGCCTTTGTAGGCCTCAATTTCTTTATTGAAAATCTAGGGTTTTGATGAAAGCATCTTAGGTGCTTTTTCTTCTAAGAACCTGAAGCTTAACAGGATCCTTTGTGTATCTACATGTTTTAGGCATACATGTGCACCCCAGGAAATTCTCTCATGCCCTTTCTAGTCAATCTCTGCCCCACCCTCACCTCTCCAAGGCAACCACTGTGTTGATTTCTATCACTGTAGATCAATTTTGCCTGTTTTTGAATTTAGTATAATTAGAATCATATGGTCCATCTCCTGGCCCCCACCCACCCGCCCTGCTTAGCATAATGATTTTGAGATTTATCCATGTTTTGGTATGGTTTCAACAGCTTATTCTTTTTATTTTTGCTGAGTAGTATTCCATTGTATCAGTCTACCACAATTTGTTATCCATTCTCCTAGTGGATGGACATTTGGGTTTTTTTTGTTGTTGTTTGTTTGTTTTTTGAGGCAGAGTCTTGTTCTGTCGCCCAGGCTGGAGTGCAGCGCATGATATCACCTCACTGCAGCCTCTACCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAATAGCCGGGGATTA
CAGGCACGCACCACCACGCCCAGCTAATTTTTGTATTTTCAGTAGAGATGGGCTTTCACCATGTTGGCCAGGCTTGGTCTCGAACTCCTGACCTCAGGTGATTTGCCTGCCTTGGCCTCCCAAAGTGCTAGGATTACAGGCGTGAGCCACCGTGCCTGGCTGGATGTATATTTTTATTAATTTTTGGATAAAACCTGAGAGTGGAATTGCTGGGCCATATAGCTAAGTGTATATTTAGATTTATATGAAACCGCCAGAGTGGTTTTCCAGAGGCACTGTACCACGGTCCACTTCCACCAGCAGTGTTGGAGAGTCCTGGCTGCTTCTGGCCATCGTCTGAAATAGGAATTTCTCTCACTGTAGGTGATACTTCTGACTTTGCAAGTTGAAGGATTATTAGTTTATGGGATTGAGACCTTCACCACCACCACTTCTTACCATAGCCCATACATTTCATAAATCATGGTTTTTTTGGTCATTACTAGATTCGGAGTTATTTGATGATGAGCGATGTCTGTCTTGCTGATTTAGCTACTAACTGAAACTAGCTTTTTCTAAGTTGGTGTCCTAATTTCACCCCCTTTGCCACTGCATCTGACTGTTTTCTTTCCGAGTGAAAGGATACATACAAATTTCAGAGGCAGAAACCTCTTTGGCCTCCTGTGTCTTTTCAGCGCCTTGCTCTTATTGCTTCATTATTGTTGCCAGTTGGTTTTTAAACAACAAAATCCTTTAAAATTCTATCAACTGGGTTTTGCTAAGTGAATAGACTAATTGCTTTAACTAGCAACGGCCTAGAAGTTTAAAAAGAGAGGAAGCTAGAAAGTAAAAGATAACATTTTAATAATCCTGGTTGTTTCTATGCCCTTGATGTTTAGTTCCTCGTGAAAACATGTTTTAGAAAGAATTTTTAAGCCAATCTGGCCATACACGGATTCCTGGATTTGCTTAGCTTGGTCCATGAGAAATATTGTTAAAGAGTGCTTGACACTGATGCTTGTTAAGTGGATCTTGTGAACATCATAAGGAGATTTTTTTTTTTTTTTTTGAGACGGAGTCTCACTCTGTAGCCCAGGCTGGAGTACAGTGGCACGATCTCAGCTCACTGCAACCTCCGACTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCTGAGTAGCTGGGATTACAGGCATGTGCCACCACGCCTTGCTAATTTTTGTATTTTTAGTAGAGACGGGGGTTTCACCATGGTTGGCCAGGATGTTCTTGATCTCCTGACCTCCTGATCCGTCCGCCTCGGCCTTCCAAAGTACGGGGATTACAGGTGTGAGCCACCGCGCCCCAGTCCATAAAGAGATTTTAAAATGTGGGTCCTAGCTACAGGTAAGCTTGGGTTTGTGTAGTGGTGTAAGTTCCCTTGCTACGCCCTTTGCTCTTCTGGGCTGCTAGAGGGTGTAGTAATACTCCCACCTCCAAAAGTTGGACTTCGTAAGCCTTTATAACCCAGCGTGAATTGGAAAGAAGATGCAGGAGGTTTATCTCTATAGATGAGCTCTCACCAAGATTAGTCTAATACCTGGGTTGCGCATTGCAGGGCAAACAGCTCCAGGCCCTCAGAGCTGCTCAAGGCTTTTCAACCCAGGGGATGATAATCAATGTTATGTCAATGAATCAGCCAAACAGACAGAAAGATCACATTATGTTTTCTCTGTTTGAAAGGTAAATACCTCATACATTTTGAAAATTTCAATGAAAATCGTTTGAGTTAAGAAGTTCTAATATTTAAAGAGTTAAGCCTTTCATTTTCTGGAAGCCTTTGTGAATAGGGCTGGGTAGATGCAGCGGGCCCTGCATGTTCACTGCCCTTTGTAGCTTTTACAAATGACCTGTGTCATGTCATCCTCACTGTCTTCTCCCCACCAGGATGACAACTGGGGAGAGACCACCACGGCCATCACAGGCACCTCGGAGCACAGCATATCCCAAGAGGACATTGCCAGGATCAGCAAGGACATG
GAGGACAGCGTGGGGCTGGATTGCAAACGCTACCTGGGCCTCACCGTCGCCTCTTTTCTTGGACTTCTAGTTTTCCTCACCCCTATTGCCTTCATCCTTTTACCTCCGATCCTGTGGAGGATGAGCTGGAGCCTTGTGGCACAATTTGTGAGGGGCTCTTTATCTCCATGGCATTCAAACTCCTCATTCTGCTCATAGGGACCTGGGCACCTTTTTTTCCGCAAGCGGAGAGCTGACATGCCACGGGTGTTTGTGTTTCGTGCCCTTTTGTTGGTCCTCATCTTTCTCTTTGTGGTTTCCTATTGGCTTTTTTACGGGGTCCGCATTTTGGACTCTCGGGACCGGAATTACCAGGGCATTGTGCAATATGCAGTCTCCCTTGTGGATGCCCTCCTCTTCATCCATTACCTGGCATCGTCCTGCTGGAGCTCAGGCCAGCTGCAGCCCATGTTCACGCTGCAGGTGGTCCGCTTCCACCGATGGCGAGTCCCGCTTCTACAGCCTGGACACCTGAGGTAAGAGGCAACATCCAGGAGGCAGAAAGGATGGCTGATGTCTTGCTGGGAGACAGCTGCTCTGTAGCACGTGAGGGGTGGTGACAGATGCCAAGAGCTAGGACCAGAGTCTGACTCTTTTCTGGTTTTGGGGAGGAGATGCGAGGGTGGGGAGGGTTGTCCATGTTCATTGAGTTTCTGGACTTCTAGATGGTGCGGGGCAGTTGCTGGCTCTCACCCAGGTTGAGATTTTGCTGGGCTTGTTCTCAAAGTTATTGGCAGCTCCCAAAAATGATGGAGAAAGGAGATGCATAGTGATGGCTGCCTTCTTTGACTCTGAAATTGGCCAATGGACAACAGATAAAGTGACCAGCAGCTCCATTTTGTCCCAAATGTGACATCTGGTTTACCATGTTGTCCCAGTGGAATAATGAATTGTTCCTTTTTTCCCACTCTCAGAGGCCTGGTTTGGGCAGTAAATTATATGGTCATCCGAGGGACCCTTCCAATAAAGAATCAAGTGCAGGTTAGAGACTCCAAATGTGTAATCCTTGAGTGTTGTGAAAATGTATGCCGTGAGAAAAAGTTAGAAGTCAGTTGGGTTGTCATACTTACATCTTTGCATAAAATCTCATTATTTTGTGGTTAAATAAGAGTGATTACCATCATTTTATTTGCTTCAAGGTAAGCACTTTATATATAGATTGTGTATTTAGTCTTCATAGAACCCGTGACCTAGGTATTATTAATCCCTGTGTCACAGATGAAGAAACTAGGGCTTAGGGGATTTAAGTAATTTGCACAAAAACATATGGCTAGCCTCATTTAGGATTCACTCAGATGTCATGAGGCCAGGGCTGAGTGAATGCCCCCATAATGGCATCTCTCACTTTGTGGTTAGTGGCCTATTTTTCCATCTGTTTTCTTCCACAGACTATGAACTCCCTGAGGCCAGGGGCCACCCTTATACCTCATTACATCCTCAGTGCCTGGCATGGAGCATGGCTTGCACTGAGATGTTCTCTGGGTGAATGCAGAGCCTGGGACATTTGACTTCAAAGCCTTTACCCTCTCCCAGGCTCTCTGCCTCCTTAGGCAGTATATGCTGATGTGTGTAGCCTGCTTGGGGCAGGGTAGGCACTTAGTTCATTGCAGCTATTACTGCTGTGATCATGTAGCTGGCAGAGCAGCCAGAATCAGCAAGGGCACACCTTAGTGGGTATCAGAACAATCGGCTTTGTCATAGATTTGGCTGGGCTCCAGGAAGGTGGCTCAGCCTGTATTTGGAGTCAGGCCATGCTGCCAAACCATCTTCATGTTGGTGTGTACCCCCTCCTCCATTCCTCTGGCTTGGCTTGTGCTACGAGAACGGGATGATCTAGCGTTCAAGGTTGCTGCCACCCTAACTGATCCTTGGTGGAAACTGGTGTCCAAGTCACATGTCTGTGCACCAAAAATCTGGGGTTTAGAGTCCTTTCACAGATGCCTGTAGGGCTCTGAAGACAA
GTAGGTCACCGCTTTGCTGCATATTCATCTCAGAAGGCTTTCTTTTCCCATGTTTTGCATCAGGGAATGACCAGCAGTTTTGTGTTAAACATCTGCTGTGTGCAGAGCCCTTGGACACACCAGGCTGGCTGCCTTCAGAGCTCTATCTCAGCACCTGTGGCACTCACAGTCACTTGGAAAGAGACCAGTGCACCGCTGTCTGGTGGACAGGTTTCCAGGAAACAGGCCTGGGGGTATAGGTGATAGGAACACAGGAGGACAGAGAATTTCAGATTGTGGCAGCAATAAAGCCGAGCAGGGAGACAGTCTGTCTCAGAACAGGTTTTGCTGCAGTTAAAGTGGTAGAGAAAATCCGGCTGTGGTCTCAGTGGAGATGAATGATATTTGGAACTCTGTATATGTAAGTAGCCAAGACACTTGGCCAGGAGTGAGGTCTATGGTGGTTTTGTTTTTTGGCCCTTAGCCCTAGTTGGTGTGAATTCCACCTGTGTAGGTGGGAAAGGGCAGGGCATCTTCTCACCATAGGTCATGCAGGGTGGTGGGACCGACTTACCCCCATGGGCTCCCACATCGCTCCTCCCTGTACGACTGGTTGAGCTGCACACTGCATCTGAGTGGGAGTGGAGAGGGGACAAACCAAACAGCCCGAGGAAAGTATGCCTGTGGCATGTTCAGGAAAGCATGATTAGCAGCAGGCCCTCGCCTCCCACCACACAGCTCTGCTGGTCAGGGCAGAGCTGGATGGGAGAAGCCAGACTGATTGTGCTGCATGGCTCCCAGGCTTGCTGCAAACCTTTCAGTCTGCTCTTACCATGACCAACAACTGTCCAGGCTTTTAAAAAACTCAAGTCAGTCACCCCAGCTCCCCAGGGAGAACTGAAAGGTGGCAAGTGCCCATCTGCCCTGGGGAGAGCGTTTTGAGGTTGGTCCCCAGCCTCATCCTTTCGGCTTCTTTTTAGGACCATTGGTGTTCCTCCTCTCCCTGCCTTTAATAAGGCCCCCTTTGTCCCTCTCGTGGAGAGCCTGAGTTAGGAGGTGGAAAGAATGGCTGGGGAAAGAGGGACATCTTTACTGACAAATGGAGCCCTCAGGGAGAGCCAGATGCCCAAGTGTCAGCCAGTCTGCCAGAAGCTGGAGCAGGCTTGGCACCTTTCCTCCTGGCATTGTGTGGGCCTGGTCACCTGCCGATCCTTGGGCTAAATCTGGTCTGAACCCAGCAGTGGCTGGAAGAGTTACTAGGCCAGAAATACAACTTCTAAGGCCTTTTGTAAGTGTAGAAACAGACAGGAGGGAAGAGGGAGCGGGAATAGACAAAGCAAGCCTCGGAAATCAGAATAGCAGGTCTCCAATTAGACCCAGCAGAATCACAGGCTGTTGGCTCTCCCTTTATGTAAAGCCTTCACCGTGGCAGCACCCTATTGGGCTTAGGTGCCAAGCGATGGTGAGTTCTTTTTTATGTTTTCAAAGATGATTTTATCGAATTGACTGAGCTATTTTTGAGAGTTGTCTAAAGAATGTCTACTCTTTAGTTTCTTAAAGAAAATAGGCTTCTCATTAGTTCATAAAAGGTGCTTGCTGTGGCCCTGCTTGTTGGCAGGAATGAAGTTTTGGGCTTATTTGAAAACTTTCAAAAATGTAAAAAGTTGTTGCAGAAAGTAAGATACCATAAATAGATTGAGATACTTCCTAACCTCTGCCCAGTGCCCTAGGAGTTATGAAAAGCTTTTCATAGGTTTGGACTCATTTACCCCTCCTTGCTGGCCCTGTCAGAGGTCAGAGCAGTGGGGTAGAGGTGTCCCCTCTTACAGTTGAGGTCCCCAACCCCCAGGGTAAAGGGACCTGCTCAAGGTCACAGGGAATCAGCACCTTCCTGTGCATCACACCGCTTACCCCACCGCACTTTCTACAGCGTCCTGGTGTCTCACACAGTCGCTTTGTCATTTTCCATACACACCTTGCTCGTCACTTTTCTTGGCCCCGTTCTCACAATAAGTTGCTAA
CTTTTCCAGGATGTTACCAGAGACTAATGACTGTTGATATGACTTTATTTGAGGAGAAACCCAGAAGAATAGAAGAGCCTAAAAATTGGCATTCAATTATCTTAATCATTTTTCAGTTTTGAAACCTCTAAAGGGAAATAAGTGTGAATACTGGTGCACAGGCACTAGTGTAATTGACTGGTTGAATGTGAAATGGTAGAACAACAGAGACAGATAAAGAAAACCTTAACAATAAACAATTACATAGTCTTACTGGGAGCCAGGTACCTTCTGATCTTTTAATGTGCAACTCAGTTTTCACAACAATCATGTGAGGTAGGTTCTGGTAGCCTCCTTTTCAGATGAGCAAACCGAGGCGTGGGAGAGTTAGGTAAGTCTAAGGCCCCACACTTTGTAATTGTGGGAACCAGGATTTGAGCCAGGCTCTCTGGTTCCAGAATGATCTTACCCATTTCACCATACTACCTCTGAATAGATAGTTGCATGTTCACGTCACTGCTTTAGGAACACATGAACAAACCCAGAAGCATTTTTTGAGTGTTTCCCATGTGCCAGCCTCGGTGCCAGATAATTTTTGTATACATTATCCTGTTTCACTTAACACAGAGCTTAGGGCTGGAACAGAGAGAATGTGCAGGATTGTCAAGATGGCTTGCACTCTGAGGTCCATTCCTTAGCTCCACTGGTTATTTTATTCACTCAGTTGACAAAGCCTGGTCCTTAAAGTCATAGCAGTAGGTTTGAAGCCTTTGGTCAGACCTTTTTAACTTCCTATACTTCACATGTCTCAAAGAAAATATATTTTATCTATTGTTGTGTAACAACTACCTCACCACTTAGTAGTTTAAAACAACAATTCAGTATTTCTCACAATTCTGCAGCATGGATTGGGCTCAGCCAGGAGGTTCTTTAGCTGATCCTGCCCATGGTCCTGTGTGATTATACTCACATGGTGAGTCAGCTGAGGCTGACTTTTTTTTCCTGTGTATTCTCTCCTCCTTCTTTCCTGTATATTCTCTCCTTCTCATGGCCTCTCCTTGTTGTTTCCTCATTAGGGTTGCTAGACTTCTTAAATGGCAGCTCAGGGCTCCCAGGAGCACAAAAGCAGAAGCTCCCAGGCCTTTTAAGGTCTGGGCCTGGAACTGTCCCAGTAGTATTTCCACTGCATTCTCTTGGTGAAAGCAAGCCACAGCCCTATCCCACATTTAAGGAGAGAGGACGACACAAGACAGAGGCTACTAGGATGTTAAGTTCAATGTCACAGACCCCCACAACCACCTCGGGCTGTCTTTCTCAGAGTCTGTTTATTCTCATTTTGCATATAGTTCTAACAAATTTAATTATTGATTTCTACATCTTAAAAAGCCCAGAAGTAATATATTTTGAGGGGGGGAAAAAGTGCTGCTTTAAGGAGGTATATGAACATCAATGGAAAAATGATAGCTGATAGTCATCAACAAGGAGGGAGACAGAGAAACCACAAAAGCAGGTATGACTCAGCACTCTGGGAAGCTTTCCACAGTGACCCATTCTATAGGATATTTATATTGCTGAAGCTCCCTTGTACCTAATTCAGCCAGCAGGTTTTAACTGTTTGGGTTTTTAAGCTTCAGGGTCAAAGTTTTGGGGTAAAAAATGCTTCATTCATTGAGACTGAGAGAGAGTAGCTTATAAATTGACACTGACCATAGACCTTGATTTTGTGTCCCCACCCAAATCTCATCTTGAATTATAGCTCCCATAATTCCCACATGTTGGTGGGAGGGACCCAGTGGGAGTTCATTGAATCACGGGGGCGATTTCCCCCATACTGTTCCTGTTGGTAGTGAGTAAGTCTCACAAGATCTGATCGTTTTATAAGAGGAAACCTTATAAAAGGTGGCTCTCATTTTCTTCGTTGTCTGTAAGATGTGCCTTTTGCCTTCTGCCATGATTGTGAGGCCTCCCCAGCCACATGGAACTGTGAGTCCATTAAACCTCTTTTTCTATATAAATTACCC
AGTCTTGGGTATGTCTTTATCAGCAGCATAAAAATGGACTAATACAGATCTTTTAATCAAAGATGTGGTAACGAATCACAGAACCACCTGTGCTTTAGAAGAAATGATCCTTGGATTGCTTTTCAAGCAATGGAAGTTTATGATTGTCACATTGTCAATTGTGATTATATTCAAGGAAGTGTTATGGACTGAATGTTTGTGTCCCCCCAAATTCCATATGTGGAAGCCCTAACCTCCAGTGTGGCATATTGGAGATGGAGCCCCTAAGGAAGTAATTGAGGTTAAATGTAGTCATAGTGTGGGGCCCTGATCCCATAGGACTGGCATCCTTACAAGAACAGATACCAGAGAGCTTGCTCTGTCTCTGCACACACCCTTAGAAAAGGCTGTGTGAGGCCCCAGAAAGAAGGTGGCCATCTGTAAGCCCAGAAGAGAGCCCTCAATAGGAACCAGGTTGGCTAGAACGTTGATCTTGGACCCCCAACCTCCAGAACTGTAAGAAAATAAATTTCTGTTGTTTAAGCTACCTAGGCTGTAGCATTTTGATACAGCAAGCCTGAAGCTGAGACAGGAATATTACATACACTGGAGACTTGTGACCCCAAAGACTTTTGACCTGTTGAATAGAGCTCATCTTGTCTCTCTCCAGCTCATGCATGCATCCTCCCAGCTTGCAAGGGGGCCTTGCTTCTCTGGATTGCACTTTGATTTTCTAGTTTTAAGTGACAAAGGGAGAGTCTTCTAGGGATGTTAAAGTTACTCCAGTAATTCCAGGATATTTCCAGCTCCTTTTGAAATCTTATGTTTGTAATTCTGGGTCAAGTAATGTCCAAGCCAGTGATTACATTACTGGTAGGCATGTCTCTCATGCTGGGCCACGCCCTTCCATCCCATGTTCACGATGAGCACCAACGGTTCTCTGAGAGCCCAGAGCCAGTGGCTGCAACGTTGGGAAAATTCTTAAATGACCATCAGTGGTTTTGGCTCATGTTCCTACGATTGTGGGGTTCATATACCATCTCATTTTTAGAAATGTGTGTTTTTGTACTCCTGTAATTACTTTTTAATAAAGATATTTTGCCAGTCCTTAGCTCCACTCCAATAGCAAAGCAAAGGACAAGAACAAGTAAGGGCTGAAACATAGAGCGTGGAGGGTTTTGCTCAGGCCATGCTTTGCTGTGGGAGAATTTTGAAGGCGGGAGTGGAGCTGCCGTTTGTGGTTTGGTGCTGTGGTGCCTGTTAAAAGTGGCTTTAATGAGAGTGTAAGGTGCTGCACACTGAAGCCCTGTGTTTATTCAGCTGCCTCCTGCCAGCGGCTACAGCTGGGATGGCTTCCCTCGCACGGCGTCTGCCCACAGCCTTGCGCCCGGAGCCCAGAGGACTCACAGGAAAGGAGCTGGCAAAGGTGGAAGCTGGTTTTCATGGTCTCCTGAGGGCCCCTGGCCCCTGGGAGATGGGTCACACTCCCTGAATGCTGTGCTGTTGGTTTCCCTGGAGGATTCTTGCTGCAGGCCAGGTCCCGTATTCTCCACACTCACCACAAGTGGCTGGGTGTGACTTGACACGGTGTGAAAGTGGAGGGGCGCGAGCACTCAGGTGGGTGAACAGCCTGCGGGCCTCCTTTCCCTGGCTGCAAAGCCGCCACTCAACTCTGCTCCAGCCCAGGTTTCGGGGAGCCGGGATCCACTTGGGCAGGCCGGGAGCCTCAGACTCCAGACTTTTCATGGTGCGCTCCTTCCTGCTTACTCACGAGGAAGGCGAGGCAGTCCAGCATCCTGGGTGGAGTGGAGGGTGTTTCGAGTCCATATCTAAATCTTTTTCTTAGAGCACCCTAAGCAGGCTGCTGTCTTTGATCCCCATGCCTCTGCTGTTTATCTTGGTGTGATTCATCACTGTAATTTAAGACGTGGAGAGAGCAGAGTTCCCATCCCAGGCAAGGGGAGGCGCAGTGCAGGTTGGACTGTGTAAGGAAATGGCAGCATGGCGAGGTTTGTGCCGCG
GCCTACAAGCAGGGCTGCATGCTCCCCAGGCAGACTGTGGCAGAGCCAAGCCCCTCACTTGTAGGGAAGCGGTGTCTCCTAGGTGCCCCAGTAGGGGAGGTCTGCCAGCATCGCTGTGCTGTGGGGAAGGCAGCCAGAGGGCTTCATCCATAACTGGCTCAGCTCCTCAGGAGGAGACCAGCAGTGTGTCTCTGCACTCAGAACTGCCACTGGGTCGTGGTGTTAAGCCCAGGAGGGGTGCATATGTGACAACCTTGTATTGCTTAGTTGCTGAGACCAGAAGATCCAGGTAATTGCATGAGCTATTAGCTACTTCTGGTTCTCACAAACTCCTCCCAGTGTTGATAGAGAATGGTGTCCTCCGGGCATGCTCTGGGTATAGTTTTATTTGTATTATTAGGGACTCATGGAGAAGTGCTCTGGGTGTCCTGCACACTGCACTTTGGAGATCATTCTGTGATTCCCAAGTCCTGCTGATTCCACTTCCTTGGCGCTCTGGGATTAGATATCCTAGGCTGCCAATCTGCATGTTCATCTTTCAGTGGGGATACCCTGCAGGGCTTGTCCACAGCTTGAATTTCAAACCAAAAGCCAGGTACGCTTTCCAAGCCTTCCGATATTGGTTCAAAGAATTTGGCTGCCGAAGCTTTTGTGTAGCTGAGGCACCAGCAGGCCGAGGCACGAGTGAATCCATGTGGCCCGAGGAAGAGCCTTCCCATGGGCCTCAGCAGCCACACAGAGCCTCTGATCTGTTTCCCTTTGCGGGATGGTCAGTCTCCTGTGTCTCAAGACCTCAAGCAGAAACGTGTGGATCTCCCCCTCTATCTTGAAAGTCCAACCAAGTCCAGGCCTTTGTTGTGCAGTTTAAACCAGACCTGTCAGTAAACATGAGCTAATTCCAGTTTTTGTCCCTCTTTGTCCTTCTCAAGTTCCAAGGTGATCATTGCCTGTTATCTATGGGACTTGTGTAAGCTAACTTCCCAAATGCAGCTGTGAGACAAACATTTTAATTAAAAGGCAGAAGGGCCAGGAGATATAAACACTCATGTGCCTGGTTGTCAGTGAAGGCCGGGTGGCGTTCAGCGTCCAGGGGCTAATTATATTCTCTTCTCTGGGACTCACACAAATATTGCCACAAATGTACCTGACTGTCAGACTGAAGTCATTTATCTCCAAGTGTGGGGAGCAGTGAAGCCCACACGTCCAGGTAGATTTAGCTCTTACGGACTCTTCTGGGAAGCGGCAGGTGGGTAAAACTGAAAGCATCAGCTATTGCACCCTAGCTGCAGGTTTTCACAGAAAGCTGAATCAACTTGTATTGGGGATTCTGCATTTTAGAGTTCTCTCAAAGACCTAGGTTTGGGCCCTAAAATGCAGCCACCAGAGCAGGCACACCTTAAAAAGTAGGTAATGAGTGGCCTTAGTGCCTGGGCAGCTGTCAGTACTGGCCTCCTTTGGTTGTCCCTGTCCACTGACCCTCCTTCCTCCCGTTCTCTCACGTTTGCATTCATCTGCAGCCTCCATTACCATTGACCAGCTTTGCCGCTTACCTGCCTCCACCCTTCCTTCCCTAAGTTCGAGTAGTTTCCTAAGTAGCTTCCCCTTAGTTTCCTAAGGCTGAAGTAATAGAGTATAGCACAAACTGGATGGCTTAAAAGTTCTCCTTCTCAGAAAGTACAAAATCTAGGTGTTGGCACAGCTGTGCTCCCCGCAAAGCCTCTAGGGAAGAATCCTTCCTTGCCTCTTCCTAGTTTCTGGTGGCTGCTGGCAACCCTTCGTGTTCCTTTATGGCTGTGTCACTCCAGTTTCTGCCCCCATCATTACATGACCTCCTCTCTGTGGGTCTCTGTATGTCCTCTTTTTTTTTTTTTTTTTTTTTTTGAGACAGAGTCTTACTCTTTCGCCCAGGCTGGAGTGCCTTAGTGCGATCTCAGCTCTCTGCAACCTCTGTCTCCAGGTTCAAGCAATTCTCCTGCCTCAGCCTCCCTAGTAGCTGGGAT
TACAGATGCATGCCACCACACCCAGCTGCTTTTTGTATTTTTAGTAGAGATGGGGTTTCACTTTGTTGGTCAGGCTGGTCTTGAACTCCTGACCTCAAATGATCTGCCTGTGTCGGCCTCCCAAAGTGCTTGGGATTACAGGCATGAGCCAAAGAGGCCTGGCCACCTCTTTTCTTATACGGATACCAGAATGACTCTACCTTAACTAAACATAACATCTGCAAAGACCCTGTGTCCAAATAAGGTCACATTCTGAAGTTCTGGGTAGATGCAAATTTTGGGAGACACTATTTGCCACTACGGGTTTCTCCAAGTTGTGCTGTCCTGTGACTGAACACTGACCCTGTTGCTCTATTGTGGATGTTTGGGAGGATTAACACCAGCCTGTTTCTGCTCACACTGCTTTCTCTGCCTGGAATTTGCTTTACAGCCTGTTGTCCCTGGTGTGACCATCATTCATCTTGTAAGACTTAACAGCCCAGCTTTGCCCCCACTGTGGATTCCTGGCGGAAACACCGTCCCTCTTCTCCTGGTGTACATGTTCTGTCACGGTCTCTGGATCACTTGACTGTACTTATTGTTTGAGTGGTACTCACCACTAATTGGAGCATAAGCCCTTGAAGGTTTCATTTACATCCGGGTCTCAGTACTAGGGACTTAGCAGGTGCTCATAATTCATAACTGCTTTTGATGGAGTTGGAGAGGCTAGTTATTAAGATTTTTCTGAATGTAGCATCCTTAGCTGGCTTTCAGGATAGTGACTGCATGCTCTAAAAGGAGATCTTTGCAGATTTTACTCTATGATGAGAAAACTTTTTAGTACCTTTTTCTGTCAGCATGTACCTAGGTAATAAGAAGAATGACGTGACATGTATTTGGGGAATTAGCATACAAGAAGTACTTTGACATTTTCCACGTGTGAAGAAAGCTGTTTTTATTGACCTAGCTGAAGGGATCAAATTCATATTTGAAAAGATGTGGCTAAAACTTGAAAAGGACTTGTCCATGGGGGATGTCTTCATCCCTTCTCCCTGCTCAGGGAGAAGTTGCAGGCCATAAAGTGTGAGGGCATCAGCCTGGATGAGCTCTAAAGCTCAATGTTAATTTCGGATTTTAATAACATGTAAGAATACAAGTTTGATTGCAGAAGCCACAATCAAGTTCAAGGGAGGGTGAATGAATGGGAAGTAGAAGGGACCATGTGTACGTCTGTGTGTGTGTGTGCATGTATGCACACATTTTTTTAGGCTTTGGTTCCTTTAGCTATAAAATAAGGGAGTTAAAATGGGGGTGGTTTATGGTCACGATTGTTTCCTTCAAGTATCTAAAAGGCTTTCACCAGCAAGGGGGACTTGGGTTCAGAACAAGGCATTGGGTGAACCAGTGGTAGAACTGAAAGGTCGTTTCAGTGTGAAAGTGAACTTTCTTTTCAAGCAAAGGCTGGATGTCATTTGGGAGCTTGTAAAAAGAATTGTTATATGGCTGCCAGGCGCAATGGCTCACACCTGTAATCCTAGCACTTTGGTAGGCTGAAATGGATGGATCCCTTAAGGCCAGAAGTTTGAGCCAGCCTGAGCAACATGGTGAAACCCCGTCTCTACTAAAATACAAAAAATGAGCCAGGCGTGTGGCATGTGCCTGTAGTCCCAGCTACTCCGGTAGGCCCGAGGCACGAGAATTGCTTGAACCTGGGAAGCAGAGGTTGCAGTTAGCTGAGATTGCACCGCTGCACTCCATCCTGGACGACCAAAGGAGACTGTCTCAAAAATAAAAATAAAAATTGGCCAGGTTTGGTGACTCACACCTGTAATCCCAGCACTTTGGGAGGCCAAGGCAGGTGGATCATCTGAGGTCAGGAGTGCAAGACCAGCCTGGCCAACATGGCGAAACCCCATCTCCACTAAAAATCAGCCAGGCATTGGTGGCGTGCGCCTGTAATCCCAGCTACTTGGGATTTGAGGCAGGAGAATTGCTCGGACCTGGGAGACAGAGGTGCAGTG
AGCTGAGATACGCCACTGCACTCCAGCTTGGGCGACAGAGCGAGACTCCATCCCTAAATAAATAAAATAATTAAAAAAAACAATTGGTATATGGATTGGGAGATTGGACTGATGGACTCTTGATTTCTCTGATTCTTGAATCTCTGCTTGGGTGAACACTAAGATTCCTTCCATTTCTAAAATTTTGAGGTTCAGTATAAATAGAAATGGGGTCATGCTTTTGTGGCTAGGAGCTGTGTGTATATTCTAGCCACCAA";
342
343 let ranges = symmetric_dust(read);
344 let expected_ranges = vec![
345 742..808,
346 3169..3223,
347 3406..3413,
348 3424..3431,
349 3437..3444,
350 3729..3764,
351 4729..4736,
352 5831..5862,
353 6449..6456,
354 7014..7031,
355 8194..8201,
356 12955..12963,
357 12971..13033,
358 13369..13376,
359 17841..17864,
360 19193..19207,
361 19221..19228,
362 19746..19763,
363 20037..20063,
364 ];
365 assert_eq!(ranges, expected_ranges);
366 }
367}