1use std::borrow::Cow;
2
3use std::collections::HashMap;
4use std::fs::File;
5use std::io::{BufRead, BufReader};
6use std::path::{Path, PathBuf};
7
8use needletail::Sequence;
9use parquet::data_type::AsBytes;
10use petgraph::graph::DiGraph;
11use petgraph::visit::{EdgeRef, NodeIndexable, NodeRef};
12
13#[must_use]
32pub fn basename_without_extension(seq_url: &url::Url, extensions: &[&str]) -> String {
33 let mut basename = seq_url
34 .path_segments()
35 .map(|c| c.collect::<Vec<_>>())
36 .unwrap()
37 .last()
38 .unwrap()
39 .to_string();
40
41 let mut sorted_extensions = extensions.to_vec();
42 sorted_extensions.sort_by_key(|b| std::cmp::Reverse(b.len()));
43
44 for ext in sorted_extensions {
45 basename = basename.trim_end_matches(ext).to_string();
46 }
47
48 basename
49}
50
51#[must_use]
63pub fn read_fasta(paths: &Vec<PathBuf>) -> Vec<Vec<u8>> {
64 paths
65 .iter()
66 .map(|p| {
67 let reader = bio::io::fasta::Reader::from_file(p).expect("Failed to open file");
68 reader
69 .records()
70 .filter_map(|r| r.ok())
71 .map(|r| r.seq().to_vec())
72 .collect::<Vec<Vec<u8>>>()
73 })
74 .flatten()
75 .collect::<Vec<Vec<u8>>>()
76}
77
78#[must_use]
79pub fn default_hidden_progress_bar() -> indicatif::ProgressBar {
80 indicatif::ProgressBar::hidden()
81}
82
83pub fn default_bounded_progress_bar(
101 msg: impl Into<Cow<'static, str>>,
102 len: u64,
103) -> indicatif::ProgressBar {
104 let progress_bar_style = indicatif::ProgressStyle::default_bar()
105 .template(
106 "{msg} ... [{elapsed_precise}] [{bar:40.white/white}] {human_pos}/{human_len} ({eta})",
107 )
108 .unwrap()
109 .progress_chars("#>-");
110
111 let progress_bar = indicatif::ProgressBar::new(len);
112 progress_bar.set_style(progress_bar_style);
113 progress_bar.set_message(msg);
114
115 progress_bar
116}
117
118pub fn default_unbounded_progress_bar(msg: impl Into<Cow<'static, str>>) -> indicatif::ProgressBar {
133 let progress_bar_style = indicatif::ProgressStyle::default_bar()
134 .template("{msg} ... [{elapsed_precise}] {human_pos}")
135 .unwrap()
136 .progress_chars("#>-");
137
138 let progress_bar = indicatif::ProgressBar::new_spinner();
139 progress_bar.set_style(progress_bar_style);
140 progress_bar.set_message(msg);
141
142 progress_bar
143}
144
145#[inline(always)]
155#[must_use]
156pub fn canonicalize_kmer(kmer: &[u8]) -> Vec<u8> {
157 let rc_kmer = kmer.reverse_complement();
158 if kmer < rc_kmer.as_bytes() {
159 kmer.to_vec()
160 } else {
161 rc_kmer.as_bytes().to_vec()
162 }
163}
164
/// Collapses every run of identical consecutive bytes in `seq` to a single
/// byte and returns the result.
///
/// Bytes are compared exactly, so `b'a'` and `b'A'` are treated as different.
#[must_use]
pub fn homopolymer_compressed(seq: &[u8]) -> Vec<u8> {
    let mut collapsed = seq.to_vec();
    // `dedup` removes consecutive repeats only, keeping the first of each run.
    collapsed.dedup();
    collapsed
}
179
/// Computes the Shannon entropy of `seq` in bits per symbol, treating each
/// distinct byte value as one symbol.
///
/// Returns `0.0` for an empty sequence.
///
/// Byte counts are kept in a fixed 256-entry table and summed in byte-value
/// order, so the floating-point result is the same on every call for the same
/// input.
#[must_use]
pub fn shannon_entropy(seq: &[u8]) -> f32 {
    if seq.is_empty() {
        return 0.0;
    }

    // One counter per possible byte value.
    let mut counts = [0usize; 256];
    for &base in seq {
        counts[usize::from(base)] += 1;
    }

    let len = seq.len() as f32;

    -counts
        .iter()
        // Absent symbols contribute nothing; skipping them also avoids log2(0).
        .filter(|&&count| count > 0)
        .map(|&count| {
            let p = count as f32 / len;
            p * p.log2()
        })
        .sum::<f32>()
}
194
/// Returns the fraction of bytes in `seq` that are `G` or `C`.
///
/// Only uppercase `b'G'` and `b'C'` are counted; every other byte, including
/// lowercase `g`/`c`, counts towards the length only.
///
/// Returns `0.0` for an empty sequence rather than dividing zero by zero.
#[must_use]
pub fn gc_content(seq: &[u8]) -> f32 {
    if seq.is_empty() {
        return 0.0;
    }

    let gc_count = seq
        .iter()
        .filter(|&&base| matches!(base, b'G' | b'C'))
        .count();

    gc_count as f32 / seq.len() as f32
}
200
201pub fn write_gfa<W: std::io::Write>(writer: &mut W, graph: &DiGraph<String, f32>) -> std::io::Result<()> {
222 writeln!(writer, "H\tVN:Z:1.0")?;
224
225 for (node_index, node_label) in graph.node_indices().zip(graph.node_weights()) {
227 writeln!(writer, "S\t{}\t{}", node_index.index(), node_label)?;
228 }
229
230 for edge in graph.edge_references() {
232 let (from, to) = (edge.source().index(), edge.target().index());
233 let weight = edge.weight();
234 writeln!(writer, "L\t{}\t+\t{}\t+\t0M\tRC:f:{}", from, to, (100.0*weight).round() as u8)?;
235 }
236
237 Ok(())
238}
239
240pub fn read_gfa<P: AsRef<Path>>(path: P) -> std::io::Result<DiGraph<String, f32>> {
260 let file = File::open(path)?;
261 let reader = BufReader::new(file);
262 let mut graph = DiGraph::new();
263 let mut node_map = HashMap::new();
264
265 for line in reader.lines() {
266 let line = line?;
267 let fields: Vec<&str> = line.split('\t').collect();
268
269 match fields[0] {
270 "S" => {
271 let id = fields[1];
272 let sequence = fields[2].to_string();
273 let node_index = graph.add_node(sequence);
274 node_map.insert(id.to_string(), node_index);
275 },
276 "L" => {
277 if fields.len() < 6 {
278 continue; }
280 let from_id = fields[1];
281 let to_id = fields[3];
283 let weight = fields.get(5)
286 .and_then(|s| s.split(':').last())
287 .and_then(|s| s.parse::<f32>().ok())
288 .unwrap_or(1.0);
289
290 if let (Some(&from), Some(&to)) = (node_map.get(from_id), node_map.get(to_id)) {
291 graph.add_edge(from, to, weight);
292 }
293 },
294 _ => {} }
296 }
297
298 Ok(graph)
299}
300
301#[cfg(test)]
302mod tests {
303 use sdust::symmetric_dust;
304
305 use super::*;
306
307 #[test]
308 fn test_canonicalize_kmer() {
309 let kmer1 = b"CGTA";
310 let kmer2 = b"TACG";
311 let kmer3 = b"AAAA";
312 let kmer4 = b"TTTT";
313
314 assert_eq!(canonicalize_kmer(kmer1), b"CGTA".to_vec());
316 assert_eq!(canonicalize_kmer(kmer2), b"CGTA".to_vec());
317
318 assert_eq!(canonicalize_kmer(kmer3), b"AAAA".to_vec());
320 assert_eq!(canonicalize_kmer(kmer4), b"AAAA".to_vec());
321 }
322
    /// Regression check for `sdust::symmetric_dust`: runs it over one fixed
    /// read and compares the returned ranges with a hard-coded expected list.
    #[test]
    fn test_dust() {
325 let read = b"TGGCAGCCATAGGTTTTCCCTGGAGTTGTGGCATCTGGAACTACAGGGATGAGCATTTGAGTACATATTACAGTGAGGTGGCCACACTGTGACCCGCAGTTCTGCAGACTGGAAGGCACTGAATGCCAGGATTTTTGCAGAGTGTCACTATGAAGTCCTGACTTGGCTCAGAGACCTTCTTAGAGCAGTAATTCGGGACCAGTGGATTTCTGATAAAGTTATTCTAATTTTCTAATAATTGTTTTCTAATAAAAGCCATATGGCAGGTCCTGCTCCCTTGGTAGCATGACCAGTACCTGGCGCAGTGCTAGTGCTGAGCTGACAGGAAGTGCCTCACCTTCATCTCTCACTTGACAGTGGGTGGAAGGTTCTTGGCTCGGTATCCCTCAGTCATGACTGCACACTGTCCTGAGCTTTTCTCCCAACTTCATCCACTTCATACTATTTTAATAAAGCGGTGCTGTGTATTATAACATTGTGCAGCTGAGCATTACACTCATGGCTCCCATTATCAAGCCCCTGCTATATACAGGGCATTTCACAAAGAAGCAAACTTCCAAGCAGTCACTCAGCAACCTCCTCCTAGGAGCATTTGGGGAAGAGAATCTTGGGGCAAGTTTCCTTTACCACCTGCAGTCACCTGGGATGCTGGGAAAAATTTTGATTTCTGTTGTCTTCCCTTCCAGAAAATTATTTGAGAGTGGGGCCAACAAATCTGCACTTGAGTCCATACCTAGGATAGGTTTTTCTGTGCAGTTTTTTAAGTTTAAGAGGTTTTTAAAGTTTAAGACACACTGGTTAGGGTTTTGGGCTCTGGAGGATGAGAAACCTTGCTTGGGTTATCAGATAACAGATTCTTCTCTGGTTTCCCTCCGATGTTATCAGGGGAATTGTTGGTTGTTTCACATTTGGGTGCTCCTGGGCCTTTTAAGAGCCAGGCTGGGAGGGCTGGTGATGGCAACCCTGGCTGGCAACAGAGGCTGTTTCCACCCCTGGGTGGCTCCCCACCTGCTTTCTGCCCTGGTAGGGTTCAAGGCTCCGGGAATTGGCACTCAGTGAAAGAATTTTGATTTCCAGTGGAATTTGTGCTGTCACAAGATTTGACCCATGGGACTAGTGAATAGATAGATGGGTTAGGTGAGCATGTGACTTGGCTGGTGGCCGAGAGAGTGATAAATGTGAGAGTAGCTGGGGAAAATGGAAACGGATTAAGATAGAAGAGGGGCATTGTCCATCTGGCCGATGGCAAGGGCTGGTGGAGCAGCAGTTCTAGACTATTCTGAGGTTAGTTCAGAAACTGACCTAACAACGTGGGAAGTCTCTCCCAAATTGTTTATAGTTTCTCACAGTGGGTGCCTTTTGAAGTGATTGTATTTGACAGCCCAGAGTGTTGGGCACACAGCTTTGTGCTATCTAAGGTCACGGTCCAATTGTGATTCCTAGCAATAGCTTCAAGGCATATTTCATAGCTCTAATAGTTTTCAAGTATAAGGGTGTGAGAATGAGCTTTAAGAATATTTATGCCATGAAATCTTCCAATTGCTCTTCAACACGGGTGCACCATAGTAGGTGTGAATAGAAGTGGTGGCAACAGACCTGAATTCAGGTCTGCCACTGACTATAATACTAGCTTGAGAAGTAACTTGAACTCTGTGAGCCTCAGTTTCCTGTCTGTAGAATGAAGACAATGATACTGCCTTCATAGGATTATTATTAGGATTAAATGAAATATTATAGTGAGGCATTCAGCAAAGTGTTCTATAAATTGGGGTAGGATGTGAGGTAATTGGCATTGTTAGATGCGTCTCTGGGTAAACAACCAAATTTTCTGCTTATTTGGCTGTTTCCCTAGCTGCCTTGTTTAAAACAAAACACCTGAGTTGACCAGAACACCTCTGTTTTTAGAATCTAACTTTGCAGTTGTATTAGTCTCTTCTTGCATTGCCATAAAGAAATACCTGACACCTTCATAAAGAAATGAGG
TTTAACTGGCTCACGGTCCTGCAGGCTTTACAGGAAGCATGGTACTGACATCTACTTGGCTTCTAGAGAGGCCTCAGGAAGCTTACAGTCATGGCAGAAAGTGAAGCAGTAGCAGGCACATCACATGGTGAAAGCGGGAGCAAGAGAGACAGTTGGGAGAGGAGGCGCCACACACCTTTTAAACAACCAGATCTCCCAAGAACTCACTCACTATCGTGAAGATAGCACCAAGCCATGAGGGGTCTGTCCCCATGATCCAGACACCTCTCACCAGGCCCCACCTCCAGCACTGGGGATTACAATTGAGCATGAGATTTGGGTAGGGACAACTATCTGAACTGTATCAGCAATAGAGTGTGATTATAAGTTATGCTGTAGGAATAGAATTGTTGTCACTGAAAGATTCCCTTGGCCATGGGAGCCTCCTGGCTCTATGAAGGATCAGCCAATGCTTATCCAGGGAGGTAATGATAAGGTCGAAGTTTGACAAGAAATCTACGTTTTCTTAAGCTAAGTAGTAGGTTAACAGAAGATATGTTGTGTGTTAATAGTTCTATTTACATCTCTTTCTCCAAGGTTATACACACTCTGCATCACTAAGTCAAGACACCATTCTTTGACACTGGCTAATAGTAATAGCAATCATAGCCACTGTGCATTAGCACTTACTCCACATTCCTTGTACTGAGCACTACTTACATTATGTTGGTGTTGTCATTGTCACCATTTCATACATAAAGAAACCAAGGTTTTCAGAGATTGAATAACTTGTGCAAGATCACACATCTGGTAGGGCAGATCCAAGATCTGTTTGTCTCCAAAATCTGCTTCTGTCCTGCCTGGGAGACCTTGGGAATGACGGCAAGTGGTTGTAGGAAGGAGGGCTGATGTCAAGGTGGCTGTGGGGGCAGGAGGCTGAGGGAACTCACTGACCCTTGAGGGACTCCTTAGGTGGGGGATTCTGGGTTTCCTGTTGGCAGCTGGAGGGGGAGTGCCAGTTCCCATAAGTGGTTATTGCCCAGGTTGTGACCTTGGCTTGGCCAGTGATTGGTTCATTTTGGAATTTCATGAGTGACCCCCAGGCAGGGTTCTTACAATCACGCTGGAAGACCACCCAGGAAGTTCCTGTTGGGGTAAAATGATGCAGCAGCCTGCTTTCCTCAGGAGGTCTGAACCCTCCCCATGTACACACACACACACACACAAACACACACACACCCACCCACACACCCTCCACCCCTCTTGGTGTCTTTGGCCTTTTTTCCTAGCTTGTTTGTTTCTATGGTGTCTTCAAGTTCAACTAGAACCTATGGGAATGACTTAGTTTTGAACCTGTAAGAATGAGAAGTAAACAATTCTTGTACTGACTTTGAATTTCCTTTCTTCTGTTGTCCAAAGGTGAAGGGTGACAATGTGTCCCAGATTTTTTTGGATATTCTACAAAAAAATAGATATTTTTTTGTAGAAAAAAGCTTATTCTACAGTGTTGTCCCAATTTTAAAAGCCCTAGAAAACTGGTTAAGGCAAATTATAACCAAATCAAATCACTAATTATTACAATAAAGTGTAACTAGCTACAAAAATCCTAAATTACAATTTTAGGCTTTGAGAAAATATCACTGATGATAGAGGAAGAGTGACAGTCTTTGTTTTGGGTCTTGGGATGGCAGAAAGAAAATATTTAGTAGGGAGTAAAGATCAGTGTACCCCTTGAAGTGTGGTGAAGATGGGTGGGTTTTGATGCTCTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTATGTAGAATTTCCCTAAATTCAAAATGATACTTACATTTTGATAAACCCAGAGAGTTAATTAACATGGCATAAATGTGCTAGCTCATTGTTGTTTTTCAGGGAAAGAACTAGAGAGAGACACAAGTCACCCCGGAATAAGACGGCAGAGGGTCAGAAAAGTCTGTCACCATTCAACCTCCCACTGGAGAGCCCCTGTTGGGAAATGATTCTACTCGGAC
AGAGGAAGTTCAGGTAAGGATCAAAGGTGGTCTGTAAGCACACTGCCACTTGGCCAAGACTCCTGTCTGTAAATGTTTTCCTTAGGTTCTCTTGGATTTTTGTCTTTATTTTTCTAGCATGCTAATTAGTTTATCTATGTCCCATGGTCTCTTGTTGGTCTAAGGACAGCCTGTCCTGCCCTTTGGTAGCGTGGGGATTCTTCTGAAGTATGATTGGTTGGCCTGCTTTACATGGTGTGGAAAAGTAGCCAGCAAGGTTGACGACAGGGTTGGGAAGGGAAAAGCTGAAGTCTCCCACGACTCATTTCAAAATGGAACAGTATAAGGGGGAGAAGGAAACTCAGAAAAACCTAAGAAGTTTAAAAAACATGGGGCAGCCCAGACTGACCATTACTACTAGAGCTATGCAGGAATTGAGAGGCCGCACGCTCAGATGCCTGGTAGAGAAACGTAAGTTAATTAAGGAGGCCCTGGGTCGAAAAGAGGGGGCAAAAATATATTAACCTAGCTTTGGGTTAACAGCAATCTGTGCAGTGCCTCAGTGTCAGTGCTGTAGTGTGGTGTGAAGGAGCCTATGGCTAACTGGAGAATGCATTTCCTCTGTAAAGGAAACAGCAGCTCCGCAGCTCCAGACACCTACTGTTGCTCAGGAATGCAGGGATTCATTGTTTGAGAAAAGCTGCAAATCAGGATTTTATGTGGAATCCTAACTTTACAGTATTTTTGAAATACTGATACTTTATTAATTTTTTTGAACCATTGAGTGGGTTCTTCTCCAGGTTCCAACTGTCTGGCAGCCCACCTGATTTAAATAAACATCTGAGCTATACACAGAAACATGTCTGCATACCCTCTGCACATCCTGAAGTATATATACACATGTCCAGCCTTGCCCCTCATAAACAAAGTGGTGTATGATACACAGCTGTAAAGATAGATATAGGACTATAGATAAGCATACATCTGTACATACCTGTGCATACACACAGGTAAGCATTTATAATCAAATAGGTGGACTGAAACTGGAATTCCTCAGAGTACACAAGGTGTTCTTGGGACACCAAAACTACAATTGTGGGGTTGAACGTGGGATTCATTGAGCAATGAGCAAATGCCTTTAGTGCTGCCTGCCTTGGCTCTGGATGGCTGATGGTCGGATGGGGCCAGTCTTAGGATTGGATCACCCTGGAGTACTTGAAGGGGTCAGTTTCCTCCTGGATGTGGGTTCAGAGGTGCCAGTGGCCTACAGCAAAGGCTCTTCTTTCTCTGCATCTCCTCTGCACCTCGTAGCTGAGAACACTTTGAGAAGCTCTTGGTGTTGCCCCAGGATGATCTGGTGTGAAAAGCATTGAGATGGGTGTTTGGAGGCTGTATTTTTTAGTAGCTCTGTTACCTTGAGCAGTCACAGCCTTTGTAGGCCTCAATTTCTTTATTGAAAATCTAGGGTTTTGATGAAAGCATCTTAGGTGCTTTTTCTTCTAAGAACCTGAAGCTTAACAGGATCCTTTGTGTATCTACATGTTTTAGGCATACATGTGCACCCCAGGAAATTCTCTCATGCCCTTTCTAGTCAATCTCTGCCCCACCCTCACCTCTCCAAGGCAACCACTGTGTTGATTTCTATCACTGTAGATCAATTTTGCCTGTTTTTGAATTTAGTATAATTAGAATCATATGGTCCATCTCCTGGCCCCCACCCACCCGCCCTGCTTAGCATAATGATTTTGAGATTTATCCATGTTTTGGTATGGTTTCAACAGCTTATTCTTTTTATTTTTGCTGAGTAGTATTCCATTGTATCAGTCTACCACAATTTGTTATCCATTCTCCTAGTGGATGGACATTTGGGTTTTTTTTGTTGTTGTTTGTTTGTTTTTTGAGGCAGAGTCTTGTTCTGTCGCCCAGGCTGGAGTGCAGCGCATGATATCACCTCACTGCAGCCTCTACCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAATAGCCGGGGATTA
CAGGCACGCACCACCACGCCCAGCTAATTTTTGTATTTTCAGTAGAGATGGGCTTTCACCATGTTGGCCAGGCTTGGTCTCGAACTCCTGACCTCAGGTGATTTGCCTGCCTTGGCCTCCCAAAGTGCTAGGATTACAGGCGTGAGCCACCGTGCCTGGCTGGATGTATATTTTTATTAATTTTTGGATAAAACCTGAGAGTGGAATTGCTGGGCCATATAGCTAAGTGTATATTTAGATTTATATGAAACCGCCAGAGTGGTTTTCCAGAGGCACTGTACCACGGTCCACTTCCACCAGCAGTGTTGGAGAGTCCTGGCTGCTTCTGGCCATCGTCTGAAATAGGAATTTCTCTCACTGTAGGTGATACTTCTGACTTTGCAAGTTGAAGGATTATTAGTTTATGGGATTGAGACCTTCACCACCACCACTTCTTACCATAGCCCATACATTTCATAAATCATGGTTTTTTTGGTCATTACTAGATTCGGAGTTATTTGATGATGAGCGATGTCTGTCTTGCTGATTTAGCTACTAACTGAAACTAGCTTTTTCTAAGTTGGTGTCCTAATTTCACCCCCTTTGCCACTGCATCTGACTGTTTTCTTTCCGAGTGAAAGGATACATACAAATTTCAGAGGCAGAAACCTCTTTGGCCTCCTGTGTCTTTTCAGCGCCTTGCTCTTATTGCTTCATTATTGTTGCCAGTTGGTTTTTAAACAACAAAATCCTTTAAAATTCTATCAACTGGGTTTTGCTAAGTGAATAGACTAATTGCTTTAACTAGCAACGGCCTAGAAGTTTAAAAAGAGAGGAAGCTAGAAAGTAAAAGATAACATTTTAATAATCCTGGTTGTTTCTATGCCCTTGATGTTTAGTTCCTCGTGAAAACATGTTTTAGAAAGAATTTTTAAGCCAATCTGGCCATACACGGATTCCTGGATTTGCTTAGCTTGGTCCATGAGAAATATTGTTAAAGAGTGCTTGACACTGATGCTTGTTAAGTGGATCTTGTGAACATCATAAGGAGATTTTTTTTTTTTTTTTTGAGACGGAGTCTCACTCTGTAGCCCAGGCTGGAGTACAGTGGCACGATCTCAGCTCACTGCAACCTCCGACTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCTGAGTAGCTGGGATTACAGGCATGTGCCACCACGCCTTGCTAATTTTTGTATTTTTAGTAGAGACGGGGGTTTCACCATGGTTGGCCAGGATGTTCTTGATCTCCTGACCTCCTGATCCGTCCGCCTCGGCCTTCCAAAGTACGGGGATTACAGGTGTGAGCCACCGCGCCCCAGTCCATAAAGAGATTTTAAAATGTGGGTCCTAGCTACAGGTAAGCTTGGGTTTGTGTAGTGGTGTAAGTTCCCTTGCTACGCCCTTTGCTCTTCTGGGCTGCTAGAGGGTGTAGTAATACTCCCACCTCCAAAAGTTGGACTTCGTAAGCCTTTATAACCCAGCGTGAATTGGAAAGAAGATGCAGGAGGTTTATCTCTATAGATGAGCTCTCACCAAGATTAGTCTAATACCTGGGTTGCGCATTGCAGGGCAAACAGCTCCAGGCCCTCAGAGCTGCTCAAGGCTTTTCAACCCAGGGGATGATAATCAATGTTATGTCAATGAATCAGCCAAACAGACAGAAAGATCACATTATGTTTTCTCTGTTTGAAAGGTAAATACCTCATACATTTTGAAAATTTCAATGAAAATCGTTTGAGTTAAGAAGTTCTAATATTTAAAGAGTTAAGCCTTTCATTTTCTGGAAGCCTTTGTGAATAGGGCTGGGTAGATGCAGCGGGCCCTGCATGTTCACTGCCCTTTGTAGCTTTTACAAATGACCTGTGTCATGTCATCCTCACTGTCTTCTCCCCACCAGGATGACAACTGGGGAGAGACCACCACGGCCATCACAGGCACCTCGGAGCACAGCATATCCCAAGAGGACATTGCCAGGATCAGCAAGGACATG
GAGGACAGCGTGGGGCTGGATTGCAAACGCTACCTGGGCCTCACCGTCGCCTCTTTTCTTGGACTTCTAGTTTTCCTCACCCCTATTGCCTTCATCCTTTTACCTCCGATCCTGTGGAGGATGAGCTGGAGCCTTGTGGCACAATTTGTGAGGGGCTCTTTATCTCCATGGCATTCAAACTCCTCATTCTGCTCATAGGGACCTGGGCACCTTTTTTTCCGCAAGCGGAGAGCTGACATGCCACGGGTGTTTGTGTTTCGTGCCCTTTTGTTGGTCCTCATCTTTCTCTTTGTGGTTTCCTATTGGCTTTTTTACGGGGTCCGCATTTTGGACTCTCGGGACCGGAATTACCAGGGCATTGTGCAATATGCAGTCTCCCTTGTGGATGCCCTCCTCTTCATCCATTACCTGGCATCGTCCTGCTGGAGCTCAGGCCAGCTGCAGCCCATGTTCACGCTGCAGGTGGTCCGCTTCCACCGATGGCGAGTCCCGCTTCTACAGCCTGGACACCTGAGGTAAGAGGCAACATCCAGGAGGCAGAAAGGATGGCTGATGTCTTGCTGGGAGACAGCTGCTCTGTAGCACGTGAGGGGTGGTGACAGATGCCAAGAGCTAGGACCAGAGTCTGACTCTTTTCTGGTTTTGGGGAGGAGATGCGAGGGTGGGGAGGGTTGTCCATGTTCATTGAGTTTCTGGACTTCTAGATGGTGCGGGGCAGTTGCTGGCTCTCACCCAGGTTGAGATTTTGCTGGGCTTGTTCTCAAAGTTATTGGCAGCTCCCAAAAATGATGGAGAAAGGAGATGCATAGTGATGGCTGCCTTCTTTGACTCTGAAATTGGCCAATGGACAACAGATAAAGTGACCAGCAGCTCCATTTTGTCCCAAATGTGACATCTGGTTTACCATGTTGTCCCAGTGGAATAATGAATTGTTCCTTTTTTCCCACTCTCAGAGGCCTGGTTTGGGCAGTAAATTATATGGTCATCCGAGGGACCCTTCCAATAAAGAATCAAGTGCAGGTTAGAGACTCCAAATGTGTAATCCTTGAGTGTTGTGAAAATGTATGCCGTGAGAAAAAGTTAGAAGTCAGTTGGGTTGTCATACTTACATCTTTGCATAAAATCTCATTATTTTGTGGTTAAATAAGAGTGATTACCATCATTTTATTTGCTTCAAGGTAAGCACTTTATATATAGATTGTGTATTTAGTCTTCATAGAACCCGTGACCTAGGTATTATTAATCCCTGTGTCACAGATGAAGAAACTAGGGCTTAGGGGATTTAAGTAATTTGCACAAAAACATATGGCTAGCCTCATTTAGGATTCACTCAGATGTCATGAGGCCAGGGCTGAGTGAATGCCCCCATAATGGCATCTCTCACTTTGTGGTTAGTGGCCTATTTTTCCATCTGTTTTCTTCCACAGACTATGAACTCCCTGAGGCCAGGGGCCACCCTTATACCTCATTACATCCTCAGTGCCTGGCATGGAGCATGGCTTGCACTGAGATGTTCTCTGGGTGAATGCAGAGCCTGGGACATTTGACTTCAAAGCCTTTACCCTCTCCCAGGCTCTCTGCCTCCTTAGGCAGTATATGCTGATGTGTGTAGCCTGCTTGGGGCAGGGTAGGCACTTAGTTCATTGCAGCTATTACTGCTGTGATCATGTAGCTGGCAGAGCAGCCAGAATCAGCAAGGGCACACCTTAGTGGGTATCAGAACAATCGGCTTTGTCATAGATTTGGCTGGGCTCCAGGAAGGTGGCTCAGCCTGTATTTGGAGTCAGGCCATGCTGCCAAACCATCTTCATGTTGGTGTGTACCCCCTCCTCCATTCCTCTGGCTTGGCTTGTGCTACGAGAACGGGATGATCTAGCGTTCAAGGTTGCTGCCACCCTAACTGATCCTTGGTGGAAACTGGTGTCCAAGTCACATGTCTGTGCACCAAAAATCTGGGGTTTAGAGTCCTTTCACAGATGCCTGTAGGGCTCTGAAGACAA
GTAGGTCACCGCTTTGCTGCATATTCATCTCAGAAGGCTTTCTTTTCCCATGTTTTGCATCAGGGAATGACCAGCAGTTTTGTGTTAAACATCTGCTGTGTGCAGAGCCCTTGGACACACCAGGCTGGCTGCCTTCAGAGCTCTATCTCAGCACCTGTGGCACTCACAGTCACTTGGAAAGAGACCAGTGCACCGCTGTCTGGTGGACAGGTTTCCAGGAAACAGGCCTGGGGGTATAGGTGATAGGAACACAGGAGGACAGAGAATTTCAGATTGTGGCAGCAATAAAGCCGAGCAGGGAGACAGTCTGTCTCAGAACAGGTTTTGCTGCAGTTAAAGTGGTAGAGAAAATCCGGCTGTGGTCTCAGTGGAGATGAATGATATTTGGAACTCTGTATATGTAAGTAGCCAAGACACTTGGCCAGGAGTGAGGTCTATGGTGGTTTTGTTTTTTGGCCCTTAGCCCTAGTTGGTGTGAATTCCACCTGTGTAGGTGGGAAAGGGCAGGGCATCTTCTCACCATAGGTCATGCAGGGTGGTGGGACCGACTTACCCCCATGGGCTCCCACATCGCTCCTCCCTGTACGACTGGTTGAGCTGCACACTGCATCTGAGTGGGAGTGGAGAGGGGACAAACCAAACAGCCCGAGGAAAGTATGCCTGTGGCATGTTCAGGAAAGCATGATTAGCAGCAGGCCCTCGCCTCCCACCACACAGCTCTGCTGGTCAGGGCAGAGCTGGATGGGAGAAGCCAGACTGATTGTGCTGCATGGCTCCCAGGCTTGCTGCAAACCTTTCAGTCTGCTCTTACCATGACCAACAACTGTCCAGGCTTTTAAAAAACTCAAGTCAGTCACCCCAGCTCCCCAGGGAGAACTGAAAGGTGGCAAGTGCCCATCTGCCCTGGGGAGAGCGTTTTGAGGTTGGTCCCCAGCCTCATCCTTTCGGCTTCTTTTTAGGACCATTGGTGTTCCTCCTCTCCCTGCCTTTAATAAGGCCCCCTTTGTCCCTCTCGTGGAGAGCCTGAGTTAGGAGGTGGAAAGAATGGCTGGGGAAAGAGGGACATCTTTACTGACAAATGGAGCCCTCAGGGAGAGCCAGATGCCCAAGTGTCAGCCAGTCTGCCAGAAGCTGGAGCAGGCTTGGCACCTTTCCTCCTGGCATTGTGTGGGCCTGGTCACCTGCCGATCCTTGGGCTAAATCTGGTCTGAACCCAGCAGTGGCTGGAAGAGTTACTAGGCCAGAAATACAACTTCTAAGGCCTTTTGTAAGTGTAGAAACAGACAGGAGGGAAGAGGGAGCGGGAATAGACAAAGCAAGCCTCGGAAATCAGAATAGCAGGTCTCCAATTAGACCCAGCAGAATCACAGGCTGTTGGCTCTCCCTTTATGTAAAGCCTTCACCGTGGCAGCACCCTATTGGGCTTAGGTGCCAAGCGATGGTGAGTTCTTTTTTATGTTTTCAAAGATGATTTTATCGAATTGACTGAGCTATTTTTGAGAGTTGTCTAAAGAATGTCTACTCTTTAGTTTCTTAAAGAAAATAGGCTTCTCATTAGTTCATAAAAGGTGCTTGCTGTGGCCCTGCTTGTTGGCAGGAATGAAGTTTTGGGCTTATTTGAAAACTTTCAAAAATGTAAAAAGTTGTTGCAGAAAGTAAGATACCATAAATAGATTGAGATACTTCCTAACCTCTGCCCAGTGCCCTAGGAGTTATGAAAAGCTTTTCATAGGTTTGGACTCATTTACCCCTCCTTGCTGGCCCTGTCAGAGGTCAGAGCAGTGGGGTAGAGGTGTCCCCTCTTACAGTTGAGGTCCCCAACCCCCAGGGTAAAGGGACCTGCTCAAGGTCACAGGGAATCAGCACCTTCCTGTGCATCACACCGCTTACCCCACCGCACTTTCTACAGCGTCCTGGTGTCTCACACAGTCGCTTTGTCATTTTCCATACACACCTTGCTCGTCACTTTTCTTGGCCCCGTTCTCACAATAAGTTGCTAA
CTTTTCCAGGATGTTACCAGAGACTAATGACTGTTGATATGACTTTATTTGAGGAGAAACCCAGAAGAATAGAAGAGCCTAAAAATTGGCATTCAATTATCTTAATCATTTTTCAGTTTTGAAACCTCTAAAGGGAAATAAGTGTGAATACTGGTGCACAGGCACTAGTGTAATTGACTGGTTGAATGTGAAATGGTAGAACAACAGAGACAGATAAAGAAAACCTTAACAATAAACAATTACATAGTCTTACTGGGAGCCAGGTACCTTCTGATCTTTTAATGTGCAACTCAGTTTTCACAACAATCATGTGAGGTAGGTTCTGGTAGCCTCCTTTTCAGATGAGCAAACCGAGGCGTGGGAGAGTTAGGTAAGTCTAAGGCCCCACACTTTGTAATTGTGGGAACCAGGATTTGAGCCAGGCTCTCTGGTTCCAGAATGATCTTACCCATTTCACCATACTACCTCTGAATAGATAGTTGCATGTTCACGTCACTGCTTTAGGAACACATGAACAAACCCAGAAGCATTTTTTGAGTGTTTCCCATGTGCCAGCCTCGGTGCCAGATAATTTTTGTATACATTATCCTGTTTCACTTAACACAGAGCTTAGGGCTGGAACAGAGAGAATGTGCAGGATTGTCAAGATGGCTTGCACTCTGAGGTCCATTCCTTAGCTCCACTGGTTATTTTATTCACTCAGTTGACAAAGCCTGGTCCTTAAAGTCATAGCAGTAGGTTTGAAGCCTTTGGTCAGACCTTTTTAACTTCCTATACTTCACATGTCTCAAAGAAAATATATTTTATCTATTGTTGTGTAACAACTACCTCACCACTTAGTAGTTTAAAACAACAATTCAGTATTTCTCACAATTCTGCAGCATGGATTGGGCTCAGCCAGGAGGTTCTTTAGCTGATCCTGCCCATGGTCCTGTGTGATTATACTCACATGGTGAGTCAGCTGAGGCTGACTTTTTTTTCCTGTGTATTCTCTCCTCCTTCTTTCCTGTATATTCTCTCCTTCTCATGGCCTCTCCTTGTTGTTTCCTCATTAGGGTTGCTAGACTTCTTAAATGGCAGCTCAGGGCTCCCAGGAGCACAAAAGCAGAAGCTCCCAGGCCTTTTAAGGTCTGGGCCTGGAACTGTCCCAGTAGTATTTCCACTGCATTCTCTTGGTGAAAGCAAGCCACAGCCCTATCCCACATTTAAGGAGAGAGGACGACACAAGACAGAGGCTACTAGGATGTTAAGTTCAATGTCACAGACCCCCACAACCACCTCGGGCTGTCTTTCTCAGAGTCTGTTTATTCTCATTTTGCATATAGTTCTAACAAATTTAATTATTGATTTCTACATCTTAAAAAGCCCAGAAGTAATATATTTTGAGGGGGGGAAAAAGTGCTGCTTTAAGGAGGTATATGAACATCAATGGAAAAATGATAGCTGATAGTCATCAACAAGGAGGGAGACAGAGAAACCACAAAAGCAGGTATGACTCAGCACTCTGGGAAGCTTTCCACAGTGACCCATTCTATAGGATATTTATATTGCTGAAGCTCCCTTGTACCTAATTCAGCCAGCAGGTTTTAACTGTTTGGGTTTTTAAGCTTCAGGGTCAAAGTTTTGGGGTAAAAAATGCTTCATTCATTGAGACTGAGAGAGAGTAGCTTATAAATTGACACTGACCATAGACCTTGATTTTGTGTCCCCACCCAAATCTCATCTTGAATTATAGCTCCCATAATTCCCACATGTTGGTGGGAGGGACCCAGTGGGAGTTCATTGAATCACGGGGGCGATTTCCCCCATACTGTTCCTGTTGGTAGTGAGTAAGTCTCACAAGATCTGATCGTTTTATAAGAGGAAACCTTATAAAAGGTGGCTCTCATTTTCTTCGTTGTCTGTAAGATGTGCCTTTTGCCTTCTGCCATGATTGTGAGGCCTCCCCAGCCACATGGAACTGTGAGTCCATTAAACCTCTTTTTCTATATAAATTACCC
AGTCTTGGGTATGTCTTTATCAGCAGCATAAAAATGGACTAATACAGATCTTTTAATCAAAGATGTGGTAACGAATCACAGAACCACCTGTGCTTTAGAAGAAATGATCCTTGGATTGCTTTTCAAGCAATGGAAGTTTATGATTGTCACATTGTCAATTGTGATTATATTCAAGGAAGTGTTATGGACTGAATGTTTGTGTCCCCCCAAATTCCATATGTGGAAGCCCTAACCTCCAGTGTGGCATATTGGAGATGGAGCCCCTAAGGAAGTAATTGAGGTTAAATGTAGTCATAGTGTGGGGCCCTGATCCCATAGGACTGGCATCCTTACAAGAACAGATACCAGAGAGCTTGCTCTGTCTCTGCACACACCCTTAGAAAAGGCTGTGTGAGGCCCCAGAAAGAAGGTGGCCATCTGTAAGCCCAGAAGAGAGCCCTCAATAGGAACCAGGTTGGCTAGAACGTTGATCTTGGACCCCCAACCTCCAGAACTGTAAGAAAATAAATTTCTGTTGTTTAAGCTACCTAGGCTGTAGCATTTTGATACAGCAAGCCTGAAGCTGAGACAGGAATATTACATACACTGGAGACTTGTGACCCCAAAGACTTTTGACCTGTTGAATAGAGCTCATCTTGTCTCTCTCCAGCTCATGCATGCATCCTCCCAGCTTGCAAGGGGGCCTTGCTTCTCTGGATTGCACTTTGATTTTCTAGTTTTAAGTGACAAAGGGAGAGTCTTCTAGGGATGTTAAAGTTACTCCAGTAATTCCAGGATATTTCCAGCTCCTTTTGAAATCTTATGTTTGTAATTCTGGGTCAAGTAATGTCCAAGCCAGTGATTACATTACTGGTAGGCATGTCTCTCATGCTGGGCCACGCCCTTCCATCCCATGTTCACGATGAGCACCAACGGTTCTCTGAGAGCCCAGAGCCAGTGGCTGCAACGTTGGGAAAATTCTTAAATGACCATCAGTGGTTTTGGCTCATGTTCCTACGATTGTGGGGTTCATATACCATCTCATTTTTAGAAATGTGTGTTTTTGTACTCCTGTAATTACTTTTTAATAAAGATATTTTGCCAGTCCTTAGCTCCACTCCAATAGCAAAGCAAAGGACAAGAACAAGTAAGGGCTGAAACATAGAGCGTGGAGGGTTTTGCTCAGGCCATGCTTTGCTGTGGGAGAATTTTGAAGGCGGGAGTGGAGCTGCCGTTTGTGGTTTGGTGCTGTGGTGCCTGTTAAAAGTGGCTTTAATGAGAGTGTAAGGTGCTGCACACTGAAGCCCTGTGTTTATTCAGCTGCCTCCTGCCAGCGGCTACAGCTGGGATGGCTTCCCTCGCACGGCGTCTGCCCACAGCCTTGCGCCCGGAGCCCAGAGGACTCACAGGAAAGGAGCTGGCAAAGGTGGAAGCTGGTTTTCATGGTCTCCTGAGGGCCCCTGGCCCCTGGGAGATGGGTCACACTCCCTGAATGCTGTGCTGTTGGTTTCCCTGGAGGATTCTTGCTGCAGGCCAGGTCCCGTATTCTCCACACTCACCACAAGTGGCTGGGTGTGACTTGACACGGTGTGAAAGTGGAGGGGCGCGAGCACTCAGGTGGGTGAACAGCCTGCGGGCCTCCTTTCCCTGGCTGCAAAGCCGCCACTCAACTCTGCTCCAGCCCAGGTTTCGGGGAGCCGGGATCCACTTGGGCAGGCCGGGAGCCTCAGACTCCAGACTTTTCATGGTGCGCTCCTTCCTGCTTACTCACGAGGAAGGCGAGGCAGTCCAGCATCCTGGGTGGAGTGGAGGGTGTTTCGAGTCCATATCTAAATCTTTTTCTTAGAGCACCCTAAGCAGGCTGCTGTCTTTGATCCCCATGCCTCTGCTGTTTATCTTGGTGTGATTCATCACTGTAATTTAAGACGTGGAGAGAGCAGAGTTCCCATCCCAGGCAAGGGGAGGCGCAGTGCAGGTTGGACTGTGTAAGGAAATGGCAGCATGGCGAGGTTTGTGCCGCG
GCCTACAAGCAGGGCTGCATGCTCCCCAGGCAGACTGTGGCAGAGCCAAGCCCCTCACTTGTAGGGAAGCGGTGTCTCCTAGGTGCCCCAGTAGGGGAGGTCTGCCAGCATCGCTGTGCTGTGGGGAAGGCAGCCAGAGGGCTTCATCCATAACTGGCTCAGCTCCTCAGGAGGAGACCAGCAGTGTGTCTCTGCACTCAGAACTGCCACTGGGTCGTGGTGTTAAGCCCAGGAGGGGTGCATATGTGACAACCTTGTATTGCTTAGTTGCTGAGACCAGAAGATCCAGGTAATTGCATGAGCTATTAGCTACTTCTGGTTCTCACAAACTCCTCCCAGTGTTGATAGAGAATGGTGTCCTCCGGGCATGCTCTGGGTATAGTTTTATTTGTATTATTAGGGACTCATGGAGAAGTGCTCTGGGTGTCCTGCACACTGCACTTTGGAGATCATTCTGTGATTCCCAAGTCCTGCTGATTCCACTTCCTTGGCGCTCTGGGATTAGATATCCTAGGCTGCCAATCTGCATGTTCATCTTTCAGTGGGGATACCCTGCAGGGCTTGTCCACAGCTTGAATTTCAAACCAAAAGCCAGGTACGCTTTCCAAGCCTTCCGATATTGGTTCAAAGAATTTGGCTGCCGAAGCTTTTGTGTAGCTGAGGCACCAGCAGGCCGAGGCACGAGTGAATCCATGTGGCCCGAGGAAGAGCCTTCCCATGGGCCTCAGCAGCCACACAGAGCCTCTGATCTGTTTCCCTTTGCGGGATGGTCAGTCTCCTGTGTCTCAAGACCTCAAGCAGAAACGTGTGGATCTCCCCCTCTATCTTGAAAGTCCAACCAAGTCCAGGCCTTTGTTGTGCAGTTTAAACCAGACCTGTCAGTAAACATGAGCTAATTCCAGTTTTTGTCCCTCTTTGTCCTTCTCAAGTTCCAAGGTGATCATTGCCTGTTATCTATGGGACTTGTGTAAGCTAACTTCCCAAATGCAGCTGTGAGACAAACATTTTAATTAAAAGGCAGAAGGGCCAGGAGATATAAACACTCATGTGCCTGGTTGTCAGTGAAGGCCGGGTGGCGTTCAGCGTCCAGGGGCTAATTATATTCTCTTCTCTGGGACTCACACAAATATTGCCACAAATGTACCTGACTGTCAGACTGAAGTCATTTATCTCCAAGTGTGGGGAGCAGTGAAGCCCACACGTCCAGGTAGATTTAGCTCTTACGGACTCTTCTGGGAAGCGGCAGGTGGGTAAAACTGAAAGCATCAGCTATTGCACCCTAGCTGCAGGTTTTCACAGAAAGCTGAATCAACTTGTATTGGGGATTCTGCATTTTAGAGTTCTCTCAAAGACCTAGGTTTGGGCCCTAAAATGCAGCCACCAGAGCAGGCACACCTTAAAAAGTAGGTAATGAGTGGCCTTAGTGCCTGGGCAGCTGTCAGTACTGGCCTCCTTTGGTTGTCCCTGTCCACTGACCCTCCTTCCTCCCGTTCTCTCACGTTTGCATTCATCTGCAGCCTCCATTACCATTGACCAGCTTTGCCGCTTACCTGCCTCCACCCTTCCTTCCCTAAGTTCGAGTAGTTTCCTAAGTAGCTTCCCCTTAGTTTCCTAAGGCTGAAGTAATAGAGTATAGCACAAACTGGATGGCTTAAAAGTTCTCCTTCTCAGAAAGTACAAAATCTAGGTGTTGGCACAGCTGTGCTCCCCGCAAAGCCTCTAGGGAAGAATCCTTCCTTGCCTCTTCCTAGTTTCTGGTGGCTGCTGGCAACCCTTCGTGTTCCTTTATGGCTGTGTCACTCCAGTTTCTGCCCCCATCATTACATGACCTCCTCTCTGTGGGTCTCTGTATGTCCTCTTTTTTTTTTTTTTTTTTTTTTTGAGACAGAGTCTTACTCTTTCGCCCAGGCTGGAGTGCCTTAGTGCGATCTCAGCTCTCTGCAACCTCTGTCTCCAGGTTCAAGCAATTCTCCTGCCTCAGCCTCCCTAGTAGCTGGGAT
TACAGATGCATGCCACCACACCCAGCTGCTTTTTGTATTTTTAGTAGAGATGGGGTTTCACTTTGTTGGTCAGGCTGGTCTTGAACTCCTGACCTCAAATGATCTGCCTGTGTCGGCCTCCCAAAGTGCTTGGGATTACAGGCATGAGCCAAAGAGGCCTGGCCACCTCTTTTCTTATACGGATACCAGAATGACTCTACCTTAACTAAACATAACATCTGCAAAGACCCTGTGTCCAAATAAGGTCACATTCTGAAGTTCTGGGTAGATGCAAATTTTGGGAGACACTATTTGCCACTACGGGTTTCTCCAAGTTGTGCTGTCCTGTGACTGAACACTGACCCTGTTGCTCTATTGTGGATGTTTGGGAGGATTAACACCAGCCTGTTTCTGCTCACACTGCTTTCTCTGCCTGGAATTTGCTTTACAGCCTGTTGTCCCTGGTGTGACCATCATTCATCTTGTAAGACTTAACAGCCCAGCTTTGCCCCCACTGTGGATTCCTGGCGGAAACACCGTCCCTCTTCTCCTGGTGTACATGTTCTGTCACGGTCTCTGGATCACTTGACTGTACTTATTGTTTGAGTGGTACTCACCACTAATTGGAGCATAAGCCCTTGAAGGTTTCATTTACATCCGGGTCTCAGTACTAGGGACTTAGCAGGTGCTCATAATTCATAACTGCTTTTGATGGAGTTGGAGAGGCTAGTTATTAAGATTTTTCTGAATGTAGCATCCTTAGCTGGCTTTCAGGATAGTGACTGCATGCTCTAAAAGGAGATCTTTGCAGATTTTACTCTATGATGAGAAAACTTTTTAGTACCTTTTTCTGTCAGCATGTACCTAGGTAATAAGAAGAATGACGTGACATGTATTTGGGGAATTAGCATACAAGAAGTACTTTGACATTTTCCACGTGTGAAGAAAGCTGTTTTTATTGACCTAGCTGAAGGGATCAAATTCATATTTGAAAAGATGTGGCTAAAACTTGAAAAGGACTTGTCCATGGGGGATGTCTTCATCCCTTCTCCCTGCTCAGGGAGAAGTTGCAGGCCATAAAGTGTGAGGGCATCAGCCTGGATGAGCTCTAAAGCTCAATGTTAATTTCGGATTTTAATAACATGTAAGAATACAAGTTTGATTGCAGAAGCCACAATCAAGTTCAAGGGAGGGTGAATGAATGGGAAGTAGAAGGGACCATGTGTACGTCTGTGTGTGTGTGTGCATGTATGCACACATTTTTTTAGGCTTTGGTTCCTTTAGCTATAAAATAAGGGAGTTAAAATGGGGGTGGTTTATGGTCACGATTGTTTCCTTCAAGTATCTAAAAGGCTTTCACCAGCAAGGGGGACTTGGGTTCAGAACAAGGCATTGGGTGAACCAGTGGTAGAACTGAAAGGTCGTTTCAGTGTGAAAGTGAACTTTCTTTTCAAGCAAAGGCTGGATGTCATTTGGGAGCTTGTAAAAAGAATTGTTATATGGCTGCCAGGCGCAATGGCTCACACCTGTAATCCTAGCACTTTGGTAGGCTGAAATGGATGGATCCCTTAAGGCCAGAAGTTTGAGCCAGCCTGAGCAACATGGTGAAACCCCGTCTCTACTAAAATACAAAAAATGAGCCAGGCGTGTGGCATGTGCCTGTAGTCCCAGCTACTCCGGTAGGCCCGAGGCACGAGAATTGCTTGAACCTGGGAAGCAGAGGTTGCAGTTAGCTGAGATTGCACCGCTGCACTCCATCCTGGACGACCAAAGGAGACTGTCTCAAAAATAAAAATAAAAATTGGCCAGGTTTGGTGACTCACACCTGTAATCCCAGCACTTTGGGAGGCCAAGGCAGGTGGATCATCTGAGGTCAGGAGTGCAAGACCAGCCTGGCCAACATGGCGAAACCCCATCTCCACTAAAAATCAGCCAGGCATTGGTGGCGTGCGCCTGTAATCCCAGCTACTTGGGATTTGAGGCAGGAGAATTGCTCGGACCTGGGAGACAGAGGTGCAGTG
AGCTGAGATACGCCACTGCACTCCAGCTTGGGCGACAGAGCGAGACTCCATCCCTAAATAAATAAAATAATTAAAAAAAACAATTGGTATATGGATTGGGAGATTGGACTGATGGACTCTTGATTTCTCTGATTCTTGAATCTCTGCTTGGGTGAACACTAAGATTCCTTCCATTTCTAAAATTTTGAGGTTCAGTATAAATAGAAATGGGGTCATGCTTTTGTGGCTAGGAGCTGTGTGTATATTCTAGCCACCAA";
326
327 let ranges = symmetric_dust(read);
328 let expected_ranges = vec![
329 742..808,
330 3169..3223,
331 3406..3413,
332 3424..3431,
333 3437..3444,
334 3729..3764,
335 4729..4736,
336 5831..5862,
337 6449..6456,
338 7014..7031,
339 8194..8201,
340 12955..12963,
341 12971..13033,
342 13369..13376,
343 17841..17864,
344 19193..19207,
345 19221..19228,
346 19746..19763,
347 20037..20063
348 ];
349 assert_eq!(ranges, expected_ranges);
350 }
351}