hidive/
trim.rs

1// Import necessary standard library modules
2use std::collections::HashSet;
3use std::io::Write;
4use std::{collections::HashMap, path::PathBuf};
5
6// Import the Absolutize trait to convert relative paths to absolute paths
7
8// Import the Url type to work with URLs
9
10// Import the skydive module, which contains the necessary functions for staging data
11
12// Import types from rust_htslib for working with BAM files.
13use rust_htslib::bam::{self, IndexedReader, Read};
14
15pub fn start(output: &PathBuf, loci_list: &Vec<String>, bam_path: &PathBuf) {
16    // Initialize a HashSet to store unique loci after parsing
17    let mut loci = HashSet::new();
18
19    // Iterate over each locus in the provided list
20    for locus in loci_list {
21        // Attempt to parse the locus using a function from the skydive module
22        match skydive::parse::parse_locus(&locus, 0) {
23            Ok(l_fmt) => {
24                // If parsing is successful, insert the formatted locus into the HashSet
25                loci.insert(l_fmt);
26            }
27            Err(_) => {
28                // If parsing fails, panic and terminate the program, providing an error message
29                panic!("Could not parse locus '{}'.", locus);
30            }
31        }
32    }
33
34    // Open output file for writing.
35    let mut output_file = std::fs::File::create(output).expect("Unable to create output file");
36
37    let mut bam = IndexedReader::from_path(bam_path).unwrap();
38    //let rg_sm_map = get_rg_to_sm_mapping(&bam);
39
40    loci.iter().for_each(|(chr, start, stop, name)| {
41        let _ = bam.fetch(((*chr).as_bytes(), *start, *stop));
42
43        let mut bmap = HashMap::new();
44
45        for p in bam.pileup() {
46            let pileup = p.unwrap();
47
48            if *start <= (pileup.pos() as u64) && (pileup.pos() as u64) < *stop {
49                for alignment in pileup.alignments() {
50                    let qname = String::from_utf8_lossy(alignment.record().qname()).into_owned();
51                    if !bmap.contains_key(&qname) {
52                        bmap.insert(qname.to_owned(), String::new());
53                    }
54
55                    if !alignment.is_del() && !alignment.is_refskip() {
56                        let a = alignment.record().seq()[alignment.qpos().unwrap()];
57
58                        bmap.get_mut(&qname).unwrap().push(a as char);
59                    }
60
61                    if let bam::pileup::Indel::Ins(len) = alignment.indel() {
62                        let pos1 = alignment.qpos().unwrap();
63                        let pos2 = pos1 + (len as usize);
64                        for pos in pos1..pos2 {
65                            let a = alignment.record().seq()[pos];
66
67                            bmap.get_mut(&qname).unwrap().push(a as char);
68                        }
69                    }
70                }
71            }
72        }
73
74        for kv in bmap {
75            let output_str = format!(">{}\n{}", kv.0, kv.1);
76            writeln!(output_file, "{}", output_str).expect("Unable to write to file");
77        }
78    });
79}
80
81fn get_rg_to_sm_mapping(bam: &IndexedReader) -> HashMap<String, String> {
82    let header = bam::Header::from_template(bam.header());
83
84    let rg_sm_map: HashMap<String, String> = header
85        .to_hashmap()
86        .into_iter()
87        .flat_map(|(_, records)| records)
88        .filter(|record| record.contains_key("ID") && record.contains_key("SM"))
89        .map(|record| (record["ID"].to_owned(), record["SM"].to_owned()))
90        .collect();
91
92    rg_sm_map
93}