Gamgee
You miserable little maggot. I'll stove your head in!
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
multiple_variant_reader.h
Go to the documentation of this file.
1 #ifndef gamgee__multiple_variant_reader__guard
2 #define gamgee__multiple_variant_reader__guard
3 
4 #include "htslib/vcf.h"
5 
6 #include "variant_header.h"
8 
9 #include "../exceptions.h"
10 #include "../utils/hts_memory.h"
11 #include "../utils/variant_utils.h"
12 
13 namespace gamgee {
14 
38 template<class ITERATOR>
40  public:
41 
48  explicit MultipleVariantReader(const std::vector<std::string>& filenames, const bool validate_headers = true) :
49  m_variant_files { },
50  m_variant_headers { }
51  {
52  init_reader(filenames, validate_headers);
53  }
54 
63  MultipleVariantReader(const std::vector<std::string>& filenames, const bool validate_headers,
64  const std::vector<std::string>& samples, const bool include = true) :
65  m_variant_files { },
66  m_variant_headers { }
67  {
68  init_reader(filenames, validate_headers);
69  subset_variant_samples(m_variant_header_merger.get_raw_merged_header().get(), samples, include);
70  }
71 
78  void init_reader(const std::vector<std::string>& filenames, const bool validate_headers) {
79  m_variant_files.reserve(filenames.size());
80  m_variant_headers.reserve(filenames.size());
81 
82  for (const auto& filename : filenames) {
83  // TODO? check for maximum one stream
84  auto* file_ptr = bcf_open(filename.empty() ? "-" : filename.c_str(), "r");
85  if ( file_ptr == nullptr ) {
86  throw FileOpenException{filename};
87  }
88  m_variant_files.push_back(std::move(utils::make_shared_hts_file(file_ptr)));
89 
90  auto* header_raw_ptr = bcf_hdr_read(file_ptr);
91  if ( header_raw_ptr == nullptr ) {
92  throw HeaderReadException{filename};
93  }
94  const auto& header_ptr = utils::make_shared_variant_header(header_raw_ptr);
95  m_variant_headers.push_back(header_ptr);
96 
97  if (validate_headers && m_variant_header_merger.get_raw_merged_header().get() != nullptr)
98  validate_header(header_ptr);
99  m_variant_header_merger.add_header(header_ptr);
100  }
101  }
102 
106  MultipleVariantReader(MultipleVariantReader&& other) = default;
108 
113  MultipleVariantReader& operator=(const MultipleVariantReader& other) = delete;
114 
121  ITERATOR begin() const {
122  return ITERATOR{m_variant_files, m_variant_headers};
123  }
124 
130  ITERATOR end() const {
131  return ITERATOR{};
132  }
133 
138  const inline VariantHeader combined_header() const { return VariantHeader {m_variant_header_merger.get_raw_merged_header()}; }
139 
144  inline InputOrderedVariantHeaderMerger& get_variant_header_merger() { return m_variant_header_merger; }
145 
150  const std::vector<std::shared_ptr<bcf_hdr_t>>& get_input_vcf_headers() const { return m_variant_headers; }
151 
152  private:
154  // TODO? only handles chromosome names, not lengths
155  void validate_header(const std::shared_ptr<bcf_hdr_t>& other_header_ptr) {
156  const auto& other_header = VariantHeader{other_header_ptr};
157  if (combined_header().chromosomes() != other_header.chromosomes())
158  throw HeaderCompatibilityException{"chromosomes in header files are inconsistent"};
159  }
160 
161  std::vector<std::shared_ptr<htsFile>> m_variant_files;
162  std::vector<std::shared_ptr<bcf_hdr_t>> m_variant_headers;
163  InputOrderedVariantHeaderMerger m_variant_header_merger;
164 
165 };
166 
167 } // end namespace gamgee
168 
169 #endif /* defined(gamgee__multiple_variant_reader__guard) */
MultipleVariantReader(const std::vector< std::string > &filenames, const bool validate_headers=true)
enables reading records in multiple files (vcf or bcf)
Definition: multiple_variant_reader.h:48
bcf_hdr_t * bcf_hdr_read(htsFile *fp)
Definition: vcf.c:736
ITERATOR begin() const
creates an ITERATOR pointing at the start of the input streams (needed by for-each loop) ...
Definition: multiple_variant_reader.h:121
shared_ptr< bcf_hdr_t > make_shared_variant_header(bcf_hdr_t *bcf_hdr_ptr)
wraps a pre-allocated bcf_hdr_t in a shared_ptr with correct deleter
Definition: hts_memory.cpp:63
std::vector< std::string > chromosomes() const
returns the names of the chromosomes listed in the header
Definition: variant_header.cpp:96
void init_reader(const std::vector< std::string > &filenames, const bool validate_headers)
helper function for constructors
Definition: multiple_variant_reader.h:78
ITERATOR end() const
creates a default ITERATOR (needed by for-each loop)
Definition: multiple_variant_reader.h:130
const std::shared_ptr< bcf_hdr_t > & get_raw_merged_header() const
Get merged VCF header shared_ptr.
Definition: variant_header_merger.h:208
Exception for the case where there is an error opening a file for reading/writing.
Definition: exceptions.h:14
void subset_variant_samples(bcf_hdr_t *hdr_ptr, const std::vector< std::string > &samples, const bool include)
allows the caller to include only selected samples in a Variant Reader. To create a sites only file...
Definition: variant_utils.cpp:15
MultipleVariantReader & operator=(MultipleVariantReader &&other)=default
shared_ptr< htsFile > make_shared_hts_file(htsFile *hts_file_ptr)
wraps a pre-allocated htsFile in a shared_ptr with correct deleter
Definition: hts_memory.cpp:15
const std::vector< std::shared_ptr< bcf_hdr_t > > & get_input_vcf_headers() const
return vector of input VCF headers
Definition: multiple_variant_reader.h:150
Definition: exceptions.h:9
VariantHeaderMerger< true, true, true, true > InputOrderedVariantHeaderMerger
Definition: variant_header_merger.h:315
InputOrderedVariantHeaderMerger & get_variant_header_merger()
return VariantHeaderMerger object
Definition: multiple_variant_reader.h:144
void add_header(const std::shared_ptr< bcf_hdr_t > &hdr)
add a new header into the merged header and update LUTs
Exception for the case where a file header could not be read.
Definition: exceptions.h:32
MultipleVariantReader(const std::vector< std::string > &filenames, const bool validate_headers, const std::vector< std::string > &samples, const bool include=true)
enables reading records in multiple files (vcf or bcf)
Definition: multiple_variant_reader.h:63
const VariantHeader combined_header() const
returns a combined header for the files being read
Definition: multiple_variant_reader.h:138
Utility class to hold a variant header.
Definition: variant_header.h:52
Utility class to read multiple VCF/BCF files with an appropriate iterator in a for-each loop...
Definition: multiple_variant_reader.h:39
#define bcf_open(fn, mode)
Definition: vcf.h:274