Gamgee
You miserable little maggot. I'll stove your head in!
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
variant_header.h
Go to the documentation of this file.
1 #ifndef gamgee__variant_header__guard
2 #define gamgee__variant_header__guard
3 
4 #include "htslib/vcf.h"
5 
6 #include "../missing.h"
7 
8 #include <memory>
9 #include <string>
10 #include <vector>
11 
12 namespace gamgee {
13 
14 template<bool fields_forward_LUT_ordering, bool fields_reverse_LUT_ordering, bool samples_forward_LUT_ordering, bool samples_reverse_LUT_ordering>
15 class VariantHeaderMerger; //forward declaration to declare friendship in VariantHeader
53  public:
54  VariantHeader() = default;
55  explicit VariantHeader(const std::shared_ptr<bcf_hdr_t>& header) : m_header{header} {}
56  VariantHeader(const VariantHeader& other);
57  VariantHeader(VariantHeader&& other) noexcept;
58  VariantHeader& operator=(const VariantHeader& other);
59  VariantHeader& operator=(VariantHeader&& other) noexcept;
60  ~VariantHeader() = default;
61 
68  bool operator==(const VariantHeader& rhs) const;
69  bool operator!=(const VariantHeader& rhs) const { return !operator==(rhs); }
70 
71  std::vector<std::string> samples() const;
72  uint32_t n_samples() const { return uint32_t(bcf_hdr_nsamples(m_header.get())); };
73  std::vector<std::string> chromosomes() const;
74  uint32_t n_chromosomes() const;
75 
84  uint32_t field_index_end() const { return static_cast<uint32_t>(m_header->n[BCF_DT_ID]); };
85 
90  uint32_t n_filters() const;
91  std::vector<std::string> filters() const;
92 
97  uint32_t n_shared_fields() const;
98  std::vector<std::string> shared_fields() const;
99 
104  uint32_t n_individual_fields() const;
105  std::vector<std::string> individual_fields() const;
106 
107  // type checking functions: returns BCF_HT_FLAG, BCF_HT_INT, BCF_HT_REAL, BCF_HT_STR from htslib/vcf.h
108  uint8_t shared_field_type(const std::string& tag) const { return field_type(field_index(tag), BCF_HL_INFO); }
109  uint8_t shared_field_type(const int32_t index) const { return field_type(index, BCF_HL_INFO); }
110  uint8_t individual_field_type(const std::string& tag) const { return field_type(field_index(tag), BCF_HL_FMT); }
111  uint8_t individual_field_type(const int32_t index) const { return field_type(index, BCF_HL_FMT); }
112 
118  uint8_t field_type(const std::string& tag, const int32_t field_category) const { return field_type(field_index(tag), field_category); }
119 
125  uint8_t field_type(const int32_t index, const int32_t field_category) const { return bcf_hdr_id2type(m_header.get(), field_category, index); }
126  /*
127  * returns one of BCF_VL_* values for field with the specified name and category (one of BCF_HL_FMT, BCF_HL_INFO, or BCF_HL_FLT)
128  *
129  * @note must check whether the field exists before calling this function, as it doesn't check for you
130  */
131  uint32_t field_length_descriptor(const std::string& tag, const int32_t field_category) const { return field_length_descriptor(field_index(tag), field_category); }
137  uint32_t field_length_descriptor(const int32_t index, const int32_t field_category) const { return bcf_hdr_id2length(m_header.get(), field_category, index); }
143  uint32_t field_length(const std::string& tag, const int32_t field_category) const { return field_length(field_index(tag), field_category); }
149  uint32_t field_length(const int32_t index, const int32_t field_category) const { return bcf_hdr_id2number(m_header.get(), field_category, index); }
153  bool has_filter(const std::string& filter_name) const { return has_field(field_index(filter_name), BCF_HL_FLT); }
154 
158  bool has_filter(const int32_t filter_index) const { return has_field(filter_index, BCF_HL_FLT); }
159 
163  bool has_shared_field(const std::string& field_name) const { return has_field(field_index(field_name), BCF_HL_INFO); }
164 
168  bool has_shared_field(const int32_t field_index) const { return has_field(field_index, BCF_HL_INFO); }
169 
173  bool has_individual_field(const std::string& field_name) const { return has_field(field_index(field_name), BCF_HL_FMT); }
174 
178  bool has_individual_field(const int32_t field_index) const { return has_field(field_index, BCF_HL_FMT); }
179 
183  bool has_field(const std::string& field_name, const int32_t field_category) const { return has_field(field_index(field_name), field_category); }
184 
188  bool has_field(const int32_t field_index, const int32_t field_category) const {
189  // Can't just use bcf_hdr_idinfo_exists() here since it assumes the index came from a hash lookup,
190  // which is not always the case
191  return field_index >= 0 &&
192  field_index < m_header->n[BCF_DT_ID] &&
193  m_header->id[BCF_DT_ID][field_index].val != nullptr &&
194  m_header->id[BCF_DT_ID][field_index].val->hrec[field_category] != nullptr;
195  }
196 
200  bool has_sample(const std::string& sample_name) const { return has_sample(sample_index(sample_name)); }
201 
205  bool has_sample(const int32_t sample_index) const {
206  // Can't assume that sample_index came from a hash lookup, so must validate the hard way
207  return sample_index >= 0 &&
208  sample_index < m_header->n[BCF_DT_SAMPLE] &&
209  m_header->id[BCF_DT_SAMPLE][sample_index].val != nullptr &&
210  m_header->id[BCF_DT_SAMPLE][sample_index].val->id != -1;
211  }
212 
219  int32_t field_index(const std::string& tag) const {
220  const auto index = bcf_hdr_id2int(m_header.get(), BCF_DT_ID, tag.c_str());
221  return index >= 0 ? index : missing_values::int32;
222  }
223 
229  int32_t sample_index(const std::string& sample) const {
230  const auto index = bcf_hdr_id2int(m_header.get(), BCF_DT_SAMPLE, sample.c_str());
231  return index >= 0 ? index : missing_values::int32;
232  }
233 
234  std::string get_field_name(const int32_t field_idx) const {
235  if(field_idx >= 0 && field_idx < m_header->n[BCF_DT_ID])
236  {
237  auto name_ptr = bcf_hdr_int2id(m_header.get(), BCF_DT_ID, field_idx);
238  if(name_ptr)
239  return name_ptr;
240  }
241  return "";
242  }
243 
244  std::string get_sample_name(const int32_t sample_idx) const {
245  if(sample_idx >= 0 && sample_idx < m_header->n[BCF_DT_SAMPLE])
246  {
247  auto name_ptr= bcf_hdr_int2id(m_header.get(), BCF_DT_SAMPLE, sample_idx);
248  if(name_ptr)
249  return name_ptr;
250  }
251  return "";
252  }
253 
254  private:
255  std::shared_ptr<bcf_hdr_t> m_header;
256 
257  friend class Variant;
258  friend class VariantWriter;
259  friend class VariantHeaderBuilder;
260  friend class VariantBuilder;
263  template<bool fields_forward_LUT_ordering, bool fields_reverse_LUT_ordering, bool samples_forward_LUT_ordering, bool samples_reverse_LUT_ordering>
264  friend class VariantHeaderMerger; //to access m_header
265 };
266 
267 }
268 
269 #endif // gamgee__variant_header__guard
bool has_filter(const int32_t filter_index) const
checks whether the given filter is present given the filter index
Definition: variant_header.h:158
#define bcf_hdr_id2number(hdr, type, int_id)
Definition: vcf.h:698
#define BCF_HL_FLT
Definition: vcf.h:46
#define BCF_HL_INFO
Definition: vcf.h:47
uint8_t individual_field_type(const std::string &tag) const
returns the type of this individual (FORMAT) field
Definition: variant_header.h:110
bool has_field(const std::string &field_name, const int32_t field_category) const
checks whether the given field is present given the field name and field category (which must be one ...
Definition: variant_header.h:183
#define BCF_DT_ID
Definition: vcf.h:76
uint32_t n_shared_fields() const
returns the number of shared fields declared in this header; do not use for iteration over filter ind...
Definition: variant_header.cpp:116
Definition: merged_vcf_lut.h:20
#define bcf_hdr_int2id(hdr, type, int_id)
Definition: vcf.h:673
bool has_shared_field(const std::string &field_name) const
checks whether the given shared (INFO) field is present given the field name
Definition: variant_header.h:163
std::vector< std::string > chromosomes() const
builds a vector with the names of the chromosomes declared in this header
Definition: variant_header.cpp:96
bool has_field(const int32_t field_index, const int32_t field_category) const
checks whether the given field is present given the field index and field category (one of BCF_HL_FMT...
Definition: variant_header.h:188
int bcf_hdr_id2int(const bcf_hdr_t *hdr, int type, const char *id)
Definition: vcf.c:2145
std::string get_sample_name(const int32_t sample_idx) const
Definition: variant_header.h:244
uint32_t field_index_end() const
returns the last valid field index + 1, to indicate the end of field iteration
Definition: variant_header.h:84
uint32_t n_chromosomes() const
returns the number of chromosomes declared in this header
Definition: variant_header.cpp:100
std::vector< std::string > filters() const
returns a vector of filter names
Definition: variant_header.cpp:104
Utility class to build VariantHeader objects from scratch.
Definition: variant_header_builder.h:19
Helper class for VariantBuilder to manage the fields belonging to the shared region of Variant record...
Definition: variant_builder_shared_region.h:56
bool has_individual_field(const std::string &field_name) const
checks whether the given individual (FORMAT) field is present given the field name ...
Definition: variant_header.h:173
uint8_t shared_field_type(const std::string &tag) const
returns the type of this shared (INFO) field
Definition: variant_header.h:108
constexpr auto int32
missing value for an int32
Definition: missing.h:18
uint8_t shared_field_type(const int32_t index) const
returns the type of this shared (INFO) field
Definition: variant_header.h:109
utility class to write out a VCF/BCF file to any stream
Definition: variant_writer.h:21
uint32_t n_filters() const
returns the number of filters declared in this header; do not use for iteration over filter indices ...
Definition: variant_header.cpp:108
#define BCF_DT_SAMPLE
Definition: vcf.h:78
int32_t field_index(const std::string &tag) const
looks up the index of a particular filter, shared or individual field tag, enabling subsequent O(1) r...
Definition: variant_header.h:219
VariantBuilder: construct Variant records from scratch (and, coming soon, from existing Variant recor...
Definition: variant_builder.h:164
#define bcf_hdr_id2length(hdr, type, int_id)
Definition: vcf.h:697
#define bcf_hdr_id2type(hdr, type, int_id)
Definition: vcf.h:699
bool has_shared_field(const int32_t field_index) const
checks whether the given shared (INFO) field is present given the field index
Definition: variant_header.h:168
uint32_t n_individual_fields() const
returns the number of individual fields declared in this header; do not use for iteration over filter...
Definition: variant_header.cpp:124
std::string get_field_name(const int32_t field_idx) const
Definition: variant_header.h:234
#define BCF_HL_FMT
Definition: vcf.h:48
bool has_filter(const std::string &filter_name) const
checks whether the given filter is present given the filter name
Definition: variant_header.h:153
uint32_t field_length_descriptor(const std::string &tag, const int32_t field_category) const
Definition: variant_header.h:131
Definition: exceptions.h:9
Helper class for VariantBuilder to manage the fields belonging to the individual region of Variant re...
Definition: variant_builder_individual_region.h:23
std::vector< std::string > individual_fields() const
returns a vector of individual field names
Definition: variant_header.cpp:120
uint32_t field_length(const int32_t index, const int32_t field_category) const
Definition: variant_header.h:149
std::vector< std::string > shared_fields() const
returns a vector of shared field names
Definition: variant_header.cpp:112
bool operator==(const VariantHeader &rhs) const
equality operators
Definition: variant_header.cpp:63
VariantHeader()=default
initializes a null VariantHeader
uint32_t field_length(const std::string &tag, const int32_t field_category) const
Definition: variant_header.h:143
Utility class to manipulate a Variant record.
Definition: variant.h:29
uint32_t n_samples() const
Definition: variant_header.h:72
#define bcf_hdr_nsamples(hdr)
Definition: vcf.h:433
uint8_t field_type(const int32_t index, const int32_t field_category) const
Definition: variant_header.h:125
bool has_individual_field(const int32_t field_index) const
checks whether the given individual (FORMAT) field is present given the field index ...
Definition: variant_header.h:178
Utility class to hold a variant header.
Definition: variant_header.h:52
bool has_sample(const std::string &sample_name) const
checks whether the given sample is present given the sample name
Definition: variant_header.h:200
VariantHeader(const std::shared_ptr< bcf_hdr_t > &header)
creates a VariantHeader given htslib object.
Definition: variant_header.h:55
std::vector< std::string > samples() const
builds a vector with the names of the samples
Definition: variant_header.cpp:92
uint8_t field_type(const std::string &tag, const int32_t field_category) const
Definition: variant_header.h:118
bool has_sample(const int32_t sample_index) const
checks whether the given sample is present given the sample index
Definition: variant_header.h:205
int32_t sample_index(const std::string &sample) const
looks up the index of a particular sample, enabling subsequent O(1) random-access lookups for that sa...
Definition: variant_header.h:229
uint32_t field_length_descriptor(const int32_t index, const int32_t field_category) const
Definition: variant_header.h:137
uint8_t individual_field_type(const int32_t index) const
returns the type of this individual (FORMAT) field
Definition: variant_header.h:111