Gamgee
You miserable little maggot. I'll stove your head in!
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
variant_builder_shared_region.h
Go to the documentation of this file.
1 #ifndef gamgee__variant_builder_shared_region__guard
2 #define gamgee__variant_builder_shared_region__guard
3 
4 #include "../utils/hts_memory.h"
5 
6 #include "htslib/kstring.h"
7 #include "htslib/vcf.h"
8 
9 #include "variant.h"
10 
11 #include <string>
12 #include <vector>
13 #include <utility>
14 
15 namespace gamgee {
16 
21 
27  int32_t start_offset;
28  uint32_t length;
29 };
30 
57  public:
// Constructs the shared-region helper from a variant header and an initial validation setting.
// Declaration only; the body is not part of this header.
58  explicit VariantBuilderSharedRegion(const VariantHeader& header, const bool enable_validation);
59 
// User-declared destructor; declaration only, the body is not part of this header.
// NOTE(review): the class holds a raw kstring_t (m_shared_buffer), so this presumably releases
// that buffer -- confirm in the .cpp, and confirm that copy/move operations are consistent with it.
64  ~VariantBuilderSharedRegion(); // N.B. non-default destruction!
65 
// Stores the validation flag. set_info_field() and remove_info_field() consult it before
// calling their validate_* helpers.
66  void set_enable_validation(const bool enable_validation) { m_enable_validation = enable_validation; }
67 
// Returns the stored REF allele length (m_ref_allele_length).
// NOTE(review): units are not visible in this header -- presumably characters; confirm in the .cpp.
68  uint32_t ref_allele_length() const { return m_ref_allele_length; }
// Returns the stored ALT allele count (m_num_alt_alleles).
69  uint32_t num_alt_alleles() const { return m_num_alt_alleles; }
// Returns the number of info fields currently counted as present. The counter is
// incremented/decremented by set_info_field() and decremented by remove_info_field().
70  uint32_t num_present_info_fields() const { return m_num_present_info_fields; }
// Reports whether this region currently holds any field data: true if the ID, REF allele,
// ALT alleles or FILTERS field is set, or if at least one info field is present.
// field_is_set() is false for both the "unset" and the "removed" sentinel, so a region in
// which fields were only marked as removed reports false here.
// NOTE(review): confirm callers do not rely on this to detect pure removals.
71  bool modified() const {
72  return field_is_set(uint32_t(SharedFieldIndex::ID_INDEX)) ||
73  field_is_set(uint32_t(SharedFieldIndex::REF_ALLELE_INDEX)) ||
74  field_is_set(uint32_t(SharedFieldIndex::ALT_ALLELES_INDEX)) ||
75  field_is_set(uint32_t(SharedFieldIndex::FILTERS_INDEX)) ||
76  m_num_present_info_fields > 0;
77  }
78 
// Setters for the non-info shared fields (ID, REF allele, ALT allele(s), FILTERS).
// Declarations only; the bodies are not part of this header.
// set_alt_allele() takes a single allele, set_alt_alleles() a list.
// set_filters() is overloaded on std::string and int32_t elements.
// NOTE(review): presumably filter names vs. header filter indices -- confirm in the .cpp.
79  void set_id(const std::string& id);
80  void set_ref_allele(const std::string& ref_allele);
81  void set_alt_allele(const std::string& alt_allele);
82  void set_alt_alleles(const std::vector<std::string>& alt_alleles);
83  void set_filters(const std::vector<std::string>& filters);
84  void set_filters(const std::vector<int32_t>& filters);
85 
// Sets (or, for a missing value, removes) one info field.
//
// field_id      - identifies the field; resolved to a logical index through the
//                 info_field_logical_index() overloads (std::string or uint32_t).
// field_value   - value to encode. If missing(field_value) is true the call is treated as a
//                 removal; otherwise it is passed to the matching encode_info_field() overload.
// provided_type - forwarded to validate_info_field() only; unused when validation is disabled.
// num_values    - forwarded to validate_info_field() only; unused when validation is disabled.
//
// Keeps m_num_present_info_fields in sync: decremented when a previously set field is removed,
// incremented when a field that was not set becomes set.
//
// NOTE(review): provided_type is int32_t here but validate_info_field() declares it as uint32_t,
// so the value is implicitly converted at the call.
// NOTE(review): the logical index is int32_t and is converted to uint32_t when passed to
// info_field_physical_index(), which indexes m_info_field_lookup_table without a bounds check.
// With validation disabled, an id that does not resolve to a valid index is presumably not
// caught -- confirm what the header lookup returns for unknown ids.
86  template<class FIELD_ID_TYPE, class FIELD_VALUE_TYPE>
87  void set_info_field(const FIELD_ID_TYPE& field_id, const FIELD_VALUE_TYPE& field_value, const int32_t provided_type, const uint32_t num_values) {
88  const auto field_logical_index = info_field_logical_index(field_id);
89  if ( m_enable_validation ) {
90  validate_info_field(field_logical_index, provided_type, num_values);
91  }
92  const auto field_physical_index = info_field_physical_index(field_logical_index);
// Captured before any state change so the present-field counter can be adjusted correctly.
93  const auto field_was_already_present = field_is_set(field_physical_index);
94 
95  // If the user passed in a missing value, treat it as a request to remove the field
96  if ( missing(field_value) ) {
97  mark_field_as_removed(field_physical_index);
98  if ( field_was_already_present ) --m_num_present_info_fields;
99  }
100  else {
101  encode_info_field(field_logical_index, field_physical_index, field_value);
102  if ( ! field_was_already_present ) ++m_num_present_info_fields;
103  }
104  }
105 
// Marks the ID field as removed (its span gets the "removed" sentinel and length 0).
106  void remove_id() {
107  mark_field_as_removed(uint32_t(SharedFieldIndex::ID_INDEX));
108  }
109 
111  mark_field_as_removed(uint32_t(SharedFieldIndex::ALT_ALLELES_INDEX));
112  m_num_alt_alleles = 0;
113  }
114 
// Marks the FILTERS field as removed (its span gets the "removed" sentinel and length 0).
115  void remove_filters() {
116  mark_field_as_removed(uint32_t(SharedFieldIndex::FILTERS_INDEX));
117  }
118 
// Removes one info field.
//
// field_id - identifies the field; resolved to a logical index through the
//            info_field_logical_index() overloads (std::string or uint32_t).
//
// When validation is enabled, validate_info_field_existence() is called on the logical index
// first. The field is then marked as removed whether or not it was set, and
// m_num_present_info_fields is decremented only if it had been set.
//
// NOTE(review): as in set_info_field(), the logical index reaches the lookup table without a
// bounds check when validation is disabled -- confirm how unknown ids are reported.
119  template<class FIELD_ID_TYPE>
120  void remove_info_field(const FIELD_ID_TYPE& field_id) {
121  const auto field_logical_index = info_field_logical_index(field_id);
122  if ( m_enable_validation ) {
123  validate_info_field_existence(field_logical_index);
124  }
125 
126  const auto field_physical_index = info_field_physical_index(field_logical_index);
// Captured before marking the field removed, otherwise field_is_set() would always be false.
127  const auto field_was_already_present = field_is_set(field_physical_index);
128 
129  mark_field_as_removed(field_physical_index);
130  if ( field_was_already_present ) --m_num_present_info_fields;
131  }
132 
// Declarations only; the bodies are not part of this header.
// estimate_total_size(): per the project's generated docs, produces a slight overestimate of
//                        the number of bytes required to hold all encoded data.
// encode_into():         per the generated docs, copies the encoded data for all shared fields
//                        into the provided buffer in the proper order.
// clear():               per the generated docs, resets the shared region to a pristine state
//                        with no field data.
133  uint32_t estimate_total_size() const;
134  void encode_into(kstring_t* destination) const;
135  void clear();
136 
137  private:
// Data members.
// m_header: header used to resolve string field ids to logical indices (via field_index()).
138  VariantHeader m_header;
// m_shared_buffer: raw htslib string buffer; encoded field bytes are read from its .s pointer.
139  kstring_t m_shared_buffer;
// m_field_buffer_spans: one span per physical field index; start_offset also carries the
// unset/removed sentinel values.
140  std::vector<SharedBufferSpan> m_field_buffer_spans;
// m_info_field_lookup_table: indexed by logical info field index; the stored value is added to
// SharedFieldIndex::INFO_START_INDEX to obtain the physical index.
141  std::vector<uint32_t> m_info_field_lookup_table;
// Cached values returned by ref_allele_length(), num_alt_alleles() and
// num_present_info_fields().
142  uint32_t m_ref_allele_length;
143  uint32_t m_num_alt_alleles;
144  uint32_t m_num_present_info_fields;
// m_enable_validation: when true, the info field setter/remover call the validate_* helpers.
145  bool m_enable_validation;
146 
// Tuning constants; their use is not visible in this header.
// NOTE(review): presumably byte counts for the initial allocation of m_shared_buffer and for
// deciding when compact_shared_buffer() runs -- confirm in the .cpp.
147  static constexpr uint32_t initial_shared_buffer_size = 2000;
148  static constexpr uint32_t buffer_compaction_threshold = 10000;
// Sentinels stored in SharedBufferSpan::start_offset to flag a field as never set (-1) or
// explicitly removed (-2). Any other start_offset value is treated as "set".
149  static constexpr int32_t unset_field_sentinel_value = -1;
150  static constexpr int32_t removed_field_sentinel_value = -2;
151 
// Declaration only; the body is not part of this header.
// NOTE(review): presumably fills m_info_field_lookup_table from m_header -- confirm in the .cpp.
152  void build_lookup_table();
153 
// True when the field's span holds neither the "unset" nor the "removed" sentinel.
// field_physical_index indexes m_field_buffer_spans directly; no bounds check is performed.
154  bool field_is_set(const uint32_t field_physical_index) const {
155  return ! field_is_unset(field_physical_index) && ! field_is_removed(field_physical_index);
156  }
157 
// Declaration only; the body is not part of this header.
// NOTE(review): presumably records the field's span using previous_buffer_length as the start
// offset of the newly appended bytes -- confirm in the .cpp.
158  void mark_field_as_set(const uint32_t field_physical_index, const uint32_t previous_buffer_length);
159 
// True when the field's span start_offset equals unset_field_sentinel_value.
// field_physical_index indexes m_field_buffer_spans directly; no bounds check is performed.
160  bool field_is_unset(const uint32_t field_physical_index) const {
161  return m_field_buffer_spans[field_physical_index].start_offset == unset_field_sentinel_value;
162  }
163 
// Flags the field as never set: start_offset becomes unset_field_sentinel_value and length 0.
// Does not touch the bytes in m_shared_buffer.
164  void mark_field_as_unset(const uint32_t field_physical_index) {
165  m_field_buffer_spans[field_physical_index].start_offset = unset_field_sentinel_value;
166  m_field_buffer_spans[field_physical_index].length = 0;
167  }
168 
// Declaration only; the body is not part of this header.
// NOTE(review): presumably applies mark_field_as_unset() to every span -- confirm in the .cpp.
169  void mark_all_fields_as_unset();
170 
// True when the field's span start_offset equals removed_field_sentinel_value.
// field_physical_index indexes m_field_buffer_spans directly; no bounds check is performed.
171  bool field_is_removed(const uint32_t field_physical_index) const {
172  return m_field_buffer_spans[field_physical_index].start_offset == removed_field_sentinel_value;
173  }
174 
// Flags the field as explicitly removed: start_offset becomes removed_field_sentinel_value and
// length 0. Does not touch the bytes in m_shared_buffer and does not adjust any counters;
// callers update m_num_present_info_fields / m_num_alt_alleles themselves.
175  void mark_field_as_removed(const uint32_t field_physical_index) {
176  m_field_buffer_spans[field_physical_index].start_offset = removed_field_sentinel_value;
177  m_field_buffer_spans[field_physical_index].length = 0;
178  }
179 
// Maps a logical info field index to its physical index: the lookup table entry for the
// logical index, offset by SharedFieldIndex::INFO_START_INDEX.
// The table access is unchecked; callers pass an int32_t logical index that is converted to
// uint32_t at the call, so a negative value would become a very large index.
180  uint32_t info_field_physical_index(const uint32_t field_logical_index) const {
181  return uint32_t(SharedFieldIndex::INFO_START_INDEX) + m_info_field_lookup_table[field_logical_index];
182  }
183 
// Resolve an info field identifier to its logical index.
// A std::string id is looked up through m_header.field_index(); a uint32_t id is taken to
// already be the logical index and is only cast to int32_t.
184  // These trivial overloads only exist to allow us to unify the string id / integer id cases in
185  // the templated info field setter functions above
186  int32_t info_field_logical_index(const std::string& field_id) const { return m_header.field_index(field_id); }
187  int32_t info_field_logical_index(const uint32_t field_id) const { return int32_t(field_id); }
188 
// Validation hooks, called by set_info_field() / remove_info_field() only when
// m_enable_validation is true. Declarations only; the bodies are not part of this header.
// NOTE(review): how failures are reported (presumably by throwing) is not visible here.
// NOTE(review): provided_type is uint32_t here while set_info_field() passes an int32_t.
189  void validate_info_field(const int32_t field_index, const uint32_t provided_type, const uint32_t num_values) const;
190  void validate_info_field_existence(const int32_t field_index) const;
191 
// Encoders for the non-info shared fields, overloaded on the value type (single string,
// list of strings, list of int32_t). Declarations only; the bodies are not part of this header.
// NOTE(review): presumably append the encoded bytes to m_shared_buffer and record the span for
// field_physical_index -- confirm in the .cpp.
192  void encode_non_info_field(const uint32_t field_physical_index, const std::string& value);
193  void encode_non_info_field(const uint32_t field_physical_index, const std::vector<std::string>& values);
194  void encode_non_info_field(const uint32_t field_physical_index, const std::vector<int32_t>& values);
195 
// Encoders for info fields, one overload per supported value type (int32_t, vector<int32_t>,
// float, vector<float>, std::string, bool). set_info_field() selects the overload from its
// FIELD_VALUE_TYPE template argument. Declarations only; the bodies are not part of this header.
// NOTE(review): presumably append the encoded bytes to m_shared_buffer and record the span for
// field_physical_index -- confirm in the .cpp.
196  void encode_info_field(const int32_t field_logical_index, const uint32_t field_physical_index, const int32_t value);
197  void encode_info_field(const int32_t field_logical_index, const uint32_t field_physical_index, const std::vector<int32_t>& values);
198  void encode_info_field(const int32_t field_logical_index, const uint32_t field_physical_index, const float value);
199  void encode_info_field(const int32_t field_logical_index, const uint32_t field_physical_index, const std::vector<float>& values);
200  void encode_info_field(const int32_t field_logical_index, const uint32_t field_physical_index, const std::string& value);
201  void encode_info_field(const int32_t field_logical_index, const uint32_t field_physical_index, const bool value);
202 
// Appends one field's encoded bytes to target via kputsn(): `length` bytes read from
// m_shared_buffer.s starting at the field's start_offset.
// No check is made that the field is set. For an unset or removed field start_offset holds a
// negative sentinel and length is 0.
// NOTE(review): presumably callers test field_is_set() first -- confirm in encode_into().
203  void copy_encoded_field_into(kstring_t* target, const uint32_t field_physical_index) const {
204  kputsn(m_shared_buffer.s + m_field_buffer_spans[field_physical_index].start_offset,
205  int32_t(m_field_buffer_spans[field_physical_index].length),
206  target);
207  }
208 
// Buffer maintenance helpers. Declarations only; the bodies are not part of this header.
// NOTE(review): presumably compact_shared_buffer() reclaims bytes in m_shared_buffer that no
// span refers to any more, and unused_buffer_space() reports how many such bytes exist --
// confirm in the .cpp.
// NOTE(review): unused_buffer_space() is not declared const although its name suggests a
// read-only query.
209  void compact_shared_buffer();
210  uint32_t unused_buffer_space();
211 };
212 
213 }
214 
215 #endif /* gamgee__variant_builder_shared_region__guard */
void encode_into(kstring_t *destination) const
Copy all encoded data for all shared fields into the provided byte buffer in the proper order and for...
Definition: variant_builder_shared_region.cpp:175
void clear()
Reset the shared region to a pristine state with no field data.
Definition: variant_builder_shared_region.cpp:213
SharedFieldIndex
Enum to represent the ordering of the various shared fields as they are physically laid out in the en...
Definition: variant_builder_shared_region.h:20
uint32_t num_alt_alleles() const
Definition: variant_builder_shared_region.h:69
void set_id(const std::string &id)
Definition: variant_builder_shared_region.cpp:78
void set_ref_allele(const std::string &ref_allele)
Definition: variant_builder_shared_region.cpp:87
Represents a section (range of bytes) in the shared memory pool VariantBuilderSharedRegion::m_shared_...
Definition: variant_builder_shared_region.h:26
Helper class for VariantBuilder to manage the fields belonging to the shared region of Variant record...
Definition: variant_builder_shared_region.h:56
bool modified() const
Definition: variant_builder_shared_region.h:71
void set_filters(const std::vector< std::string > &filters)
Definition: variant_builder_shared_region.cpp:124
int32_t start_offset
Definition: variant_builder_shared_region.h:27
uint32_t estimate_total_size() const
Produce a slight overestimate of the number of bytes required to hold all encoded data for this share...
Definition: variant_builder_shared_region.cpp:162
void set_enable_validation(const bool enable_validation)
Definition: variant_builder_shared_region.h:66
int32_t field_index(const std::string &tag) const
looks up the index of a particular filter, shared or individual field tag, enabling subsequent O(1) r...
Definition: variant_header.h:219
Definition: bgzf.h:69
VariantBuilderSharedRegion(const VariantHeader &header, const bool enable_validation)
Definition: variant_builder_shared_region.cpp:13
Definition: exceptions.h:9
char * s
Definition: bgzf.h:71
void set_info_field(const FIELD_ID_TYPE &field_id, const FIELD_VALUE_TYPE &field_value, const int32_t provided_type, const uint32_t num_values)
Definition: variant_builder_shared_region.h:87
void remove_id()
Definition: variant_builder_shared_region.h:106
void remove_filters()
Definition: variant_builder_shared_region.h:115
~VariantBuilderSharedRegion()
Definition: variant_builder_shared_region.cpp:29
Utility class to hold a variant header.
Definition: variant_header.h:52
void set_alt_allele(const std::string &alt_allele)
Definition: variant_builder_shared_region.cpp:96
uint32_t num_present_info_fields() const
Definition: variant_builder_shared_region.h:70
void remove_info_field(const FIELD_ID_TYPE &field_id)
Definition: variant_builder_shared_region.h:120
bool missing(const bool value)
Returns true if bool is false (missing).
Definition: missing.h:23
void set_alt_alleles(const std::vector< std::string > &alt_alleles)
Definition: variant_builder_shared_region.cpp:106
uint32_t ref_allele_length() const
Definition: variant_builder_shared_region.h:68
VariantBuilderSharedRegion & operator=(VariantBuilderSharedRegion &&other)=default
void remove_alt_alleles()
Definition: variant_builder_shared_region.h:110
uint32_t length
Definition: variant_builder_shared_region.h:28