1 #ifndef gamgee__variant_builder_individual_region__guard
2 #define gamgee__variant_builder_individual_region__guard
35 int32_t
gt_index()
const {
return m_gt_field_index; }
37 bool modified()
const {
return m_num_present_fields > 0; }
// NOTE(review): the signature line of this template is not visible in this listing.
// Visible body: perfect-forwards field_values to bulk_set_field() with type
// BCF_HT_INT and the m_int_fields storage, passing true for the trailing
// allow_gt argument.
39 template<
class FIELD_ID_TYPE,
class BULK_FIELD_VALUES_TYPE>
42 bulk_set_field(field_id, std::forward<BULK_FIELD_VALUES_TYPE>(field_values),
BCF_HT_INT, m_int_fields,
true);
// NOTE(review): the signature line of this template is not visible in this listing.
// Visible body: perfect-forwards field_values to bulk_set_field() with type
// BCF_HT_INT and the m_int_fields storage; allow_gt is left at its default (false).
45 template<
class FIELD_ID_TYPE,
class BULK_FIELD_VALUES_TYPE>
48 bulk_set_field(field_id, std::forward<BULK_FIELD_VALUES_TYPE>(field_values),
BCF_HT_INT, m_int_fields);
// NOTE(review): the signature line of this template is not visible in this listing.
// Visible body: perfect-forwards field_values to bulk_set_field() with type
// BCF_HT_REAL and the m_float_fields storage; allow_gt is left at its default (false).
51 template<
class FIELD_ID_TYPE,
class BULK_FIELD_VALUES_TYPE>
53 bulk_set_field(field_id, std::forward<BULK_FIELD_VALUES_TYPE>(field_values),
BCF_HT_REAL, m_float_fields);
// NOTE(review): the signature line of this template is not visible in this listing.
// Visible body: perfect-forwards field_values to bulk_set_field() with type
// BCF_HT_STR and the m_string_fields storage; allow_gt is left at its default (false).
56 template<
class FIELD_ID_TYPE,
class BULK_FIELD_VALUES_TYPE>
58 bulk_set_field(field_id, std::forward<BULK_FIELD_VALUES_TYPE>(field_values),
BCF_HT_STR, m_string_fields);
// Sets the genotype values of a single sample: forwards every argument to
// set_field_by_sample() with type BCF_HT_INT, the m_int_fields storage, and
// allow_gt = true.
// field_id and sample_id are resolved inside set_field_by_sample() through the
// field_index() / sample_index() overloads (std::string or uint32_t).
// NOTE(review): presumably field_values points at num_field_values elements -- confirm.
// NOTE(review): the closing brace of this definition is not visible in this listing.
61 template<
class FIELD_ID_TYPE,
class SAMPLE_ID_TYPE,
class FIELD_VALUE_TYPE>
62 void set_genotype_field_by_sample(
const FIELD_ID_TYPE& field_id,
const SAMPLE_ID_TYPE& sample_id,
const FIELD_VALUE_TYPE* field_values,
const uint32_t num_field_values) {
64 set_field_by_sample(field_id, sample_id, field_values, num_field_values,
BCF_HT_INT, m_int_fields,
true);
// Sets an integer field for a single sample: forwards every argument to
// set_field_by_sample() with type BCF_HT_INT and the m_int_fields storage.
// allow_gt is left at its default (false).
// field_id and sample_id are resolved inside set_field_by_sample() through the
// field_index() / sample_index() overloads (std::string or uint32_t).
// NOTE(review): presumably field_values points at num_field_values elements -- confirm.
// NOTE(review): the closing brace of this definition is not visible in this listing.
67 template<
class FIELD_ID_TYPE,
class SAMPLE_ID_TYPE,
class FIELD_VALUE_TYPE>
68 void set_integer_field_by_sample(
const FIELD_ID_TYPE& field_id,
const SAMPLE_ID_TYPE& sample_id,
const FIELD_VALUE_TYPE* field_values,
const uint32_t num_field_values) {
70 set_field_by_sample(field_id, sample_id, field_values, num_field_values,
BCF_HT_INT, m_int_fields);
// Sets a float field for a single sample: forwards every argument to
// set_field_by_sample() with type BCF_HT_REAL and the m_float_fields storage.
// allow_gt is left at its default (false).
// field_id and sample_id are resolved inside set_field_by_sample() through the
// field_index() / sample_index() overloads (std::string or uint32_t).
// NOTE(review): presumably field_values points at num_field_values elements -- confirm.
// NOTE(review): the closing brace of this definition is not visible in this listing.
73 template<
class FIELD_ID_TYPE,
class SAMPLE_ID_TYPE,
class FIELD_VALUE_TYPE>
74 void set_float_field_by_sample(
const FIELD_ID_TYPE& field_id,
const SAMPLE_ID_TYPE& sample_id,
const FIELD_VALUE_TYPE* field_values,
const uint32_t num_field_values) {
75 set_field_by_sample(field_id, sample_id, field_values, num_field_values,
BCF_HT_REAL, m_float_fields);
// Sets a string field for a single sample: forwards every argument to
// set_field_by_sample() with type BCF_HT_STR and the m_string_fields storage.
// allow_gt is left at its default (false).
// field_id and sample_id are resolved inside set_field_by_sample() through the
// field_index() / sample_index() overloads (std::string or uint32_t).
// NOTE(review): presumably field_values points at num_field_values elements -- confirm.
// NOTE(review): the closing brace of this definition is not visible in this listing.
78 template<
class FIELD_ID_TYPE,
class SAMPLE_ID_TYPE,
class FIELD_VALUE_TYPE>
79 void set_string_field_by_sample(
const FIELD_ID_TYPE& field_id,
const SAMPLE_ID_TYPE& sample_id,
const FIELD_VALUE_TYPE* field_values,
const uint32_t num_field_values) {
80 set_field_by_sample(field_id, sample_id, field_values, num_field_values,
BCF_HT_STR, m_string_fields);
// NOTE(review): the signature line and the closing braces of this template are
// not visible in this listing.
// Visible body: resolves field_id to an index with field_index(); when
// m_enable_validation is set, calls validate_individual_field_existence() on
// that index; then passes the index to remove_field().
// NOTE(review): presumably remove_field() runs outside the validation branch
// (the brace closing the `if` is not shown) -- confirm against the real header.
83 template<
class FIELD_ID_TYPE>
85 const auto field_idx = field_index(field_id);
86 if ( m_enable_validation ) {
87 validate_individual_field_existence(field_idx);
89 remove_field(field_idx);
// Translates a field index (as produced by field_index()) into a position
// within the per-type storage vectors below; the setters read it as
// fields_of_type[m_field_lookup_table[field_idx]].
98 std::vector<int32_t> m_field_lookup_table;
// Value handed back by gt_index().
99 int32_t m_gt_field_index;
// Counter adjusted by update_present_field_count(); modified() is true when it is > 0.
100 uint32_t m_num_present_fields;
// Storage passed to the setters that use BCF_HT_INT (integer and genotype setters).
101 std::vector<VariantBuilderIndividualField<int32_t, int32_t>> m_int_fields;
// Storage passed to the setters that use BCF_HT_REAL.
102 std::vector<VariantBuilderIndividualField<float, float>> m_float_fields;
// Storage passed to the setters that use BCF_HT_STR.
103 std::vector<VariantBuilderIndividualField<char, std::string>> m_string_fields;
// When true, the setters call their validate_* helpers before looking up field storage.
104 bool m_enable_validation;
// Declared without in-class initializers; their values are not visible in this listing.
112 static const uint32_t int_field_short_value_threshold;
113 static const uint32_t float_field_short_value_threshold;
114 static const uint32_t string_field_short_value_threshold;
// Declaration only; the body is not visible in this listing.
116 void build_lookup_tables();
// Shared implementation behind the bulk_set_*_field() entry points.
//   field_id       - identifier resolved to an index via field_index()
//   field_values   - forwarding reference handed on to the field's set_entire_field()
//   provided_type  - type code (callers pass BCF_HT_INT / BCF_HT_REAL / BCF_HT_STR)
//   fields_of_type - storage vector holding the fields of that type
//   allow_gt       - forwarded to validate_individual_field(); defaults to false
// NOTE(review): several lines of this definition (closing braces and the body of
// the `if ( ! field.present() )` branch) are not visible in this listing.
118 template<
class FIELD_ID_TYPE,
class BULK_FIELD_VALUES_TYPE,
class FIELD_TYPE>
119 void bulk_set_field(
const FIELD_ID_TYPE& field_id, BULK_FIELD_VALUES_TYPE&& field_values,
const int32_t provided_type, std::vector<FIELD_TYPE>& fields_of_type,
const bool allow_gt =
false) {
120 const auto field_idx = field_index(field_id);
// Validation is optional and runs before any field storage is looked up.
121 if ( m_enable_validation ) {
122 validate_individual_field(field_idx, provided_type, allow_gt);
123 validate_multi_sample_vector_length(field_values);
// m_field_lookup_table converts the field index into a position within fields_of_type.
126 auto& field = fields_of_type[m_field_lookup_table[field_idx]];
// Remember the prior state so the present-field counter can be adjusted afterwards.
127 const auto field_was_already_present = field.present();
130 field.set_entire_field(std::forward<BULK_FIELD_VALUES_TYPE>(field_values));
// NOTE(review): the statements guarded by this condition are not visible here.
137 if ( ! field.present() ) {
140 update_present_field_count(field_was_already_present, field.present());
// Shared implementation behind the set_*_field_by_sample() entry points.
//   field_id         - identifier resolved to an index via field_index()
//   sample_id        - identifier resolved to an index via sample_index()
//   field_values     - pointer handed on to the field's set_sample_field_value()
//   num_field_values - count handed on together with field_values
//   provided_type    - type code (callers pass BCF_HT_INT / BCF_HT_REAL / BCF_HT_STR)
//   fields_of_type   - storage vector holding the fields of that type
//   allow_gt         - forwarded to validate_individual_field(); defaults to false
// NOTE(review): some lines of this definition (including closing braces) are not
// visible in this listing.
143 template<
class FIELD_ID_TYPE,
class SAMPLE_ID_TYPE,
class FIELD_VALUE_TYPE,
class FIELD_TYPE>
144 void set_field_by_sample(
const FIELD_ID_TYPE& field_id,
const SAMPLE_ID_TYPE& sample_id,
const FIELD_VALUE_TYPE* field_values,
const uint32_t num_field_values,
const int32_t provided_type, std::vector<FIELD_TYPE>& fields_of_type,
const bool allow_gt =
false) {
145 const auto field_idx = field_index(field_id);
146 const auto sample_idx = sample_index(sample_id);
// Validation is optional and runs before any field storage is looked up.
147 if ( m_enable_validation ) {
148 validate_individual_field(field_idx, sample_idx, provided_type, allow_gt);
// m_field_lookup_table converts the field index into a position within fields_of_type.
151 auto& field = fields_of_type[m_field_lookup_table[field_idx]];
// Remember the prior state so the present-field counter can be adjusted afterwards.
152 const auto field_was_already_present = field.present();
154 field.set_sample_field_value(sample_idx, field_values, num_field_values);
159 update_present_field_count(field_was_already_present, field.present());
// Declaration only; the body is not visible in this listing. Takes an
// already-resolved field index.
162 void remove_field(
const int32_t field_index);
166 int32_t field_index(
const std::string& field_id)
const {
return m_header.
field_index(field_id); }
167 int32_t field_index(
const uint32_t field_id)
const {
return int32_t(field_id); }
168 int32_t sample_index(
const std::string& sample_id)
const {
return m_header.
sample_index(sample_id); }
169 int32_t sample_index(
const uint32_t sample_id)
const {
return int32_t(sample_id); }
// Validation helpers; declarations only, the bodies are not visible in this listing.
// NOTE(review): provided_type is declared uint32_t here, while bulk_set_field()
// and set_field_by_sample() hold it as int32_t, so those calls convert
// implicitly -- confirm this signedness mismatch is intended.

// Field-level check, called by bulk_set_field() when validation is enabled.
171 void validate_individual_field(
const int32_t field_index,
const uint32_t provided_type,
const bool allow_gt)
const;
// Field-and-sample check, called by set_field_by_sample() when validation is enabled.
172 void validate_individual_field(
const int32_t field_index,
const int32_t sample_index,
const uint32_t provided_type,
const bool allow_gt)
const;
// Check used on the field-removal path when validation is enabled.
173 void validate_individual_field_existence(
const int32_t field_index)
const;
// Length check for a vector-of-vectors of values.
// An empty outer vector is accepted; otherwise its size must equal
// m_header.n_samples(), or std::invalid_argument is thrown with a message
// reporting both counts.
// NOTE(review): the closing braces of this definition are not visible in this listing.
175 template<
class ELEMENT_TYPE>
176 void validate_multi_sample_vector_length(
const std::vector<std::vector<ELEMENT_TYPE>>& vec)
const {
178 if ( vec.size() != m_header.
n_samples() && ! vec.empty() ) {
179 throw std::invalid_argument(std::string{
"Number of elements in non-empty vector of vectors for individual field ("} + std::to_string(vec.size()) +
") not equal to the number of samples (" + std::to_string(m_header.
n_samples()) +
")");
// Length check for a flattened (single-level) vector of values.
// The element count must be a multiple of m_header.n_samples(); otherwise
// std::invalid_argument is thrown with a message reporting both numbers.
// An empty vector passes, since 0 % n == 0 for any non-zero n.
// NOTE(review): if n_samples() can return 0, the modulo below divides by zero
// (undefined behavior). No guard is visible in this listing -- confirm whether
// one exists in the real header or should be added.
// NOTE(review): the closing braces of this definition are not visible in this listing.
183 template<
class ELEMENT_TYPE>
184 void validate_multi_sample_vector_length(
const std::vector<ELEMENT_TYPE>& vec)
const {
185 const auto num_samples = m_header.
n_samples();
188 if ( vec.size() % num_samples != 0 ) {
189 throw std::invalid_argument(std::string{
"Number of elements in flattened vector for individual field ("} + std::to_string(vec.size()) +
") not divisible by number of samples (" + std::to_string(num_samples) +
")");
// Length check for a vector of strings (non-template overload).
// An empty vector is accepted; otherwise its size must equal
// m_header.n_samples(), or std::invalid_argument is thrown with a message
// reporting both counts.
// NOTE(review): the closing braces of this definition are not visible in this listing.
194 void validate_multi_sample_vector_length(
const std::vector<std::string>& vec)
const {
196 if ( vec.size() != m_header.
n_samples() && ! vec.empty() ) {
197 throw std::invalid_argument(std::string{
"Number of elements in non-empty vector for individual field ("} + std::to_string(vec.size()) +
") not equal to the number of samples (" + std::to_string(m_header.
n_samples()) +
")");
// Keeps m_num_present_fields in step with a single field's change of state.
//   field_was_already_present - presence of the field before the modification
//   field_currently_present   - presence of the field after the modification
// The visible branches handle only the two transitions; when both flags are
// equal neither branch is taken.
// NOTE(review): the closing braces of this definition are not visible in this listing.
201 void update_present_field_count(
const bool field_was_already_present,
const bool field_currently_present) {
// absent -> present: one more field is in use.
202 if ( ! field_was_already_present && field_currently_present ) {
203 ++m_num_present_fields;
// present -> absent: one fewer field is in use.
205 else if ( field_was_already_present && ! field_currently_present ) {
206 --m_num_present_fields;
void set_genotype_field_by_sample(const FIELD_ID_TYPE &field_id, const SAMPLE_ID_TYPE &sample_id, const FIELD_VALUE_TYPE *field_values, const uint32_t num_field_values)
Definition: variant_builder_individual_region.h:62
#define BCF_HT_REAL
Definition: vcf.h:55
int32_t gt_index() const
Definition: variant_builder_individual_region.h:35
void bulk_set_integer_field(const FIELD_ID_TYPE &field_id, BULK_FIELD_VALUES_TYPE &&field_values)
Definition: variant_builder_individual_region.h:46
void set_string_field_by_sample(const FIELD_ID_TYPE &field_id, const SAMPLE_ID_TYPE &sample_id, const FIELD_VALUE_TYPE *field_values, const uint32_t num_field_values)
Definition: variant_builder_individual_region.h:79
void remove_individual_field(const FIELD_ID_TYPE &field_id)
Definition: variant_builder_individual_region.h:84
void bulk_set_genotype_field(const FIELD_ID_TYPE &field_id, BULK_FIELD_VALUES_TYPE &&field_values)
Definition: variant_builder_individual_region.h:40
void bulk_set_float_field(const FIELD_ID_TYPE &field_id, BULK_FIELD_VALUES_TYPE &&field_values)
Definition: variant_builder_individual_region.h:52
void bulk_set_string_field(const FIELD_ID_TYPE &field_id, BULK_FIELD_VALUES_TYPE &&field_values)
Definition: variant_builder_individual_region.h:57
void set_integer_field_by_sample(const FIELD_ID_TYPE &field_id, const SAMPLE_ID_TYPE &sample_id, const FIELD_VALUE_TYPE *field_values, const uint32_t num_field_values)
Definition: variant_builder_individual_region.h:68
void set_enable_validation(const bool enable_validation)
Definition: variant_builder_individual_region.h:33
uint32_t num_present_fields() const
Definition: variant_builder_individual_region.h:36
bool modified() const
Definition: variant_builder_individual_region.h:37
~VariantBuilderIndividualRegion()=default
#define BCF_HT_STR
Definition: vcf.h:56
void set_float_field_by_sample(const FIELD_ID_TYPE &field_id, const SAMPLE_ID_TYPE &sample_id, const FIELD_VALUE_TYPE *field_values, const uint32_t num_field_values)
Definition: variant_builder_individual_region.h:74
Definition: exceptions.h:9
Helper class for VariantBuilder to manage the fields belonging to the individual region of Variant re...
Definition: variant_builder_individual_region.h:23
VariantBuilderIndividualRegion & operator=(VariantBuilderIndividualRegion &&other)=default
void encode_into(kstring_t *buffer) const
Encode all individual fields into the provided byte buffer in the proper order and format for final i...
Definition: variant_builder_individual_region.cpp:103
VariantBuilderIndividualRegion(const VariantHeader &header, const bool enable_validation)
Definition: variant_builder_individual_region.cpp:16
#define BCF_HT_INT
Definition: vcf.h:54
uint32_t estimate_total_size() const
Produce a slight overestimate of the total size of the encoded data for this individual region...
Definition: variant_builder_individual_region.cpp:83
void clear()
Reset the individual region to a pristine state with no field data.
Definition: variant_builder_individual_region.cpp:133