1 #ifndef gamgee__variant__guard
2 #define gamgee__variant__guard
11 #include "../utils/variant_utils.h"
14 #include "boost/dynamic_bitset.hpp"
32 explicit Variant(
const std::shared_ptr<bcf_hdr_t>&
header,
const std::shared_ptr<bcf1_t>& body) noexcept;
48 bool missing()
const {
return m_body ==
nullptr; }
50 uint32_t
chromosome()
const {
return uint32_t(m_body->rid);}
53 uint32_t
alignment_stop()
const {
return uint32_t(m_body->pos + m_body->rlen);}
54 float qual()
const {
return m_body->qual;}
55 uint32_t
n_samples()
const {
return uint32_t(m_body->n_sample);}
56 uint32_t
n_alleles()
const {
return uint32_t(m_body->n_allele);}
58 std::string
id()
const;
59 std::string
ref()
const;
60 std::vector<std::string>
alt()
const;
64 bool has_filter(
const std::string& filter)
const;
/**
 * @brief functional-style set logic operations for variant field vectors
 *
 * The statements visible here create a boost::dynamic_bitset sized by n_samples, store
 * the result of pred for each dereferenced iterator position, and return that bitset.
 *
 * NOTE(review): this view omits several lines of the definition (the function name line
 * and the loop header among them), so how n_samples, i and it are set up is not shown here.
 *
 * @param first iterator to the first element to test
 * @param last iterator paired with first; presumably one past the final element -- TODO confirm
 * @param pred predicate invoked on each dereferenced element
 * @return the bitset holding the predicate results
 */
147 template <
class VALUE,
template<
class>
class ITER>
149 const ITER<VALUE>& first,
150 const ITER<VALUE>& last,
151 const std::function<
bool (
const decltype(*first)& value)> pred)
// one bit per n_samples entry
154 auto selected_samples = boost::dynamic_bitset<>(
n_samples);
// bit i records the predicate result for the current element; the iterator is advanced afterwards
157 selected_samples[i] = pred(*it++);
159 return selected_samples;
// shared pointer to the bcf1_t record; missing() reports true when this pointer is null
176 std::shared_ptr<bcf1_t> m_body;
178 bcf_fmt_t* find_individual_field(
const std::string& tag)
const {
return bcf_get_fmt(m_header.m_header.get(), m_body.get(), tag.c_str()); }
179 bcf_info_t* find_shared_field(
const std::string& tag)
const {
return bcf_get_info(m_header.m_header.get(), m_body.get(), tag.c_str()); }
182 bool check_field(
const int32_t type_field,
const int32_t type_value,
const int32_t index)
const;
183 inline AlleleType allele_type_from_difference(
const int diff)
const;
185 template<
class FIELD_TYPE,
class INDEX_OR_TAG> SharedField<FIELD_TYPE> shared_field_as(
const INDEX_OR_TAG& p)
const;
186 template<
class FIELD_TYPE,
class INDEX_OR_TAG> IndividualField<IndividualFieldValue<FIELD_TYPE>> individual_field_as(
const INDEX_OR_TAG& p)
const;
195 inline void set_alignment_start(
const int32_t start) { m_body->pos = start - 1; }
196 inline void set_alignment_stop(
const int32_t end) { m_body->rlen = end - m_body->pos; }
/**
 * @brief replaces the first allele (d.allele[0]) of the record with ref
 *
 * NOTE(review): this view omits several lines of the definition (braces and the
 * statements between the numbered lines), so only the visible statements are described.
 *
 * @param ref characters of the new reference allele
 * @param ref_length number of characters of ref to use
 */
198 inline void set_reference_allele(
const char*
ref,
const int32_t ref_length)
// when the record's rlen is at least ref_length, ref is copied over the current first
// allele and NUL-terminated in place
205 if(m_body->rlen >= ref_length)
207 memcpy(m_body->d.allele[0], ref, ref_length);
208 m_body->d.allele[0][ref_length] =
'\0';
// the first allele pointer is pointed at the caller's buffer, then bcf_update_alleles is
// called with the record's allele array and allele count
// NOTE(review): presumably the fallback branch of the check above -- the branch keyword is not visible here
216 m_body->d.allele[0] =
const_cast<char*
>(
ref);
218 bcf_update_alleles(const_cast<const bcf_hdr_t*>(m_header.m_header.get()), m_body.get(),
const_cast<const char**
>(m_body->d.allele), m_body->n_allele);
221 inline void set_reference_allele(
const char* ref) { set_reference_allele(ref, static_cast<int32_t>(strlen(ref))); }
222 inline void set_reference_allele(
const char ref_base) { set_reference_allele(&ref_base, 1); }
bool boolean_shared_field(const std::string &tag) const
whether or not the tag is present
Definition: variant.cpp:190
Variant & operator=(const Variant &other)
deep copy assignment of a Variant and its header. Shared pointers maintain state to all other associ...
Definition: variant.cpp:80
bcf_fmt_t * bcf_get_fmt(const bcf_hdr_t *hdr, bcf1_t *line, const char *key)
Definition: vcf.c:2979
DiploidPLGenotype
simple enum to keep the indices of the genotypes in the PL field of diploid individuals ...
Definition: variant.h:24
AlleleMask allele_mask() const
computes the allele types for all alleles (including the reference allele)
Definition: variant.cpp:117
VariantHeader header() const
returns the header for this variant
Definition: variant.h:43
IndividualField< IndividualFieldValue< std::string > > individual_field_as_string(const std::string &tag) const
same as string_individual_field but will attempt to convert underlying data to string if possible...
Definition: variant.cpp:152
std::string chromosome_name() const
returns the name of the chromosome by querying the header.
Definition: variant.h:51
std::string id() const
returns the variant id field (typically dbsnp id)
Definition: variant.cpp:92
uint32_t n_samples() const
returns the number of samples in this Variant record
Definition: variant.h:55
VariantFilters filters() const
returns a vector-like object with all the filters for this record
Definition: variant.cpp:108
IndividualField< IndividualFieldValue< int32_t > > integer_individual_field(const std::string &tag) const
returns a random access object with all the values in a given individual field tag in integer format ...
Definition: variant.cpp:132
int bcf_unpack(bcf1_t *b, int which)
Definition: vcf.c:1945
SharedField< int32_t > shared_field_as_integer(const std::string &tag) const
same as integer_shared_field but will attempt to convert underlying data to integer if possible...
Definition: variant.cpp:210
uint32_t alignment_start() const
returns a 1-based alignment start position (as you would see in a VCF file).
Definition: variant.h:52
bool missing() const
returns true if this is a default-constructed Variant object with no data
Definition: variant.h:48
bool has_filter(const std::string &filter) const
checks for the existence of a filter in this record
Definition: variant.cpp:113
std::string ref() const
returns the ref allele in this Variant record
Definition: variant.cpp:97
static boost::dynamic_bitset<> select_if(const ITER< VALUE > &first, const ITER< VALUE > &last, const std::function< bool(const decltype(*first)&value)> pred)
functional-style set logic operations for variant field vectors
Definition: variant.h:148
utility class to write out a VCF/BCF file to any stream
Definition: variant_writer.h:21
SharedField< std::string > string_shared_field(const std::string &tag) const
returns a random access object with all the values in a given shared field tag in string format for a...
Definition: variant.cpp:206
#define BCF1_DIRTY_ALS
Definition: vcf.h:160
uint32_t alignment_stop() const
returns a 1-based alignment stop position, as you would see in a VCF INFO END tag, or the end position of the reference allele if there is no END tag.
Definition: variant.h:53
bcf_info_t * bcf_get_info_id(bcf1_t *line, const int id)
Definition: vcf.c:3004
IndividualField< IndividualFieldValue< std::string > > string_individual_field(const std::string &tag) const
returns a random access object with all the values in a given individual field tag in string format f...
Definition: variant.cpp:140
A class template to hold the values of a specific Variant's shared field.
Definition: shared_field.h:57
std::vector< std::string > alt() const
returns the vectors of alt alleles in this Variant record
Definition: variant.cpp:102
bcf_info_t * bcf_get_info(const bcf_hdr_t *hdr, bcf1_t *line, const char *key)
Definition: vcf.c:2986
VariantBuilder: construct Variant records from scratch (and, coming soon, from existing Variant recor...
Definition: variant_builder.h:164
IndividualField< IndividualFieldValue< int32_t > > individual_field_as_integer(const std::string &tag) const
same as integer_individual_field but will attempt to convert underlying data to integer if possible...
Definition: variant.cpp:144
#define BCF_UN_STR
Definition: vcf.h:334
AlleleType
Definition: variant_utils.h:22
uint32_t n_alleles() const
returns the number of alleles in this Variant record including the reference allele ...
Definition: variant.h:56
IndividualField< Genotype > genotypes() const
special getter for the Genotype (GT) field. Returns a random access object with all the values in a g...
Definition: variant.cpp:252
A class template to hold the values of a specific Variant's format field for all samples.
Definition: individual_field.h:65
Definition: exceptions.h:9
int bcf_update_alleles(const bcf_hdr_t *hdr, bcf1_t *line, const char **alleles, int nals)
Definition: vcf.c:2922
SharedField< std::string > shared_field_as_string(const std::string &tag) const
same as string_shared_field but will attempt to convert underlying data to string if possible...
Definition: variant.cpp:218
SharedField< float > float_shared_field(const std::string &tag) const
returns a random access object with all the values in a given shared field tag in float format for al...
Definition: variant.cpp:202
SharedField< int32_t > integer_shared_field(const std::string &tag) const
returns a random access object with all the values in a given shared field tag in integer format cont...
Definition: variant.cpp:198
class to manipulate filter field objects without making copies.
Definition: variant_filters.h:23
Utility class to manipulate a Variant record.
Definition: variant.h:29
SharedField< float > shared_field_as_float(const std::string &tag) const
same as float_shared_field but will attempt to convert underlying data to float if possible...
Definition: variant.cpp:214
IndividualField< IndividualFieldValue< float > > float_individual_field(const std::string &tag) const
returns a random access object with all the values in a given individual field tag in float format fo...
Definition: variant.cpp:136
Variant()=default
initializes a null Variant
Utility class to handle reference blocks while iterating over multiple variant files.
Definition: reference_block_splitting_variant_iterator.h:16
std::vector< AlleleType > AlleleMask
Definition: variant_utils.h:24
uint32_t chromosome() const
returns the integer representation of the chromosome. Notice that chromosomes are listed in index ord...
Definition: variant.h:50
IndividualField< IndividualFieldValue< float > > individual_field_as_float(const std::string &tag) const
same as float_individual_field but will attempt to convert underlying data to float if possible...
Definition: variant.cpp:148
bcf_fmt_t * bcf_get_fmt_id(bcf1_t *line, const int id)
Definition: vcf.c:2993
float qual() const
returns the Phred scaled site qual (probability that the site is not reference). See VCF spec...
Definition: variant.h:54