Gamgee
You miserable little maggot. I'll stove your head in!
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
genotype.h
Go to the documentation of this file.
1 #ifndef gamgee__genotype__guard
2 #define gamgee__genotype__guard
3 
5 
6 #include "../utils/genotype_utils.h"
7 #include "../utils/hts_memory.h"
8 #include "../utils/utils.h"
9 #include "../utils/variant_field_type.h"
10 #include "../utils/variant_utils.h"
11 
12 #include <memory>
13 #include <utility>
14 #include <stdexcept>
15 
16 namespace gamgee {
17 
21 class Genotype{
22 
23  public:
30  Genotype(const std::shared_ptr<bcf1_t>& body, const bcf_fmt_t* const format_ptr, const uint8_t* data_ptr);
31 
35  Genotype(const Genotype& other) = delete;
36 
41  Genotype& operator=(const Genotype& other) = delete;
42 
47  Genotype(Genotype&& other) = default;
48 
53  Genotype& operator=(Genotype&& other) = default;
54 
58  ~Genotype() = default;
59 
65  bool operator!=(const Genotype& other) const;
66 
73  bool operator==(const Genotype& other) const;
74 
75  // only for diploids
81  bool het() const;
82 
88  bool non_ref_het() const;
89 
95  uint32_t fast_diploid_key_generation() const;
96 
97  //for all ploidies
102  bool hom_var() const;
103 
108  bool hom_ref() const;
109 
115  bool missing() const;
116 
117  // referencing alleles inside a sample's genotype
118 
123  std::vector<std::string> allele_strings() const;
124 
129  std::vector<int32_t> allele_keys() const;
130 
136  std::string allele_string(const uint32_t index) const;
137 
143  int32_t allele_key(const uint32_t index) const;
144 
150  int32_t operator[](const uint32_t index) const;
151 
156  uint32_t size() const;
157 
177  bool snp(const AlleleMask& mask) const;
178 
194  bool insertion(const AlleleMask& mask) const;
195 
211  bool deletion(const AlleleMask& mask) const;
212 
232  bool indel(const AlleleMask& mask) const;
233 
249  bool biallelic() const;
250 
261  bool complex() const { return !biallelic(); }
262 
273  bool mixed() const;
274 
275  bool variant() const {
276  return !missing() && !hom_ref();
277  }
278 
290  static inline void encode_genotype(std::vector<int32_t>& alleles) {
291  encode_genotype(alleles, false);
292  }
293 
305  static inline void encode_genotype(std::vector<int32_t>& alleles, bool phase_all_alleles) {
306  for ( auto allele_index = 0u; allele_index < alleles.size(); ++allele_index ) {
307  // Only legal value below -1 is the int32 vector end value
308  if ( alleles[allele_index] < -1 && alleles[allele_index] != bcf_int32_vector_end ) {
309  throw std::invalid_argument{"Genotype vector must consist only of allele indices, -1 for missing values, or vector end values"};
310  }
311  // Do not modify vector end values
312  else if ( alleles[allele_index] != bcf_int32_vector_end ) {
313  alleles[allele_index] = (alleles[allele_index] + 1) << 1 | (phase_all_alleles && allele_index > 0u ? 1 : 0);
314  }
315  }
316  }
317 
329  static inline void encode_genotypes(std::vector<std::vector<int32_t>>& multiple_genotypes) {
330  for ( auto& genotype : multiple_genotypes ) {
331  encode_genotype(genotype, false);
332  }
333  }
334 
343  static inline void encode_genotypes(VariantBuilderMultiSampleVector<int32_t>& multiple_genotypes) {
344  auto& genotypes_vector = const_cast<std::vector<int32_t>&>(multiple_genotypes.get_vector());
345  encode_genotype(genotypes_vector, false);
346  }
347 
348  private:
349  std::shared_ptr<bcf1_t> m_body;
350  const bcf_fmt_t* m_format_ptr;
351  const uint8_t* m_data_ptr;
352 
353  bool allele_is_type_or_ref(const AlleleType& type, const std::vector<int32_t>& keys, const AlleleMask& mask) const;
354 };
355 
356 }
357 
358 #endif
bool het() const
Checks if this genotype vector is any type of heterozygous call.
Definition: genotype.cpp:36
Encodes a genotype.
Definition: genotype.h:21
bool insertion(const AlleleMask &mask) const
whether or not this genotype represents an insertion
Definition: genotype.cpp:112
bool operator!=(const Genotype &other) const
Checks if another genotype does not equal this genotype.
Definition: genotype.cpp:20
bool snp(const AlleleMask &mask) const
whether or not this genotype represents a snp
Definition: genotype.cpp:107
bool indel(const AlleleMask &mask) const
whether or not this genotype represents an insertion or deletion
Definition: genotype.cpp:122
Genotype(const std::shared_ptr< bcf1_t > &body, const bcf_fmt_t *const format_ptr, const uint8_t *data_ptr)
Constructs a genotype.
Definition: genotype.cpp:14
bool missing() const
Checks if all alleles are missing.
Definition: genotype.cpp:73
bool hom_var() const
Checks if this genotype vector is a homozygous call that is non-reference.
Definition: genotype.cpp:58
bool non_ref_het() const
Checks if this genotype vector is a heterozygous call and none of the alleles is the reference...
Definition: genotype.cpp:45
bool operator==(const Genotype &other) const
Checks if another genotype equals this genotype.
Definition: genotype.cpp:24
int32_t allele_key(const uint32_t index) const
Returns the allele key within this line.
Definition: genotype.cpp:95
~Genotype()=default
Explicit default as recommended by many threads on stackoverflow.
static void encode_genotypes(VariantBuilderMultiSampleVector< int32_t > &multiple_genotypes)
Converts multiple genotypes stored in a VariantBuilderMultiSampleVector into BCF-encoded format suita...
Definition: genotype.h:343
bool hom_ref() const
Checks if this genotype vector is a homozygous call that is reference.
Definition: genotype.cpp:64
Definition: vcf.h:136
Class that allows you to efficiently prepare multi-sample data for setting individual fields in Varia...
Definition: variant_builder_multi_sample_vector.h:40
#define bcf_int32_vector_end
Definition: vcf.h:752
std::vector< std::string > allele_strings() const
Returns a vector with all the allele strings.
Definition: genotype.cpp:83
AlleleType
Definition: variant_utils.h:22
static void encode_genotype(std::vector< int32_t > &alleles)
Converts a vector of allele indices representing a genotype into BCF-encoded format suitable for pass...
Definition: genotype.h:290
Genotype & operator=(const Genotype &other)=delete
copying of the Genotype object is not allowed.
static void encode_genotype(std::vector< int32_t > &alleles, bool phase_all_alleles)
Converts a vector of allele indices representing a genotype into BCF-encoded format suitable for pass...
Definition: genotype.h:305
bool variant() const
Definition: genotype.h:275
bool complex() const
literally the negation of biallelic()
Definition: genotype.h:261
static void encode_genotypes(std::vector< std::vector< int32_t >> &multiple_genotypes)
Converts multiple vectors of allele indices representing genotypes into BCF-encoded format suitable f...
Definition: genotype.h:329
Definition: exceptions.h:9
bool biallelic() const
whether or not this genotype has at most one alternate allele
Definition: genotype.cpp:135
std::string allele_string(const uint32_t index) const
Returns the allele string at index.
Definition: genotype.cpp:91
bool mixed() const
identifies variants with two different types of alleles
Definition: genotype.cpp:143
const std::vector< ELEMENT_TYPE > & get_vector() const
Get a reference to the internal one-dimensional vector used for value storage.
Definition: variant_builder_multi_sample_vector.h:117
std::vector< int32_t > allele_keys() const
Returns a vector with all the allele keys.
Definition: genotype.cpp:87
int32_t operator[](const uint32_t index) const
Returns the allele key within this line.
Definition: genotype.cpp:99
uint32_t fast_diploid_key_generation() const
A bit encoding for the first two alleles.
Definition: genotype.cpp:69
bool deletion(const AlleleMask &mask) const
whether or not this genotype represents a deletion
Definition: genotype.cpp:117
std::vector< AlleleType > AlleleMask
Definition: variant_utils.h:24
uint32_t size() const
Returns the number of alleles.
Definition: genotype.cpp:103