// BCF_HL_*: kinds of header line.
46 #define BCF_HL_FLT 0 // header line
50 #define BCF_HL_STR 4 // structured header line TAG=<A=..,B=..>
51 #define BCF_HL_GEN 5 // generic header line
// BCF_HT_*: type codes; the typed bcf_update_*/bcf_get_* wrapper macros pass one of these as the final `type` argument.
53 #define BCF_HT_FLAG 0 // header type
// BCF_VL_*: length classes.
58 #define BCF_VL_FIXED 0 // variable length
// BCF_DT_*: dictionary selectors, used as the first index into hdr->id[] and as the index into hdr->n[].
76 #define BCF_DT_ID 0 // dictionary type
78 #define BCF_DT_SAMPLE 2
107 int ntransl, *transl[2];
// BCF_BT_*: value type codes. The codes 4 and 6 are not assigned in this group.
// NOTE(review): presumably these are the values carried in the low 4 bits of a
// descriptor byte (bcf_enc_size writes size<<4|type) — confirm against vcf.c.
119 #define BCF_BT_NULL 0
120 #define BCF_BT_INT8 1
121 #define BCF_BT_INT16 2
122 #define BCF_BT_INT32 3
123 #define BCF_BT_FLOAT 5
124 #define BCF_BT_CHAR 7
141 uint32_t p_off:31, p_free:1;
153 uint32_t vptr_off:31,
// BCF1_DIRTY_*: single-bit flags (1, 2, 4, 8), so any combination fits in one int.
// NOTE(review): presumably OR-ed into a record's shared_dirty field — confirm.
159 #define BCF1_DIRTY_ID 1
160 #define BCF1_DIRTY_ALS 2
161 #define BCF1_DIRTY_FLT 4
162 #define BCF1_DIRTY_INF 8
165 int m_fmt,
m_info, m_id, m_als, m_allele, m_flt;
// BCF_ERR_*: single-bit error flags (1, 2, 4), so several can be reported at once.
// NOTE(review): presumably OR-ed into a record's errcode field — confirm.
179 #define BCF_ERR_CTG_UNDEF 1
180 #define BCF_ERR_TAG_UNDEF 2
181 #define BCF_ERR_NCOLS 4
199 uint32_t n_info:16, n_allele:16;
200 uint32_t n_fmt:8, n_sample:24;
// Aliases with a trailing '1': each expands to the call of the same name
// without the suffix, passing its arguments through unchanged.
230 #define bcf_init1() bcf_init()
231 #define bcf_read1(fp,h,v) bcf_read((fp),(h),(v))
232 #define vcf_read1(fp,h,v) vcf_read((fp),(h),(v))
233 #define bcf_write1(fp,h,v) bcf_write((fp),(h),(v))
234 #define vcf_write1(fp,h,v) vcf_write((fp),(h),(v))
235 #define bcf_destroy1(v) bcf_destroy(v)
236 #define vcf_parse1(s,h,v) vcf_parse((s),(h),(v))
237 #define bcf_clear1(v) bcf_clear(v)
238 #define vcf_format1(h,v,s) vcf_format((h),(v),(s))
// File open/close aliases: bcf_open and vcf_open both expand to hts_open(fn, mode);
// bcf_close and vcf_close both expand to hts_close(fp).
274 #define bcf_open(fn, mode) hts_open((fn), (mode))
275 #define vcf_open(fn, mode) hts_open((fn), (mode))
276 #define bcf_close(fp) hts_close(fp)
277 #define vcf_close(fp) hts_close(fp)
// BCF_UN_*: bit flags. BCF_UN_SHR is STR|FLT|INFO (= 7) and BCF_UN_ALL is SHR|FMT (= 15).
// NOTE(review): presumably passed as the `which` argument of bcf_unpack() — confirm.
334 #define BCF_UN_STR 1 // up to ALT inclusive
335 #define BCF_UN_FLT 2 // up to FILTER
336 #define BCF_UN_INFO 4 // up to INFO
337 #define BCF_UN_SHR (BCF_UN_STR|BCF_UN_FLT|BCF_UN_INFO) // all shared information
338 #define BCF_UN_FMT 8 // unpack format and each sample
339 #define BCF_UN_IND BCF_UN_FMT // a synonym of BCF_UN_FMT
340 #define BCF_UN_ALL (BCF_UN_SHR|BCF_UN_FMT) // everything
// bcf_itr_queryi() and bcf_itr_querys() pass this function as the final argument
// of their hts_itr_query()/hts_itr_querys() calls.
// NOTE(review): presumably reads one record from fp into v and reports its
// tid/beg/end through the out-pointers; `null` looks unused — confirm against vcf.c.
369 int bcf_readrec(
BGZF *fp,
void *null,
void *v,
int *tid,
int *beg,
int *end);
// Expands to element BCF_DT_SAMPLE of hdr->n[] (an lvalue).
// The expansion is not wrapped in outer parentheses.
433 #define bcf_hdr_nsamples(hdr) (hdr)->n[BCF_DT_SAMPLE]
// Typed wrappers around bcf_update_info(): each forwards its arguments and
// appends the matching BCF_HT_* type code. bcf_update_info_string() takes no
// count and always passes 1.
534 #define bcf_update_info_int32(hdr,line,key,values,n) bcf_update_info((hdr),(line),(key),(values),(n),BCF_HT_INT)
535 #define bcf_update_info_float(hdr,line,key,values,n) bcf_update_info((hdr),(line),(key),(values),(n),BCF_HT_REAL)
536 #define bcf_update_info_flag(hdr,line,key,string,n) bcf_update_info((hdr),(line),(key),(string),(n),BCF_HT_FLAG)
537 #define bcf_update_info_string(hdr,line,key,string) bcf_update_info((hdr),(line),(key),(string),1,BCF_HT_STR)
// Same pattern for bcf_update_format(). bcf_update_genotypes() fixes the key
// to "GT" and the type to BCF_HT_INT.
556 #define bcf_update_format_int32(hdr,line,key,values,n) bcf_update_format((hdr),(line),(key),(values),(n),BCF_HT_INT)
557 #define bcf_update_format_float(hdr,line,key,values,n) bcf_update_format((hdr),(line),(key),(values),(n),BCF_HT_REAL)
558 #define bcf_update_format_char(hdr,line,key,values,n) bcf_update_format((hdr),(line),(key),(values),(n),BCF_HT_STR)
559 #define bcf_update_genotypes(hdr,line,gts,n) bcf_update_format((hdr),(line),"GT",(gts),(n),BCF_HT_INT) // See bcf_gt_ macros below
// Genotype value encoding used by the bcf_gt_* macros:
//   value = (allele_index + 1) << 1, with bit 0 set when the allele is phased.
//   bcf_gt_missing is 0; bcf_gt_is_missing() yields 1 whenever (val >> 1) is
//   zero, so the value 1 is reported as missing as well.
//   bcf_gt_allele() inverts the encoding: (val >> 1) - 1.
// Every macro argument is parenthesised so that expressions such as `a|b` or
// `c ? x : y` can be passed safely.
#define bcf_gt_phased(idx) ((((idx)+1)<<1)|1)
#define bcf_gt_unphased(idx) (((idx)+1)<<1)
#define bcf_gt_missing 0
#define bcf_gt_is_missing(val) ((val)>>1 ? 0 : 1)
#define bcf_gt_is_phased(idx) ((idx)&1)
#define bcf_gt_allele(val) (((val)>>1)-1)
// Index of the unordered allele pair (a,b): with hi = the larger and lo = the
// smaller of the two, the result is hi*(hi+1)/2 + lo.
// Both arguments are evaluated more than once, so avoid side effects in them.
574 #define bcf_alleles2gt(a,b) ((a)>(b)?((a)*((a)+1)/2+(b)):((b)*((b)+1)/2+(a)))
575 static inline void bcf_gt2alleles(
int igt,
int *a,
int *b)
578 while ( k<igt ) { dk++; k += dk; }
579 *b = dk - 1; *a = igt - k + *b;
// Typed wrappers around bcf_get_info_values(): dst is cast to void** and the
// BCF_HT_* code for the requested type is appended as the last argument.
// NOTE(review): unlike the bcf_update_* wrappers, arguments other than dst are
// forwarded without parentheses.
623 #define bcf_get_info_int32(hdr,line,tag,dst,ndst) bcf_get_info_values(hdr,line,tag,(void**)(dst),ndst,BCF_HT_INT)
624 #define bcf_get_info_float(hdr,line,tag,dst,ndst) bcf_get_info_values(hdr,line,tag,(void**)(dst),ndst,BCF_HT_REAL)
625 #define bcf_get_info_string(hdr,line,tag,dst,ndst) bcf_get_info_values(hdr,line,tag,(void**)(dst),ndst,BCF_HT_STR)
626 #define bcf_get_info_flag(hdr,line,tag,dst,ndst) bcf_get_info_values(hdr,line,tag,(void**)(dst),ndst,BCF_HT_FLAG)
// Same pattern for bcf_get_format_values(). bcf_get_genotypes() fixes the tag
// to "GT" and the type to BCF_HT_INT.
650 #define bcf_get_format_int32(hdr,line,tag,dst,ndst) bcf_get_format_values(hdr,line,tag,(void**)(dst),ndst,BCF_HT_INT)
651 #define bcf_get_format_float(hdr,line,tag,dst,ndst) bcf_get_format_values(hdr,line,tag,(void**)(dst),ndst,BCF_HT_REAL)
652 #define bcf_get_format_char(hdr,line,tag,dst,ndst) bcf_get_format_values(hdr,line,tag,(void**)(dst),ndst,BCF_HT_STR)
653 #define bcf_get_genotypes(hdr,line,dst,ndst) bcf_get_format_values(hdr,line,"GT",(void**)(dst),ndst,BCF_HT_INT)
// Yields the .key member of entry int_id in dictionary `type` (hdr->id[type][int_id]).
// No bounds check is performed on either index.
673 #define bcf_hdr_int2id(hdr,type,int_id) ((hdr)->id[type][int_id].key)
// Accessors for the packed word hdr->id[BCF_DT_ID][int_id].val->info[type]:
//   bits 0-3   -> bcf_hdr_id2coltype
//   bits 4-7   -> bcf_hdr_id2type
//   bits 8-11  -> bcf_hdr_id2length
//   bits 12+   -> bcf_hdr_id2number
// bcf_hdr_idinfo_exists() yields 0 when int_id is negative or when the coltype
// field equals 0xf, and 1 otherwise. The negative check comes first, so the
// array is never indexed with a negative int_id.
// All macro arguments are parenthesised; in particular `(int_id)<0` keeps
// arguments such as `c ? 0 : -1` from being mis-grouped by the comparison.
// int_id is evaluated more than once by bcf_hdr_idinfo_exists().
#define bcf_hdr_id2length(hdr,type,int_id) ((hdr)->id[BCF_DT_ID][(int_id)].val->info[(type)]>>8 & 0xf)
#define bcf_hdr_id2number(hdr,type,int_id) ((hdr)->id[BCF_DT_ID][(int_id)].val->info[(type)]>>12)
#define bcf_hdr_id2type(hdr,type,int_id) ((hdr)->id[BCF_DT_ID][(int_id)].val->info[(type)]>>4 & 0xf)
#define bcf_hdr_id2coltype(hdr,type,int_id) ((hdr)->id[BCF_DT_ID][(int_id)].val->info[(type)] & 0xf)
#define bcf_hdr_idinfo_exists(hdr,type,int_id) (((int_id)<0 || bcf_hdr_id2coltype((hdr),(type),(int_id))==0xf) ? 0 : 1)
// Selects a header record pointer from a dictionary entry: when dict_type is
// BCF_DT_CTG the lookup is id[BCF_DT_CTG][int_id].val->hrec[0]; for any other
// dict_type it is id[BCF_DT_ID][int_id].val->hrec[col_type].
// dict_type is evaluated twice.
702 #define bcf_hdr_id2hrec(hdr,dict_type,col_type,int_id) ((hdr)->id[(dict_type)==BCF_DT_CTG?BCF_DT_CTG:BCF_DT_ID][int_id].val->hrec[(dict_type)==BCF_DT_CTG?0:(col_type)])
// Iterator and index macros mapping bcf_* names onto the generic hts_* calls.
//   bcf_itr_queryi / bcf_itr_querys : pass bcf_readrec as the final argument;
//     bcf_itr_querys also passes bcf_hdr_name2id (cast to hts_name2id_f) and hdr.
//   bcf_itr_next     : reads from (htsfp)->fp.bgzf and passes 0 as the last argument.
//   bcf_index_load   : requests the HTS_FMT_CSI format.
//   bcf_index_seqnames : passes bcf_hdr_id2name (cast to hts_id2name_f) and hdr.
720 #define bcf_itr_destroy(iter) hts_itr_destroy(iter)
721 #define bcf_itr_queryi(idx, tid, beg, end) hts_itr_query((idx), (tid), (beg), (end), bcf_readrec)
722 #define bcf_itr_querys(idx, hdr, s) hts_itr_querys((idx), (s), (hts_name2id_f)(bcf_hdr_name2id), (hdr), hts_itr_query, bcf_readrec)
723 #define bcf_itr_next(htsfp, itr, r) hts_itr_next((htsfp)->fp.bgzf, (itr), (r), 0)
724 #define bcf_index_load(fn) hts_idx_load(fn, HTS_FMT_CSI)
725 #define bcf_index_seqnames(idx, hdr, nptr) hts_idx_seqnames((idx),(nptr),(hts_id2name_f)(bcf_hdr_id2name),(hdr))
// Sentinel values. For each integer width the most negative value marks
// "missing" and the next value up (MIN+1) marks "vector end".
// For strings the sentinels are 0 (vector end) and 0x07 (missing).
// The INTn_MIN constants come from <stdint.h>.
750 #define bcf_int8_vector_end (INT8_MIN+1)
751 #define bcf_int16_vector_end (INT16_MIN+1)
752 #define bcf_int32_vector_end (INT32_MIN+1)
753 #define bcf_str_vector_end 0
754 #define bcf_int8_missing INT8_MIN
755 #define bcf_int16_missing INT16_MIN
756 #define bcf_int32_missing INT32_MIN
757 #define bcf_str_missing 0x07
760 static inline void bcf_float_set(
float *ptr, uint32_t value)
762 union { uint32_t i;
float f; } u;
// Call bcf_float_set() on the address of x with the uint32_t pattern
// bcf_float_vector_end or bcf_float_missing. x must be an lvalue because its
// address is taken.
766 #define bcf_float_set_vector_end(x) bcf_float_set(&(x),bcf_float_vector_end)
767 #define bcf_float_set_missing(x) bcf_float_set(&(x),bcf_float_missing)
768 static inline int bcf_float_is_missing(
float f)
770 union { uint32_t i;
float f; } u;
774 static inline int bcf_float_is_vector_end(
float f)
776 union { uint32_t i;
float f; } u;
783 #define BRANCH(type_t, missing, vector_end) { \
784 type_t *ptr = (type_t*) (fmt->p + isample*fmt->size); \
786 for (i=0; i<fmt->n && ptr[i]!=vector_end; i++) \
788 if ( i ) kputc("/|"[ptr[i]&1], str); \
789 if ( !(ptr[i]>>1) ) kputc('.', str); \
790 else kputw((ptr[i]>>1) - 1, str); \
792 if (i == 0) kputc('.', str); \
798 default: fprintf(stderr,
"FIXME: type %d in bcf_format_gt?\n", fmt->
type); abort();
break;
803 static inline void bcf_enc_size(
kstring_t *s,
int size,
int type)
806 kputc(15<<4|type, s);
811 kputsn((
char*)&x, 4, s);
815 kputsn((
char*)&x, 2, s);
821 }
else kputc(size<<4|type, s);
824 static inline int bcf_enc_inttype(
long x)
831 static inline void bcf_enc_int1(
kstring_t *s, int32_t x)
838 kputc(bcf_int8_missing, s);
839 }
else if (x <= INT8_MAX && x > bcf_int8_missing) {
845 kputsn((
char*)&z, 2, s);
849 kputsn((
char*)&z, 4, s);
853 static inline int32_t bcf_dec_int1(
const uint8_t *p,
int type, uint8_t **q)
856 *q = (uint8_t*)p + 1;
859 *q = (uint8_t*)p + 2;
862 *q = (uint8_t*)p + 4;
867 static inline int32_t bcf_dec_typed_int1(
const uint8_t *p, uint8_t **q)
869 return bcf_dec_int1(p + 1, *p&0xf, q);
872 static inline int32_t bcf_dec_size(
const uint8_t *p, uint8_t **q,
int *type)
876 *q = (uint8_t*)p + 1;
878 }
else return bcf_dec_typed_int1(p + 1, q);
bcf_hdr_t * bcf_hdr_dup(const bcf_hdr_t *hdr)
Definition: vcf.c:2387
bcf_hdr_t * vcf_hdr_read(htsFile *fp)
Definition: vcf.c:1195
void bcf_enc_vfloat(kstring_t *s, int n, float *a)
Definition: vcf.c:1410
int vcf_hdr_write(htsFile *fp, const bcf_hdr_t *h)
Definition: vcf.c:1353
int bcf_hdr_add_hrec(bcf_hdr_t *hdr, bcf_hrec_t *hrec)
Definition: vcf.c:464
bcf_fmt_t * bcf_get_fmt(const bcf_hdr_t *hdr, bcf1_t *line, const char *key)
Definition: vcf.c:2979
int bcf_hdr_set(bcf_hdr_t *hdr, const char *fname)
Definition: vcf.c:1270
void bcf_hrec_set_val(bcf_hrec_t *hrec, int i, const char *str, int len, int is_quoted)
Definition: vcf.c:222
void bcf_hdr_destroy(bcf_hdr_t *h)
Definition: vcf.c:714
bcf_hdr_t * bcf_hdr_read(htsFile *fp)
Definition: vcf.c:736
int bcf_hdr_sync(bcf_hdr_t *h)
Definition: vcf.c:110
int32_t i
Definition: vcf.h:148
int is_file(char *fn)
Definition: files.c:57
int32_t rid
Definition: vcf.h:195
variant_t * var
Definition: vcf.h:172
uint8_t * p
Definition: vcf.h:139
bcf_hrec_t ** hrec
Definition: vcf.h:105
float qual
Definition: vcf.h:198
int max_unpack
Definition: vcf.h:203
int * flt
Definition: vcf.h:167
#define BCF_BT_INT32
Definition: vcf.h:122
uint8_t * bcf_fmt_sized_array(kstring_t *s, uint8_t *ptr)
Definition: vcf.c:1461
int bcf_hdr_parse(bcf_hdr_t *hdr, char *htxt)
Definition: vcf.c:562
bcf_fmt_t * fmt
Definition: vcf.h:171
int bcf_readrec(BGZF *fp, void *null, void *v, int *tid, int *beg, int *end)
Definition: vcf.c:927
#define bcf_int8_vector_end
Definition: vcf.h:750
#define bcf_int32_missing
Definition: vcf.h:756
int bcf_get_variant_type(bcf1_t *rec, int ith_allele)
Definition: vcf.c:2626
bcf_hrec_t * bcf_hdr_get_hrec(const bcf_hdr_t *hdr, int type, const char *key, const char *value, const char *str_class)
Definition: vcf.c:508
int bcf_hdr_id2int(const bcf_hdr_t *hdr, int type, const char *id)
Definition: vcf.c:2145
int bcf_write(htsFile *fp, const bcf_hdr_t *h, bcf1_t *v)
Definition: vcf.c:1149
int bcf_update_filter(const bcf_hdr_t *hdr, bcf1_t *line, int *flt_ids, int n)
Definition: vcf.c:2844
int id
Definition: vcf.h:137
int bcf_update_format_string(const bcf_hdr_t *hdr, bcf1_t *line, const char *key, const char **values, int n)
Definition: vcf.c:2715
int bcf_get_info_values(const bcf_hdr_t *hdr, bcf1_t *line, const char *tag, void **dst, int *ndst, int type)
Definition: vcf.c:3016
int32_t pos
Definition: vcf.h:196
int vcf_parse(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v)
Definition: vcf.c:1719
#define BRANCH(type_t, missing, vector_end)
int bcf_remove_filter(const bcf_hdr_t *hdr, bcf1_t *line, int flt_id, int pass)
Definition: vcf.c:2875
int bcf_unpack(bcf1_t *b, int which)
Definition: vcf.c:1945
void bcf_enc_vint(kstring_t *s, int n, int32_t *a, int wsize)
Definition: vcf.c:1371
#define BCF_BT_INT8
Definition: vcf.h:120
int vcf_read(htsFile *fp, const bcf_hdr_t *h, bcf1_t *v)
Definition: vcf.c:1903
int bcf_get_format_string(const bcf_hdr_t *hdr, bcf1_t *line, const char *tag, char ***dst, int *ndst)
Definition: vcf.c:3093
uint8_t * keep_samples
Definition: vcf.h:109
char * bcf_hdr_fmt_text(const bcf_hdr_t *hdr, int is_bcf, int *len)
Definition: vcf.c:1313
void bcf_hrec_destroy(bcf_hrec_t *hrec)
Definition: vcf.c:146
void bcf_enc_vchar(kstring_t *s, int l, const char *a)
Definition: vcf.c:1416
uint32_t bcf_float_vector_end
float f
Definition: vcf.h:149
#define bcf_int16_vector_end
Definition: vcf.h:751
void bcf_empty(bcf1_t *v)
const bcf_idinfo_t * val
Definition: vcf.h:97
int bcf_update_format(const bcf_hdr_t *hdr, bcf1_t *line, const char *key, const void *values, int n, int type)
Definition: vcf.c:2741
int id
Definition: vcf.h:92
bcf_hdr_t * bcf_hdr_subset(const bcf_hdr_t *h0, int n, char *const *samples, int *imap)
Definition: vcf.c:2396
const char * bcf_hdr_get_version(const bcf_hdr_t *hdr)
Definition: vcf.c:668
void bcf_clear(bcf1_t *v)
Definition: vcf.c:799
int key
Definition: vcf.h:145
uint32_t vptr_len
Definition: vcf.h:152
void bcf_hrec_format(const bcf_hrec_t *hrec, kstring_t *str)
Definition: vcf.c:1309
int bcf_subset_format(const bcf_hdr_t *hdr, bcf1_t *rec)
Definition: vcf.c:878
int nsamples_ori
Definition: vcf.h:108
int nhrec
Definition: vcf.h:106
kstring_t mem
Definition: vcf.h:110
bcf_info_t * bcf_get_info_id(bcf1_t *line, const int id)
Definition: vcf.c:3004
int bcf_get_variant_types(bcf1_t *rec)
Definition: vcf.c:2621
int bcf_hdr_combine(bcf_hdr_t *dst, const bcf_hdr_t *src)
Definition: vcf.c:2213
int bcf_update_info(const bcf_hdr_t *hdr, bcf1_t *line, const char *key, const void *values, int n, int type)
Definition: vcf.c:2632
bcf_info_t * bcf_get_info(const bcf_hdr_t *hdr, bcf1_t *line, const char *key)
Definition: vcf.c:2986
#define bcf_int32_vector_end
Definition: vcf.h:752
int bcf_hdr_printf(bcf_hdr_t *h, const char *format,...)
Definition: vcf.c:645
bcf_hrec_t * bcf_hrec_dup(bcf_hrec_t *hrec)
Definition: vcf.c:162
int m_info
Definition: vcf.h:165
uint32_t p_len
Definition: vcf.h:140
void hrec_add_idx(bcf_hrec_t *hrec, int idx)
Definition: vcf.c:242
int bcf_is_snp(bcf1_t *v)
Definition: vcf.c:2527
int nkeys
Definition: vcf.h:85
int vcf_write(htsFile *fp, const bcf_hdr_t *h, bcf1_t *v)
Definition: vcf.c:2129
uint8_t * vptr
Definition: vcf.h:151
int bcf_hdr_append(bcf_hdr_t *h, const char *line)
Definition: vcf.c:589
bcf_hdr_t * bcf_hdr_init(const char *mode)
Definition: vcf.c:698
bcf_info_t * info
Definition: vcf.h:170
bcf1_t * bcf_dup(bcf1_t *src)
Definition: vcf.c:1143
int shared_dirty
Definition: vcf.h:174
#define str(x)
Definition: sam.c:66
char * value
Definition: vcf.h:84
int errcode
Definition: vcf.h:206
int bcf_has_filter(const bcf_hdr_t *hdr, bcf1_t *line, char *filter)
Definition: vcf.c:2889
int bcf_update_alleles_str(const bcf_hdr_t *hdr, bcf1_t *line, const char *alleles_string)
Definition: vcf.c:2949
int bcf_translate(const bcf_hdr_t *dst_hdr, bcf_hdr_t *src_hdr, bcf1_t *src_line)
Definition: vcf.c:2271
const char * key
Definition: vcf.h:96
bcf_hrec_t * bcf_hdr_parse_line(const bcf_hdr_t *h, const char *line, int *len)
Definition: vcf.c:268
int bcf_index_build(const char *fn, int min_shift)
Definition: vcf.c:2195
int type
Definition: vcf.h:146
int bcf_update_alleles(const bcf_hdr_t *hdr, bcf1_t *line, const char **alleles, int nals)
Definition: vcf.c:2922
char * key
Definition: vcf.h:83
int type
Definition: vcf.h:133
char ** allele
Definition: vcf.h:169
int n_flt
Definition: vcf.h:166
int type
Definition: vcf.h:138
int bcf_hdr_add_sample(bcf_hdr_t *hdr, const char *sample)
Definition: vcf.c:58
#define BCF_DT_CTG
Definition: vcf.h:77
char * id
Definition: vcf.h:168
int bcf_hdr_write(htsFile *fp, bcf_hdr_t *h)
Definition: vcf.c:769
#define BCF_BT_INT16
Definition: vcf.h:121
int32_t rlen
Definition: vcf.h:197
#define bcf_int16_missing
Definition: vcf.h:755
void bcf_fmt_array(kstring_t *s, int n, int type, void *data)
Definition: vcf.c:1422
int var_type
Definition: vcf.h:173
int bcf_add_filter(const bcf_hdr_t *hdr, bcf1_t *line, int flt_id)
Definition: vcf.c:2857
int bcf_hrec_find_key(bcf_hrec_t *hrec, const char *key)
Definition: vcf.c:253
bcf1_t * bcf_copy(bcf1_t *dst, bcf1_t *src)
Definition: vcf.c:1121
const char ** bcf_hdr_seqnames(const bcf_hdr_t *h, int *nseqs)
Definition: vcf.c:1333
kstring_t shared
Definition: vcf.h:201
int bcf_get_format_values(const bcf_hdr_t *hdr, bcf1_t *line, const char *tag, void **dst, int *ndst, int type)
Definition: vcf.c:3131
int bcf_update_id(const bcf_hdr_t *hdr, bcf1_t *line, const char *id)
Definition: vcf.c:2966
int indiv_dirty
Definition: vcf.h:175
bcf_dec_t d
Definition: vcf.h:202
int unpacked
Definition: vcf.h:204
char ** vals
Definition: vcf.h:86
#define bcf_int8_missing
Definition: vcf.h:754
int vcf_format(const bcf_hdr_t *h, const bcf1_t *v, kstring_t *s)
Definition: vcf.c:2014
bcf_idpair_t * id[3]
Definition: vcf.h:102
htsFile vcfFile
Definition: vcf.h:273
int bcf_read(htsFile *fp, const bcf_hdr_t *h, bcf1_t *v)
Definition: vcf.c:919
void bcf_hdr_set_version(bcf_hdr_t *hdr, const char *version)
Definition: vcf.c:679
int bcf_subset(const bcf_hdr_t *h, bcf1_t *v, int n, int *imap)
Definition: vcf.c:2499
uint32_t bcf_float_missing
bcf_fmt_t * bcf_get_fmt_id(bcf1_t *line, const int id)
Definition: vcf.c:2993
void bcf_hdr_remove(bcf_hdr_t *h, int type, const char *key)
Definition: vcf.c:598
void bcf_hrec_add_key(bcf_hrec_t *hrec, const char *str, int len)
Definition: vcf.c:210
void bcf_destroy(bcf1_t *v)
int type
Definition: vcf.h:82
int bcf_hdr_set_samples(bcf_hdr_t *hdr, const char *samples, int is_file)
Definition: vcf.c:2433
char ** samples
Definition: vcf.h:104