Gamgee
You miserable little maggot. I'll stove your head in!
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
hts.h
Go to the documentation of this file.
1 /* hts.h -- format-neutral I/O, indexing, and iterator API functions.
2 
3  Copyright (C) 2012-2014 Genome Research Ltd.
4  Copyright (C) 2012 Broad Institute.
5 
6  Author: Heng Li <lh3@sanger.ac.uk>
7 
8 Permission is hereby granted, free of charge, to any person obtaining a copy
9 of this software and associated documentation files (the "Software"), to deal
10 in the Software without restriction, including without limitation the rights
11 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 copies of the Software, and to permit persons to whom the Software is
13 furnished to do so, subject to the following conditions:
14 
15 The above copyright notice and this permission notice shall be included in
16 all copies or substantial portions of the Software.
17 
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 DEALINGS IN THE SOFTWARE. */
25 
26 #ifndef HTSLIB_HTS_H
27 #define HTSLIB_HTS_H
28 
29 #include <stddef.h>
30 #include <stdint.h>
31 
/* Forward declarations only: the declarations visible in this header use
 * BGZF, cram_fd and hFILE through pointers, so their full definitions are
 * not required here. */
32 #ifndef HTS_BGZF_TYPEDEF
33 typedef struct BGZF BGZF;
/* Marks the BGZF typedef as done so it is not emitted a second time. */
34 #define HTS_BGZF_TYPEDEF
35 #endif
36 struct cram_fd;
37 struct hFILE;
38 
/* String buffer type used by the line-reading API (see hts_getline()).
 * The KSTRING_T guard prevents a second definition when the type has
 * already been provided.
 * NOTE(review): the field meanings below are inferred from their names;
 * confirm against the kstring implementation. */
39 #ifndef KSTRING_T
40 #define KSTRING_T kstring_t
41 typedef struct __kstring_t {
42  size_t l, m; /* presumably l = bytes in use, m = bytes allocated -- TODO confirm */
43  char *s; /* character data */
44 } kstring_t;
45 #endif
46 
#ifndef kroundup32
/* Round the integer lvalue x up, in place, to the next power of two; a value
 * that already is a power of two is left unchanged (5 -> 8, 8 -> 8).
 * Only bits 0..31 are smeared, hence the name.  x is evaluated many times,
 * so it must be a plain lvalue without side effects. */
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
#endif

/* hts_expand(type_t, n, m, ptr)
 *
 * Make sure the array `ptr` (elements of type `type_t`, current capacity `m`)
 * can hold at least `n` elements.  When n > m, m is set to n rounded up to a
 * power of two and ptr is reallocated to m elements; otherwise nothing happens.
 *
 * - `m` and `ptr` must be modifiable lvalues; all arguments are evaluated
 *   more than once, so none may have side effects.
 * - The user of the macro must have <stdlib.h> in scope for realloc().
 * - realloc()'s result is stored straight into ptr: on allocation failure
 *   ptr becomes NULL (the previous buffer is lost) while m already holds the
 *   new capacity.  Callers that need to survive OOM must check ptr.
 *
 * The expansion is a single do { } while (0) statement so that
 * `if (c) hts_expand(...); else ...` parses as intended; invoke it with a
 * trailing semicolon.
 */
#define hts_expand(type_t, n, m, ptr) do { \
        if ((n) > (m)) { \
            (m) = (n); kroundup32(m); \
            (ptr) = (type_t*)realloc((ptr), (m) * sizeof(type_t)); \
        } \
    } while (0)

/* hts_expand0(type_t, n, m, ptr)
 *
 * Same contract as hts_expand(), and additionally zero-fills the newly added
 * elements [old m, new m).  Also requires <string.h> for memset().
 *
 * The saved old capacity uses a macro-private name and size_t so that it can
 * neither capture a caller variable (e.g. an argument spelled `t`) nor
 * truncate a wide capacity.  The zero-fill is skipped when realloc() failed.
 */
#define hts_expand0(type_t, n, m, ptr) do { \
        if ((n) > (m)) { \
            size_t hts_expand0_old_m_ = (size_t)(m); \
            (m) = (n); kroundup32(m); \
            (ptr) = (type_t*)realloc((ptr), (m) * sizeof(type_t)); \
            if ((ptr) != NULL) \
                memset(((type_t*)(ptr)) + hts_expand0_old_m_, 0, \
                       sizeof(type_t) * ((size_t)(m) - hts_expand0_old_m_)); \
        } \
    } while (0)
67 
68 /************
69  * File I/O *
70  ************/
71 
72 // Add new entries only at the end (but before the *_maximum entry)
73 // of these enums, as their numbering is part of the htslib ABI.
74 
77  sequence_data, // Sequence data -- SAM, BAM, CRAM, etc
78  variant_data, // Variant calling data -- VCF, BCF, etc
79  index_file, // Index file associated with some data file
80  region_list, // Coordinate intervals or regions -- BED, etc
82 };
83 
89 };
90 
94 };
95 
96 typedef struct htsFormat {
100 } htsFormat;
101 
102 // Maintainers' note: htsFile cannot be an opaque structure because some of its
103 // fields are part of libhts.so's ABI (hence these fields must not be moved):
104 // - fp is used in the public sam_itr_next()/etc macros
105 // - is_bin is used directly in samtools <= 1.1 and bcftools <= 1.1
106 // - is_write and is_cram are used directly in samtools <= 1.1
107 // - fp is used directly in samtools (up to and including current develop)
108 // - line is used directly in bcftools (up to and including current develop)
109 typedef struct {
110  uint32_t is_bin:1, is_write:1, is_be:1, is_cram:1, dummy:28;
111  int64_t lineno;
113  char *fn, *fn_aux;
114  union {
116  struct cram_fd *cram;
117  struct hFILE *hfile;
118  void *voidp;
119  } fp;
121 } htsFile;
122 
// REQUIRED_FIELDS
/* Bit flags, one per field; each value is a distinct power of two, so
 * several fields can be OR-ed into a single mask.
 * The `enum sam_fields {` header was missing from this listing (its line
 * 124); the cross-reference index of the same page records `sam_fields`
 * as defined at hts.h:124. */
enum sam_fields {
    SAM_QNAME = 0x00000001,
    SAM_FLAG  = 0x00000002,
    SAM_RNAME = 0x00000004,
    SAM_POS   = 0x00000008,
    SAM_MAPQ  = 0x00000010,
    SAM_CIGAR = 0x00000020,
    SAM_RNEXT = 0x00000040,
    SAM_PNEXT = 0x00000080,
    SAM_TLEN  = 0x00000100,
    SAM_SEQ   = 0x00000200,
    SAM_QUAL  = 0x00000400,
    SAM_AUX   = 0x00000800,
    SAM_RGAUX = 0x00001000,
};
139 
160 };
161 
162 /**********************
163  * Exported functions *
164  **********************/
165 
/* Global integer setting.  NOTE(review): presumably the library's
 * diagnostic verbosity level -- confirm in hts.c. */
166 extern int hts_verbose;
167 
/* 256-entry table, i.e. one entry per possible unsigned char value.
 * NOTE(review): presumably maps a nucleotide character to its 4-bit code --
 * confirm in hts.c. */
169 extern const unsigned char seq_nt16_table[256];
170 
/* Character array of unspecified length.  NOTE(review): presumably the
 * inverse mapping (4-bit code -> character) -- confirm in hts.c. */
172 extern const char seq_nt16_str[];
173 
174 #ifdef __cplusplus
175 extern "C" {
176 #endif
177 
/* Returns a string describing the library version.
 * NOTE(review): ownership of the returned string is not visible here. */
183 const char *hts_version(void);
184 
/* Examines the stream fp and writes the result into *fmt.
 * NOTE(review): return-value convention not visible here -- confirm. */
191 int hts_detect_format(struct hFILE *fp, htsFormat *fmt);
192 
/* Returns a textual description of *format.
 * NOTE(review): confirm whether the caller must free the result. */
196 const char *hts_format_description(const htsFormat *format);
197 
/* Opens the file named fn with the given mode string and returns a handle.
 * NOTE(review): mode syntax and failure value (presumably NULL) are not
 * visible here -- confirm. */
222 htsFile *hts_open(const char *fn, const char *mode);
223 
/* Like hts_open(), but takes an already-open hFILE stream in addition to
 * the file name. */
229 htsFile *hts_hopen(struct hFILE *fp, const char *fn, const char *mode);
230 
/* Closes a handle obtained from hts_open()/hts_hopen().
 * NOTE(review): presumably 0 on success -- confirm. */
236 int hts_close(htsFile *fp);
237 
/* Returns a read-only pointer to format information for fp. */
243 const htsFormat *hts_get_format(htsFile *fp);
244 
/* Sets option opt on fp; the type of the variadic argument(s) depends on
 * opt and is not visible here. */
252 int hts_set_opt(htsFile *fp, enum cram_option opt, ...);
253 
/* Line/list reading helpers.  hts_readlines() and hts_readlist() return an
 * array of strings and report a count through *_n.
 * NOTE(review): ownership of the returned array/strings is not visible
 * here -- confirm. */
254 int hts_getline(htsFile *fp, int delimiter, kstring_t *str);
255 char **hts_readlines(const char *fn, int *_n);
264 char **hts_readlist(const char *fn, int is_file, int *_n);
265 
/* NOTE(review): presumably requests n worker threads for fp -- confirm. */
273 int hts_set_threads(htsFile *fp, int n);
274 
/* Associates the auxiliary file name fn_aux with fp (htsFile has a matching
 * fn_aux member).  NOTE(review): whether the string is copied is not
 * visible here. */
282 int hts_set_fai_filename(htsFile *fp, const char *fn_aux);
283 
284 #ifdef __cplusplus
285 }
286 #endif
287 
288 /************
289  * Indexing *
290  ************/
291 
/* Negative sentinel values.  NOTE(review): presumably passed where a target
 * id (tid) is expected, to select special iterator behaviour -- confirm. */
302 #define HTS_IDX_NOCOOR (-2)
303 #define HTS_IDX_START (-3)
304 #define HTS_IDX_REST (-4)
305 #define HTS_IDX_NONE (-5)
306 
/* Index format identifiers.  NOTE(review): presumably the values accepted
 * by the `fmt` parameter of hts_idx_init()/hts_idx_save()/hts_idx_load() --
 * confirm. */
307 #define HTS_FMT_CSI 0
308 #define HTS_FMT_BAI 1
309 #define HTS_FMT_TBI 2
310 #define HTS_FMT_CRAI 3
311 
/* Opaque index type: only pointers to it appear in this header. */
312 struct __hts_idx_t;
313 typedef struct __hts_idx_t hts_idx_t;
314 
/* A pair of 64-bit unsigned values.
 * NOTE(review): presumably a [u, v) range of file offsets -- confirm. */
315 typedef struct {
316  uint64_t u, v;
317 } hts_pair64_t;
318 
/* Callback type used by iterators to read one record `r` from fp; it reports
 * the record's tid/beg/end through the three int pointers.  `data` is an
 * opaque pointer forwarded to the callback.
 * NOTE(review): return-value convention not visible here -- confirm. */
319 typedef int hts_readrec_func(BGZF *fp, void *data, void *r, int *tid, int *beg, int *end);
320 
321 typedef struct {
322  uint32_t read_rest:1, finished:1, dummy:29;
323  int tid, beg, end, n_off, i;
324  uint64_t curr_off;
327  struct {
328  int n, m;
329  int *a;
330  } bins;
331 } hts_itr_t;
332 
333 #ifdef __cplusplus
334 extern "C" {
335 #endif
336 
 /* Bin numbering helpers for a tree in which every bin has 8 children.
  * hts_bin_first(l): (8^l - 1) / 7 = 1 + 8 + ... + 8^(l-1), i.e. the number
  *   of bins on levels 0..l-1, which is the number of the first bin on
  *   level l.  (l<<1)+l is 3*l, so 1<<(3*l) is 8^l.
  * hts_bin_parent(l): here the argument is a bin number, not a level;
  *   yields the number of that bin's parent. */
337  #define hts_bin_first(l) (((1<<(((l)<<1) + (l))) - 1) / 7)
338  #define hts_bin_parent(l) (((l) - 1) >> 3)
339 
 /* Index construction.  NOTE(review): the lifecycle appears to be
  * init -> push (once per record) -> finish -> destroy; confirm in hts.c. */
340  hts_idx_t *hts_idx_init(int n, int fmt, uint64_t offset0, int min_shift, int n_lvls);
341  void hts_idx_destroy(hts_idx_t *idx);
342  int hts_idx_push(hts_idx_t *idx, int tid, int beg, int end, uint64_t offset, int is_mapped);
343  void hts_idx_finish(hts_idx_t *idx, uint64_t final_offset);
344 
 /* Index persistence.  Note that hts_idx_save() returns void, so a write
  * failure cannot be reported through its return value. */
345  void hts_idx_save(const hts_idx_t *idx, const char *fn, int fmt);
346  hts_idx_t *hts_idx_load(const char *fn, int fmt);
347 
 /* Access to the index's metadata blob; its length travels in l_meta.
  * NOTE(review): is_copy presumably selects copying vs. adopting `meta`. */
348  uint8_t *hts_idx_get_meta(hts_idx_t *idx, int *l_meta);
349  void hts_idx_set_meta(hts_idx_t *idx, int l_meta, uint8_t *meta, int is_copy);
350 
 /* Statistics stored in the index; results are written through the
  * uint64_t out-parameters or returned directly. */
351  int hts_idx_get_stat(const hts_idx_t* idx, int tid, uint64_t* mapped, uint64_t* unmapped);
352  uint64_t hts_idx_get_n_no_coor(const hts_idx_t* idx);
353 
 /* Region parsing and iterator creation/destruction.
  * NOTE(review): the meaning of hts_parse_reg()'s return pointer is not
  * visible here -- confirm. */
354  const char *hts_parse_reg(const char *s, int *beg, int *end);
355  hts_itr_t *hts_itr_query(const hts_idx_t *idx, int tid, int beg, int end, hts_readrec_func *readrec);
356  void hts_itr_destroy(hts_itr_t *iter);
357 
 /* Callback types: name -> id, id -> name (the void* is an opaque header
  * object supplied by the caller), and a function with hts_itr_query()'s
  * signature. */
358  typedef int (*hts_name2id_f)(void*, const char*);
359  typedef const char *(*hts_id2name_f)(void*, int);
360  typedef hts_itr_t *hts_itr_query_func(const hts_idx_t *idx, int tid, int beg, int end, hts_readrec_func *readrec);
361 
 /* String-region query, iteration step, and listing of sequence names. */
362  hts_itr_t *hts_itr_querys(const hts_idx_t *idx, const char *reg, hts_name2id_f getid, void *hdr, hts_itr_query_func *itr_query, hts_readrec_func *readrec);
363  int hts_itr_next(BGZF *fp, hts_itr_t *iter, void *r, void *data);
364  const char **hts_idx_seqnames(const hts_idx_t *idx, int *n, hts_id2name_f getid, void *hdr); // free only the array, not the values
365 
366 #ifdef __cplusplus
367 }
368 #endif
369 
/* Map the interval [beg, end) to the number of the smallest bin that fully
 * contains it.
 *
 * The deepest level (n_lvls) has bins of 1 << min_shift positions; each
 * level above it has bins 8 times larger.  `end` is exclusive: it is
 * decremented before use.
 *
 * Returns the bin number, or 0 (the root bin) when no deeper level contains
 * the whole interval.
 */
static inline int hts_reg2bin(int64_t beg, int64_t end, int min_shift, int n_lvls)
{
    const int64_t last = end - 1;                  /* inclusive end position */
    int level = n_lvls;
    int shift = min_shift;
    int first = ((1 << (3 * n_lvls)) - 1) / 7;     /* first bin number on `level` */

    while (level > 0) {
        /* Both ends fall into the same bin on this level: done. */
        if ((beg >> shift) == (last >> shift))
            return first + (beg >> shift);

        /* Move one level up: bins get 8x wider, numbering starts earlier. */
        --level;
        shift += 3;
        first -= 1 << (3 * level);
    }
    return 0;
}
377 
/* For bin number `bin`, return the offset within the deepest level (n_lvls)
 * of the first deepest-level bin lying underneath it.
 *
 * The level of `bin` is found by walking parent links up to bin 0; the
 * bin's offset within its own level is then scaled by 8 for every level
 * between it and the deepest one.
 */
static inline int hts_bin_bot(int bin, int n_lvls)
{
    int level = 0;
    int node = bin;

    /* Count the steps needed to reach the root: that is the level of bin. */
    while (node != 0) {
        node = hts_bin_parent(node);
        ++level;
    }

    const int offset_in_level = bin - hts_bin_first(level);
    return offset_in_level << ((n_lvls - level) * 3);
}
384 
385 /**************
386  * Endianness *
387  **************/
388 
/* Return 1 when the lowest-addressed byte of a long holding the value 1 is
 * zero (big-endian host), 0 otherwise. */
static inline int ed_is_big(void)
{
    const long probe = 1;
    const char *first_byte = (const char *)&probe;
    return *first_byte == 0;
}
/* Return v with its two bytes exchanged (0xAABB -> 0xBBAA). */
static inline uint16_t ed_swap_2(uint16_t v)
{
    const unsigned int high_to_low = (unsigned int)v >> 8;
    const unsigned int low_to_high = ((unsigned int)v & 0xFFU) << 8;
    return (uint16_t)(low_to_high | high_to_low);
}
/* Reverse, in place, the two bytes stored at x and return x.
 *
 * The bytes are exchanged through unsigned char accesses rather than by
 * dereferencing x as uint16_t, so x may point anywhere inside a byte buffer:
 * no alignment is required and no aliasing rule is violated.  For a properly
 * aligned uint16_t object the result is the same as storing the
 * byte-swapped value back into it.
 */
static inline void *ed_swap_2p(void *x)
{
    unsigned char *bytes = (unsigned char *)x;
    const unsigned char first = bytes[0];

    bytes[0] = bytes[1];
    bytes[1] = first;
    return x;
}
/* Return v with its four bytes in reverse order (0xAABBCCDD -> 0xDDCCBBAA). */
static inline uint32_t ed_swap_4(uint32_t v)
{
    const uint32_t byte0_to_3 = v << 24;
    const uint32_t byte1_to_2 = (v & 0x0000FF00U) << 8;
    const uint32_t byte2_to_1 = (v >> 8) & 0x0000FF00U;
    const uint32_t byte3_to_0 = v >> 24;

    return byte0_to_3 | byte1_to_2 | byte2_to_1 | byte3_to_0;
}
/* Reverse, in place, the four bytes stored at x and return x.
 *
 * The bytes are exchanged through unsigned char accesses rather than by
 * dereferencing x as uint32_t, so x may point anywhere inside a byte buffer:
 * no alignment is required and no aliasing rule is violated.  For a properly
 * aligned uint32_t object the result is the same as storing the
 * byte-swapped value back into it.
 */
static inline void *ed_swap_4p(void *x)
{
    unsigned char *bytes = (unsigned char *)x;
    unsigned char tmp;

    tmp = bytes[0]; bytes[0] = bytes[3]; bytes[3] = tmp;   /* outer pair */
    tmp = bytes[1]; bytes[1] = bytes[2]; bytes[2] = tmp;   /* inner pair */
    return x;
}
/* Return v with its eight bytes in reverse order. */
static inline uint64_t ed_swap_8(uint64_t v)
{
    uint64_t reversed = 0;
    int i;

    /* Peel bytes off the low end of v and push them onto the low end of
     * the result, so the first byte taken ends up most significant. */
    for (i = 0; i < 8; ++i) {
        reversed = (reversed << 8) | (v & 0xFFU);
        v >>= 8;
    }
    return reversed;
}
/* Reverse, in place, the eight bytes stored at x and return x.
 *
 * The bytes are exchanged through unsigned char accesses rather than by
 * dereferencing x as uint64_t, so x may point anywhere inside a byte buffer:
 * no alignment is required and no aliasing rule is violated.  For a properly
 * aligned uint64_t object the result is the same as storing the
 * byte-swapped value back into it.
 */
static inline void *ed_swap_8p(void *x)
{
    unsigned char *bytes = (unsigned char *)x;
    int lo, hi;

    /* Swap mirrored byte pairs, walking inwards from both ends. */
    for (lo = 0, hi = 7; lo < hi; ++lo, --hi) {
        const unsigned char tmp = bytes[lo];
        bytes[lo] = bytes[hi];
        bytes[hi] = tmp;
    }
    return x;
}
424 
425 #endif
#define hts_bin_first(l)
Definition: hts.h:337
Definition: hts.h:92
uint8_t * hts_idx_get_meta(hts_idx_t *idx, int *l_meta)
Definition: hts.c:1161
Definition: hts.h:127
Definition: hts.h:130
Definition: hts.h:151
Definition: hts.h:109
Definition: hts.h:315
Definition: hts.h:86
char * fn_aux
Definition: hts.h:113
Definition: hts.h:137
int is_file(char *fn)
Definition: files.c:57
void hts_idx_finish(hts_idx_t *idx, uint64_t final_offset)
Definition: hts.c:803
char ** hts_readlist(const char *fn, int is_file, int *_n)
Definition: hts.c:507
enum htsCompression compression
Definition: hts.h:99
Definition: hfile.h:42
int * a
Definition: hts.h:329
const char seq_nt16_str[]
Definition: hts.c:79
char ** hts_readlines(const char *fn, int *_n)
Definition: hts.c:561
Definition: hts.h:86
Definition: hts.h:87
int hts_close(htsFile *fp)
Definition: hts.c:367
int hts_itr_next(BGZF *fp, hts_itr_t *iter, void *r, void *data)
Definition: hts.c:1400
Definition: hts.h:87
const char * hts_parse_reg(const char *s, int *beg, int *end)
Definition: hts.c:1351
Definition: hts.h:87
struct cram_fd * cram
Definition: hts.h:116
struct htsFormat htsFormat
struct hFILE * hfile
Definition: hts.h:117
void * voidp
Definition: hts.h:118
Definition: hts.h:156
Definition: hts.h:142
Definition: hts.h:152
void hts_idx_destroy(hts_idx_t *idx)
Definition: hts.c:878
const char * hts_format_description(const htsFormat *format)
Definition: hts.c:235
Definition: hts.h:132
Definition: hts.h:131
Definition: hts.h:135
Definition: hts.h:149
Definition: hts.h:93
Definition: hts.h:128
Definition: hts.h:143
size_t l
Definition: bgzf.h:70
int hts_verbose
Definition: hts.c:52
int hts_set_opt(htsFile *fp, enum cram_option opt,...)
Definition: hts.c:432
enum @17 mode
struct __kstring_t kstring_t
hts_pair64_t * off
Definition: hts.h:325
Definition: hts.h:92
Definition: hts.h:150
htsCompression
Definition: hts.h:91
Definition: hts.h:87
htsFormat format
Definition: hts.h:120
int hts_set_threads(htsFile *fp, int n)
Definition: hts.c:446
Definition: hts.h:141
void hts_idx_set_meta(hts_idx_t *idx, int l_meta, uint8_t *meta, int is_copy)
Definition: hts.c:1151
Definition: hts.h:78
const char * hts_version(void)
Definition: hts.c:54
Definition: hts.h:155
Definition: hts.h:77
BGZF * bgzf
Definition: hts.h:115
kstring_t line
Definition: hts.h:112
Definition: hts.h:92
int hts_set_fai_filename(htsFile *fp, const char *fn_aux)
Definition: hts.c:456
void hts_idx_save(const hts_idx_t *idx, const char *fn, int fmt)
Definition: hts.c:984
const char *(* hts_id2name_f)(void *, int)
Definition: hts.h:359
Definition: hts.h:158
Definition: hts.h:133
Definition: hts.h:87
Definition: hts.h:147
uint64_t hts_idx_get_n_no_coor(const hts_idx_t *idx)
Definition: hts.c:1206
Definition: hts.h:87
Definition: bgzf.h:69
int hts_readrec_func(BGZF *fp, void *data, void *r, int *tid, int *beg, int *end)
Definition: hts.h:319
Definition: hts.h:87
const char ** hts_idx_seqnames(const hts_idx_t *idx, int *n, hts_id2name_f getid, void *hdr)
Definition: hts.c:1167
Definition: hts.c:640
Definition: hts.h:321
Definition: hts.h:92
void hts_itr_destroy(hts_itr_t *iter)
Definition: hts.c:1346
Definition: hts.h:85
htsFile * hts_hopen(struct hFILE *fp, const char *fn, const char *mode)
Definition: hts.c:273
Definition: hts.h:154
Definition: hts.h:96
size_t m
Definition: bgzf.h:70
int(* hts_name2id_f)(void *, const char *)
Definition: hts.h:358
Definition: hts.h:153
int hts_idx_push(hts_idx_t *idx, int tid, int beg, int end, uint64_t offset, int is_mapped)
Definition: hts.c:819
Definition: hts.h:87
Definition: hts.h:145
Definition: hts.h:157
Definition: hts.h:134
#define str(x)
Definition: sam.c:66
hts_itr_t * hts_itr_query(const hts_idx_t *idx, int tid, int beg, int end, hts_readrec_func *readrec)
Definition: hts.c:1233
Definition: hts.h:79
int n
Definition: hts.h:328
int hts_idx_get_stat(const hts_idx_t *idx, int tid, uint64_t *mapped, uint64_t *unmapped)
Definition: hts.c:1187
enum htsExactFormat format
Definition: hts.h:98
hts_readrec_func * readrec
Definition: hts.h:326
Definition: hts.h:144
Definition: hts.h:87
Definition: hts.h:87
Definition: hts.h:76
char * s
Definition: bgzf.h:71
Definition: hts.h:126
hts_itr_t * hts_itr_query_func(const hts_idx_t *idx, int tid, int beg, int end, hts_readrec_func *readrec)
Definition: hts.h:360
htsExactFormat
Definition: hts.h:84
uint64_t curr_off
Definition: hts.h:324
Definition: hts.h:146
cram_option
Definition: hts.h:140
int tid
Definition: hts.h:323
Definition: hts.h:136
const htsFormat * hts_get_format(htsFile *fp)
Definition: hts.c:427
htsFile * hts_open(const char *fn, const char *mode)
Definition: hts.c:252
int hts_getline(htsFile *fp, int delimiter, kstring_t *str)
Definition: hts.c:499
hts_idx_t * hts_idx_init(int n, int fmt, uint64_t offset0, int min_shift, int n_lvls)
Definition: hts.c:697
#define hts_bin_parent(l)
Definition: hts.h:338
hts_itr_t * hts_itr_querys(const hts_idx_t *idx, const char *reg, hts_name2id_f getid, void *hdr, hts_itr_query_func *itr_query, hts_readrec_func *readrec)
Definition: hts.c:1382
Definition: hts.h:88
Definition: hts.h:87
Definition: hts.h:148
Definition: hts.h:129
Definition: hts.h:87
Definition: hts.h:81
Definition: hts.h:159
const unsigned char seq_nt16_table[256]
Definition: hts.c:59
htsFormatCategory
Definition: hts.h:75
int64_t lineno
Definition: hts.h:111
sam_fields
Definition: hts.h:124
Definition: cram_structs.h:655
enum htsFormatCategory category
Definition: hts.h:97
Definition: hts.h:125
uint64_t v
Definition: hts.h:316
Definition: bgzf.h:49
hts_idx_t * hts_idx_load(const char *fn, int fmt)
Definition: hts.c:1500
Definition: hts.h:80
int hts_detect_format(struct hFILE *fp, htsFormat *fmt)
Definition: hts.c:148