Gamgee
You miserable little maggot. I'll stove your head in!
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
sam.h
Go to the documentation of this file.
1 /* sam.h -- SAM and BAM file I/O and manipulation.
2 
3  Copyright (C) 2008, 2009, 2013-2014 Genome Research Ltd.
4  Copyright (C) 2010, 2012, 2013 Broad Institute.
5 
6  Author: Heng Li <lh3@sanger.ac.uk>
7 
8 Permission is hereby granted, free of charge, to any person obtaining a copy
9 of this software and associated documentation files (the "Software"), to deal
10 in the Software without restriction, including without limitation the rights
11 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 copies of the Software, and to permit persons to whom the Software is
13 furnished to do so, subject to the following conditions:
14 
15 The above copyright notice and this permission notice shall be included in
16 all copies or substantial portions of the Software.
17 
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 DEALINGS IN THE SOFTWARE. */
25 
26 #ifndef HTSLIB_SAM_H
27 #define HTSLIB_SAM_H
28 
29 #include <stdint.h>
30 #include "hts.h"
31 
32 /**********************
33  *** SAM/BAM header ***
34  **********************/
35 
/* In-memory SAM/BAM header.
 * NOTE(review): this listing carries no per-field documentation; the notes
 * below are inferred from member names, types and the sam_hdr_parse()
 * signature -- confirm against the HTSlib documentation. */
46 typedef struct {
47  int32_t n_targets, ignore_sam_err; // presumably: number of reference sequences ("targets"); flag to tolerate SAM parse errors -- TODO confirm
48  uint32_t l_text; // presumably the byte length of `text` (sam_hdr_parse() takes the same l_text/text pair)
49  uint32_t *target_len; // presumably one length per target, n_targets entries -- TODO confirm
50  int8_t *cigar_tab; // purpose not visible in this header
51  char **target_name; // presumably one name per target, n_targets entries -- TODO confirm
52  char *text; // presumably the plain-text header
53  void *sdict; // opaque pointer; its concrete type is not visible in this header
54 } bam_hdr_t;
55 
56 /****************************
57  *** CIGAR related macros ***
58  ****************************/
59 
/* CIGAR operation codes. Each code is the index of the operation's letter in
 * BAM_CIGAR_STR ("MIDNSHP=XB"); the query/reference notes repeat the truth
 * table given with bam_cigar_type(). */
60 #define BAM_CMATCH 0 // 'M': consumes query and reference
61 #define BAM_CINS 1 // 'I': consumes query only
62 #define BAM_CDEL 2 // 'D': consumes reference only
63 #define BAM_CREF_SKIP 3 // 'N': consumes reference only
64 #define BAM_CSOFT_CLIP 4 // 'S': consumes query only
65 #define BAM_CHARD_CLIP 5 // 'H': consumes neither
66 #define BAM_CPAD 6 // 'P': consumes neither
67 #define BAM_CEQUAL 7 // '=': consumes query and reference
68 #define BAM_CDIFF 8 // 'X': consumes query and reference
69 #define BAM_CBACK 9 // 'B': consumes neither
70 
/* Letters of the CIGAR operations, indexed by operation code (BAM_CMATCH..BAM_CBACK). */
#define BAM_CIGAR_STR "MIDNSHP=XB"
/* A packed CIGAR element keeps the operation code in its low BAM_CIGAR_SHIFT
 * bits (selected by BAM_CIGAR_MASK) and the operation length in the bits above. */
#define BAM_CIGAR_SHIFT 4
#define BAM_CIGAR_MASK 0xf
/* Lookup table decoded by bam_cigar_type(): two bits per operation code,
 * with the entry for code 0 in the lowest two bits. */
#define BAM_CIGAR_TYPE 0x3C1A7

/* Operation code (0..15) of packed CIGAR element `c`. */
#define bam_cigar_op(c) ((c)&BAM_CIGAR_MASK)
/* Operation length of packed CIGAR element `c`. */
#define bam_cigar_oplen(c) ((c)>>BAM_CIGAR_SHIFT)
/* Letter for the operation of packed CIGAR element `c`.
 * The masked code can be any value in 0..15 but BAM_CIGAR_STR only has ten
 * letters, so the literal is padded with six '?' characters: codes 10..15
 * yield '?' instead of reading the terminator or past the end of the string. */
#define bam_cigar_opchr(c) (BAM_CIGAR_STR "??????" [bam_cigar_op(c)])
/* Pack length `l` and operation code `o` into one CIGAR element.
 * `o` is not masked, so it must already fit in BAM_CIGAR_SHIFT bits. */
#define bam_cigar_gen(l, o) ((l)<<BAM_CIGAR_SHIFT|(o))
80 
/* How the lookup works: BAM_CIGAR_TYPE packs one two-bit entry per operation
 * code, with the entry for code 0 in the lowest two bits. The macro shifts
 * the entry for `o` down by 2*o bits and masks it with 3, so the result is
 * 0..3: value 1 = consumes query, value 2 = consumes reference, 3 = both.
 * NOTE(review): `o` is expected to be an operation code as returned by
 * bam_cigar_op() (0..15); a larger value would shift by 32 bits or more. */
81 /* bam_cigar_type returns a bit flag with:
82  * bit 1 set if the cigar operation consumes the query
83  * bit 2 set if the cigar operation consumes the reference
84  *
85  * For reference, the unobfuscated truth table for this function is:
86  * BAM_CIGAR_TYPE QUERY REFERENCE
87  * --------------------------------
88  * BAM_CMATCH 1 1
89  * BAM_CINS 1 0
90  * BAM_CDEL 0 1
91  * BAM_CREF_SKIP 0 1
92  * BAM_CSOFT_CLIP 1 0
93  * BAM_CHARD_CLIP 0 0
94  * BAM_CPAD 0 0
95  * BAM_CEQUAL 1 1
96  * BAM_CDIFF 1 1
97  * BAM_CBACK 0 0
98  * --------------------------------
99  */
100 #define bam_cigar_type(o) (BAM_CIGAR_TYPE>>((o)<<1)&3) // bit 1: consume query; bit 2: consume reference
101 
/* Single-bit masks for the alignment FLAG value (tested against
 * bam1_core_t.flag by bam_is_rev()/bam_is_mrev()). The per-bit meanings are
 * the ones assigned to these bit values by the SAM format specification. */
103 #define BAM_FPAIRED 1 // read is paired (template has multiple segments)
104 
105 #define BAM_FPROPER_PAIR 2 // read is mapped in a proper pair
106 
107 #define BAM_FUNMAP 4 // read itself is unmapped
108 
109 #define BAM_FMUNMAP 8 // mate is unmapped
110 
111 #define BAM_FREVERSE 16 // read is mapped to the reverse strand
112 
113 #define BAM_FMREVERSE 32 // mate is mapped to the reverse strand
114 
115 #define BAM_FREAD1 64 // this is read 1 (first segment)
116 
117 #define BAM_FREAD2 128 // this is read 2 (last segment)
118 
119 #define BAM_FSECONDARY 256 // secondary alignment
120 
121 #define BAM_FQCFAIL 512 // failed quality checks
122 
123 #define BAM_FDUP 1024 // PCR or optical duplicate
124 
125 #define BAM_FSUPPLEMENTARY 2048 // supplementary alignment
126 
127 /*************************
128  *** Alignment records ***
129  *************************/
130 
/* Fixed-size core fields of one alignment record.
 *
 * What the accessor macros in this header establish:
 *   - flag    is tested against the BAM_F* bit masks;
 *   - l_qname is the byte length of the query name stored at the start of
 *             bam1_t.data (the CIGAR array begins l_qname bytes in);
 *   - n_cigar is the number of 32-bit CIGAR elements (n_cigar<<2 bytes);
 *   - l_qseq  is the number of bases: (l_qseq+1)>>1 bytes of packed sequence
 *             followed by l_qseq bytes of qualities.
 * NOTE(review): tid/pos/bin/qual/mtid/mpos/isize are not used by anything
 * visible in this header; presumably reference id, position, index bin,
 * mapping quality, mate reference id, mate position and insert size --
 * confirm against the HTSlib documentation.
 *
 * Member order and bit-field widths are part of the binary interface; do not
 * reorder. */
typedef struct {
    int32_t tid;
    int32_t pos;
    uint32_t bin:16, qual:8, l_qname:8;
    uint32_t flag:16, n_cigar:16;
    int32_t l_qseq;
    int32_t mtid;
    int32_t mpos;
    int32_t isize;
} bam1_core_t;

/* One alignment record: the fixed core plus a variable-length data buffer.
 *
 * `core` is required by every bam_is_*() / bam_get_*() macro, which all
 * dereference (b)->core; the listing had lost this member (the page's own
 * cross-reference index places `bam1_core_t core` on listing line 171,
 * between the struct opening and `l_data`), so it is restored here as the
 * first member.
 *
 * l_data is the number of bytes currently used in `data` (bam_get_l_aux()
 * subtracts the other sections from it); m_data is presumably the allocated
 * capacity -- TODO confirm. `id` is compiled out when BAM_NO_ID is defined. */
typedef struct {
    bam1_core_t core;
    int l_data, m_data;
    uint8_t *data;
#ifndef BAM_NO_ID
    uint64_t id;
#endif
} bam1_t;
178 
/* Accessors for an alignment record pointer `b` (a bam1_t *).
 *
 * The pointer arithmetic below implies this layout of b->data:
 *   query name   core.l_qname bytes
 *   CIGAR        core.n_cigar elements of 4 bytes each
 *   sequence     (core.l_qseq + 1) / 2 bytes, two 4-bit bases per byte
 *   qualities    core.l_qseq bytes
 *   aux data     the remainder, up to b->l_data
 *
 * These are macros: `b` is evaluated more than once in most of them, so do
 * not pass an expression with side effects. */
/* Non-zero when BAM_FREVERSE is set in the record's flag. */
184 #define bam_is_rev(b) (((b)->core.flag&BAM_FREVERSE) != 0)
185 
/* Non-zero when BAM_FMREVERSE is set in the record's flag. */
190 #define bam_is_mrev(b) (((b)->core.flag&BAM_FMREVERSE) != 0)
191 
/* Query name: the start of the data buffer, viewed as char *. */
196 #define bam_get_qname(b) ((char*)(b)->data)
197 
/* CIGAR array: starts right after the query name, viewed as uint32_t *.
 * NOTE(review): the cast assumes data + l_qname is suitably aligned for
 * uint32_t access -- confirm how the buffer is laid out by the writer. */
206 #define bam_get_cigar(b) ((uint32_t*)((b)->data + (b)->core.l_qname))
207 
/* Packed sequence: starts after the query name and the CIGAR array. */
217 #define bam_get_seq(b) ((b)->data + ((b)->core.n_cigar<<2) + (b)->core.l_qname)
218 
/* Qualities: start after the packed sequence ((l_qseq + 1) >> 1 bytes). */
223 #define bam_get_qual(b) ((b)->data + ((b)->core.n_cigar<<2) + (b)->core.l_qname + (((b)->core.l_qseq + 1)>>1))
224 
/* Auxiliary data: starts after the l_qseq quality bytes. */
229 #define bam_get_aux(b) ((b)->data + ((b)->core.n_cigar<<2) + (b)->core.l_qname + (((b)->core.l_qseq + 1)>>1) + (b)->core.l_qseq)
230 
/* Length of the auxiliary data: l_data minus every section before it. */
235 #define bam_get_l_aux(b) ((b)->l_data - ((b)->core.n_cigar<<2) - (b)->core.l_qname - (b)->core.l_qseq - (((b)->core.l_qseq + 1)>>1))
236 
/* 4-bit code of base `i` in packed sequence `s`: byte i/2, taking the high
 * nibble when i is even and the low nibble when i is odd. */
242 #define bam_seqi(s, i) ((s)[(i)>>1] >> ((~(i)&1)<<2) & 0xf)
243 
244 /**************************
245  *** Exported functions ***
246  **************************/
247 
248 #ifdef __cplusplus
249 extern "C" {
250 #endif
251 
252  /***************
253  *** BAM I/O ***
254  ***************/
255 
256  bam_hdr_t *bam_hdr_init(void);
258  int bam_hdr_write(BGZF *fp, const bam_hdr_t *h);
259  void bam_hdr_destroy(bam_hdr_t *h);
260  int bam_name2id(bam_hdr_t *h, const char *ref);
261  bam_hdr_t* bam_hdr_dup(const bam_hdr_t *h0);
262 
/* Allocation, BGZF I/O and copying of single alignment records.
 * Only the prototypes are visible here; the page's cross-reference index
 * places the definitions in sam.c. NOTE(review): the per-function notes are
 * inferred from names and signatures -- confirm against sam.c. */
263  bam1_t *bam_init1(void); // presumably allocates an empty record
264  void bam_destroy1(bam1_t *b); // presumably releases a record
265  int bam_read1(BGZF *fp, bam1_t *b); // presumably reads one record from a BGZF stream into `b`
266  int bam_write1(BGZF *fp, const bam1_t *b); // presumably writes `b` to a BGZF stream; record is not modified (const)
267  bam1_t *bam_copy1(bam1_t *bdst, const bam1_t *bsrc); // presumably copies `bsrc` into the existing record `bdst`
268  bam1_t *bam_dup1(const bam1_t *bsrc); // presumably returns a newly allocated copy of `bsrc`
269 
/* CIGAR length helpers, end-position query and flag <-> string conversion.
 * Only the prototypes are visible here (definitions are in sam.c per the
 * page's cross-reference index). NOTE(review): the notes are inferred from
 * names and signatures -- confirm against sam.c. */
270  int bam_cigar2qlen(int n_cigar, const uint32_t *cigar); // takes n_cigar packed elements; presumably sums the query-consuming lengths
271  int bam_cigar2rlen(int n_cigar, const uint32_t *cigar); // takes n_cigar packed elements; presumably sums the reference-consuming lengths
272 
284  int32_t bam_endpos(const bam1_t *b); // presumably the end coordinate of the alignment on the reference -- TODO confirm
285 
286  int bam_str2flag(const char *str); // presumably parses a textual flag description into a BAM_F* bit set
287  char *bam_flag2str(int flag); // returns non-const char *; ownership of the string is not visible here -- TODO confirm who frees
289  /*************************
290  *** BAM/CRAM indexing ***
291  *************************/
292 
293  // These BAM iterator functions work only on BAM files. To work with either
294  // BAM or CRAM files use the sam_index_load() & sam_itr_*() functions.
 // The bam_itr_* names are macro aliases: destroy forwards to
 // hts_itr_destroy(), queryi/querys forward to the sam_itr_* functions
 // declared below, and next forwards to hts_itr_next().
295  #define bam_itr_destroy(iter) hts_itr_destroy(iter)
296  #define bam_itr_queryi(idx, tid, beg, end) sam_itr_queryi(idx, tid, beg, end)
297  #define bam_itr_querys(idx, hdr, region) sam_itr_querys(idx, hdr, region)
 // Passes the file's BGZF handle and a literal 0 as the last argument
 // (sam_itr_next below passes the htsFile itself there instead).
298  #define bam_itr_next(htsfp, itr, r) hts_itr_next((htsfp)->fp.bgzf, (itr), (r), 0)
299 
300  // Load .csi or .bai BAM index file.
 // Forwards to hts_idx_load() with the HTS_FMT_BAI format constant.
301  #define bam_index_load(fn) hts_idx_load((fn), HTS_FMT_BAI)
302 
 // NOTE(review): presumably builds an index for file `fn`; the meaning of
 // min_shift is not visible in this header -- confirm against sam.c.
303  int bam_index_build(const char *fn, int min_shift);
304 
305  // Load BAM (.csi or .bai) or CRAM (.crai) index file.
306  hts_idx_t *sam_index_load(htsFile *fp, const char *fn);
307 
 // Iterator API usable with sam_index_load(): queryi takes a numeric target
 // id and a beg/end pair, querys takes a header plus a region string.
 // NOTE(review): coordinate convention of beg/end is not visible here.
308  #define sam_itr_destroy(iter) hts_itr_destroy(iter)
309  hts_itr_t *sam_itr_queryi(const hts_idx_t *idx, int tid, int beg, int end);
310  hts_itr_t *sam_itr_querys(const hts_idx_t *idx, bam_hdr_t *hdr, const char *region);
 // Same call as bam_itr_next but with the htsFile passed as the last
 // argument; `htsfp` is therefore evaluated twice.
311  #define sam_itr_next(htsfp, itr, r) hts_itr_next((htsfp)->fp.bgzf, (itr), (r), (htsfp))
312 
313  /***************
314  *** SAM I/O ***
315  ***************/
316 
 // sam_open()/sam_close() are macro aliases for hts_open()/hts_close().
317  #define sam_open(fn, mode) (hts_open((fn), (mode)))
318  #define sam_close(fp) hts_close(fp)
319 
 // NOTE(review): `mode` is a non-const buffer, so this presumably writes an
 // open-mode string derived from the file name and/or format into it; the
 // required buffer size is not visible here -- confirm against sam.c.
320  int sam_open_mode(char *mode, const char *fn, const char *format);
321 
 // samFile is just another name for htsFile.
322  typedef htsFile samFile;
 // Header functions: parse takes a text buffer with an explicit length;
 // read/write operate on an open samFile.
323  bam_hdr_t *sam_hdr_parse(int l_text, const char *text);
324  bam_hdr_t *sam_hdr_read(samFile *fp);
325  int sam_hdr_write(samFile *fp, const bam_hdr_t *h);
326 
 // Record functions. NOTE(review): presumably parse1/format1 convert between
 // one line of SAM text (kstring_t) and a bam1_t, while read1/write1 do
 // record I/O on an open samFile -- confirm against sam.c. The parsing and
 // reading variants take a non-const header.
327  int sam_parse1(kstring_t *s, bam_hdr_t *h, bam1_t *b);
328  int sam_format1(const bam_hdr_t *h, const bam1_t *b, kstring_t *str);
329  int sam_read1(samFile *fp, bam_hdr_t *h, bam1_t *b);
330  int sam_write1(samFile *fp, const bam_hdr_t *h, const bam1_t *b);
331 
332  /*************************************
333  *** Manipulating auxiliary fields ***
334  *************************************/
335 
 // Auxiliary-field access. `const char tag[2]` is an ordinary
 // `const char *` parameter (array parameters decay to pointers); the [2]
 // only documents that a two-character tag is expected.
 // NOTE(review): presumably bam_aux_get() returns a pointer into the
 // record's aux data (or a null pointer when the tag is absent) and the
 // bam_aux2*() functions decode the value at such a pointer -- confirm
 // against sam.c.
336  uint8_t *bam_aux_get(const bam1_t *b, const char tag[2]);
337  int32_t bam_aux2i(const uint8_t *s);
338  double bam_aux2f(const uint8_t *s);
339  char bam_aux2A(const uint8_t *s);
340  char *bam_aux2Z(const uint8_t *s);
341 
 // Mutators: both take a non-const record. NOTE(review): presumably append
 // adds a tag of the given type with `len` bytes from `data`, and del
 // removes the field that `s` points at -- confirm against sam.c.
342  void bam_aux_append(bam1_t *b, const char tag[2], char type, int len, uint8_t *data);
343  int bam_aux_del(bam1_t *b, uint8_t *s);
344 
345 #ifdef __cplusplus
346 }
347 #endif
348 
349 /**************************
350  *** Pileup and Mpileup ***
351  **************************/
352 
353 #if !defined(BAM_NO_PILEUP)
354 
/* One entry of a pileup, as returned through the bam_plp_next(),
 * bam_plp_auto() and bam_mplp_auto() prototypes further down.
 * NOTE(review): this listing jumps from line 373 to 375; the page's
 * cross-reference index places a member `bam1_t *b` on line 374, so the
 * struct shown here appears to be missing its first member -- restore it
 * from the real header before relying on this layout.
 * NOTE(review): field meanings are inferred from names only -- confirm. */
373 typedef struct {
375  int32_t qpos; // presumably the position within the read
376  int indel, level; // presumably indel length at this position, and a display level
377  uint32_t is_del:1, is_head:1, is_tail:1, is_refskip:1, aux:28; // four 1-bit flags plus a 28-bit field, 32 bits in total
378 } bam_pileup1_t;
379 
/* Callback type taken by bam_plp_init() and bam_mplp_init(): receives a
 * caller-supplied `data` pointer and a record to work on, returns int.
 * NOTE(review): presumably it fills `b` with the next record and signals
 * end-of-input through the return value -- confirm against sam.c. */
380 typedef int (*bam_plp_auto_f)(void *data, bam1_t *b);
381 
/* Opaque handles: the structs are only forward-declared in this header, so
 * callers can hold and pass the pointers but cannot look inside them.
 * Note that bam_plp_t and bam_mplp_t are themselves pointer types. */
382 struct __bam_plp_t;
383 typedef struct __bam_plp_t *bam_plp_t;
384 
385 struct __bam_mplp_t;
386 typedef struct __bam_mplp_t *bam_mplp_t;
387 
388 #ifdef __cplusplus
389 extern "C" {
390 #endif
391 
398  bam_plp_t bam_plp_init(bam_plp_auto_f func, void *data);
400  int bam_plp_push(bam_plp_t iter, const bam1_t *b);
401  const bam_pileup1_t *bam_plp_next(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp);
402  const bam_pileup1_t *bam_plp_auto(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp);
403  void bam_plp_set_maxcnt(bam_plp_t iter, int maxcnt);
404  void bam_plp_reset(bam_plp_t iter);
405 
/* Multi-input pileup iterator: bam_mplp_init() takes `n` together with one
 * callback and an array of `data` pointers, and bam_mplp_auto() reports its
 * results through arrays (`n_plp`, `plp`) rather than single values.
 * NOTE(review): presumably there is one data pointer, one count and one
 * pileup array per input, and maxcnt caps the reads kept per position --
 * confirm against sam.c. */
406  bam_mplp_t bam_mplp_init(int n, bam_plp_auto_f func, void **data);
415  void bam_mplp_init_overlaps(bam_mplp_t iter); // purpose not visible in this header -- TODO confirm
416  void bam_mplp_destroy(bam_mplp_t iter);
417  void bam_mplp_set_maxcnt(bam_mplp_t iter, int maxcnt);
418  int bam_mplp_auto(bam_mplp_t iter, int *_tid, int *_pos, int *n_plp, const bam_pileup1_t **plp);
419 
420 #ifdef __cplusplus
421 }
422 #endif
423 
424 #endif // ~!defined(BAM_NO_PILEUP)
425 
426 #endif
hts_itr_t * sam_itr_querys(const hts_idx_t *idx, bam_hdr_t *hdr, const char *region)
Definition: sam.c:579
uint32_t l_text
Definition: sam.h:48
char * bam_aux2Z(const uint8_t *s)
Definition: sam.c:1193
int32_t l_qseq
Definition: sam.h:149
Definition: hts.h:109
Definition: sam.h:373
bam_mplp_t bam_mplp_init(int n, bam_plp_auto_f func, void **data)
Definition: sam.c:1817
uint64_t id
Definition: sam.h:175
int bam_cigar2qlen(int n_cigar, const uint32_t *cigar)
Definition: sam.c:243
int32_t isize
Definition: sam.h:152
void * sdict
Definition: sam.h:53
void bam_aux_append(bam1_t *b, const char tag[2], char type, int len, uint8_t *data)
Definition: sam.c:1103
bam_hdr_t * bam_hdr_read(BGZF *fp)
Definition: sam.c:109
int n
Definition: sam.c:1810
int32_t mtid
Definition: sam.h:150
bam_plp_t * iter
Definition: sam.c:1812
int sam_read1(samFile *fp, bam_hdr_t *h, bam1_t *b)
Definition: sam.c:912
int bam_hdr_write(BGZF *fp, const bam_hdr_t *h)
Definition: sam.c:150
bam1_t * b
Definition: sam.h:374
Definition: sam.h:144
int32_t mpos
Definition: sam.h:151
struct __bam_mplp_t * bam_mplp_t
Definition: sam.h:386
char * text
Definition: sam.h:52
Definition: sam.c:1809
void bam_destroy1(bam1_t *b)
Definition: sam.c:213
int32_t bam_endpos(const bam1_t *b)
Definition: sam.c:261
void bam_mplp_set_maxcnt(bam_mplp_t iter, int maxcnt)
Definition: sam.c:1842
enum @17 mode
bam_hdr_t * sam_hdr_read(samFile *fp)
Definition: sam.c:633
const bam_pileup1_t * bam_plp_auto(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp)
Definition: sam.c:1761
int32_t pos
Definition: sam.h:146
int sam_parse1(kstring_t *s, bam_hdr_t *h, bam1_t *b)
Definition: sam.c:726
Definition: sam.h:170
void bam_hdr_destroy(bam_hdr_t *h)
Definition: sam.c:51
int32_t tid
Definition: sam.h:145
uint8_t * bam_aux_get(const bam1_t *b, const char tag[2])
Definition: sam.c:1138
bam1_t * bam_dup1(const bam1_t *bsrc)
Definition: sam.c:235
hts_itr_t * sam_itr_queryi(const hts_idx_t *idx, int tid, int beg, int end)
Definition: sam.c:562
Definition: bgzf.h:69
bam_hdr_t * bam_hdr_init(void)
int bam_mplp_auto(bam_mplp_t iter, int *_tid, int *_pos, int *n_plp, const bam_pileup1_t **plp)
Definition: sam.c:1857
double bam_aux2f(const uint8_t *s)
Definition: sam.c:1176
int bam_cigar2rlen(int n_cigar, const uint32_t *cigar)
Definition: sam.c:252
hts_idx_t * sam_index_load(htsFile *fp, const char *fn)
Definition: sam.c:501
Definition: hts.c:640
Definition: hts.h:321
char ** target_name
Definition: sam.h:51
Definition: sam.h:46
int32_t qpos
Definition: sam.h:375
uint8_t * data
Definition: sam.h:173
int * n_plp
Definition: sam.c:1813
int m_data
Definition: sam.h:172
void bam_mplp_init_overlaps(bam_mplp_t iter)
Definition: sam.c:1835
int bam_write1(BGZF *fp, const bam1_t *b)
Definition: sam.c:360
htsFile samFile
Definition: sam.h:322
#define str(x)
Definition: sam.c:66
bam_hdr_t * bam_hdr_dup(const bam_hdr_t *h0)
Definition: sam.c:66
const bam_pileup1_t * bam_plp_next(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp)
Definition: sam.c:1676
void bam_plp_destroy(bam_plp_t iter)
Definition: sam.c:1459
int32_t n_targets
Definition: sam.h:47
char * bam_flag2str(int flag)
Definition: sam.c:1248
struct __bam_plp_t * bam_plp_t
Definition: sam.h:383
int bam_str2flag(const char *str)
Definition: sam.c:1219
uint32_t * target_len
Definition: sam.h:49
bam1_t * bam_init1(void)
Definition: sam.c:208
void bam_plp_reset(bam_plp_t iter)
Definition: sam.c:1785
bam_plp_t bam_plp_init(bam_plp_auto_f func, void *data)
Definition: sam.c:1437
int sam_write1(samFile *fp, const bam_hdr_t *h, const bam1_t *b)
Definition: sam.c:1071
bam1_t * bam_copy1(bam1_t *bdst, const bam1_t *bsrc)
Definition: sam.c:219
int bam_aux_del(bam1_t *b, uint8_t *s)
Definition: sam.c:1152
int32_t bam_aux2i(const uint8_t *s)
Definition: sam.c:1164
char bam_aux2A(const uint8_t *s)
Definition: sam.c:1185
int bam_name2id(bam_hdr_t *h, const char *ref)
Definition: sam.c:187
int bam_read1(BGZF *fp, bam1_t *b)
Definition: sam.c:324
void bam_plp_set_maxcnt(bam_plp_t iter, int maxcnt)
Definition: sam.c:1800
int sam_hdr_write(samFile *fp, const bam_hdr_t *h)
Definition: sam.c:675
int sam_open_mode(char *mode, const char *fn, const char *format)
Definition: sam.c:1201
int(* bam_plp_auto_f)(void *data, bam1_t *b)
Definition: sam.h:380
int bam_index_build(const char *fn, int min_shift)
Definition: sam.c:428
int sam_format1(const bam_hdr_t *h, const bam1_t *b, kstring_t *str)
Definition: sam.c:950
void bam_mplp_destroy(bam_mplp_t iter)
Definition: sam.c:1849
const bam_pileup1_t ** plp
Definition: sam.c:1814
int bam_plp_push(bam_plp_t iter, const bam1_t *b)
Definition: sam.c:1722
Definition: bgzf.h:49
bam1_core_t core
Definition: sam.h:171
int level
Definition: sam.h:376
bam_hdr_t * sam_hdr_parse(int l_text, const char *text)
Definition: sam.c:595
int8_t * cigar_tab
Definition: sam.h:50