Gamgee
You miserable little maggot. I'll stove your head in!
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Classes | Macros | Typedefs | Functions
sam.h File Reference
#include <stdint.h>
#include "hts.h"

Go to the source code of this file.

Classes

struct  bam_hdr_t
 
struct  bam1_core_t
 
struct  bam1_t
 
struct  bam_pileup1_t
 

Macros

#define BAM_CMATCH   0
 
#define BAM_CINS   1
 
#define BAM_CDEL   2
 
#define BAM_CREF_SKIP   3
 
#define BAM_CSOFT_CLIP   4
 
#define BAM_CHARD_CLIP   5
 
#define BAM_CPAD   6
 
#define BAM_CEQUAL   7
 
#define BAM_CDIFF   8
 
#define BAM_CBACK   9
 
#define BAM_CIGAR_STR   "MIDNSHP=XB"
 
#define BAM_CIGAR_SHIFT   4
 
#define BAM_CIGAR_MASK   0xf
 
#define BAM_CIGAR_TYPE   0x3C1A7
 
#define bam_cigar_op(c)   ((c)&BAM_CIGAR_MASK)
 
#define bam_cigar_oplen(c)   ((c)>>BAM_CIGAR_SHIFT)
 
#define bam_cigar_opchr(c)   (BAM_CIGAR_STR[bam_cigar_op(c)])
 
#define bam_cigar_gen(l, o)   ((l)<<BAM_CIGAR_SHIFT|(o))
 
#define bam_cigar_type(o)   (BAM_CIGAR_TYPE>>((o)<<1)&3)
 
#define BAM_FPAIRED   1
 
#define BAM_FPROPER_PAIR   2
 
#define BAM_FUNMAP   4
 
#define BAM_FMUNMAP   8
 
#define BAM_FREVERSE   16
 
#define BAM_FMREVERSE   32
 
#define BAM_FREAD1   64
 
#define BAM_FREAD2   128
 
#define BAM_FSECONDARY   256
 
#define BAM_FQCFAIL   512
 
#define BAM_FDUP   1024
 
#define BAM_FSUPPLEMENTARY   2048
 
#define bam_is_rev(b)   (((b)->core.flag&BAM_FREVERSE) != 0)
 
#define bam_is_mrev(b)   (((b)->core.flag&BAM_FMREVERSE) != 0)
 
#define bam_get_qname(b)   ((char*)(b)->data)
 
#define bam_get_cigar(b)   ((uint32_t*)((b)->data + (b)->core.l_qname))
 
#define bam_get_seq(b)   ((b)->data + ((b)->core.n_cigar<<2) + (b)->core.l_qname)
 
#define bam_get_qual(b)   ((b)->data + ((b)->core.n_cigar<<2) + (b)->core.l_qname + (((b)->core.l_qseq + 1)>>1))
 
#define bam_get_aux(b)   ((b)->data + ((b)->core.n_cigar<<2) + (b)->core.l_qname + (((b)->core.l_qseq + 1)>>1) + (b)->core.l_qseq)
 
#define bam_get_l_aux(b)   ((b)->l_data - ((b)->core.n_cigar<<2) - (b)->core.l_qname - (b)->core.l_qseq - (((b)->core.l_qseq + 1)>>1))
 
#define bam_seqi(s, i)   ((s)[(i)>>1] >> ((~(i)&1)<<2) & 0xf)
 
#define bam_itr_destroy(iter)   hts_itr_destroy(iter)
 
#define bam_itr_queryi(idx, tid, beg, end)   sam_itr_queryi(idx, tid, beg, end)
 
#define bam_itr_querys(idx, hdr, region)   sam_itr_querys(idx, hdr, region)
 
#define bam_itr_next(htsfp, itr, r)   hts_itr_next((htsfp)->fp.bgzf, (itr), (r), 0)
 
#define bam_index_load(fn)   hts_idx_load((fn), HTS_FMT_BAI)
 
#define sam_itr_destroy(iter)   hts_itr_destroy(iter)
 
#define sam_itr_next(htsfp, itr, r)   hts_itr_next((htsfp)->fp.bgzf, (itr), (r), (htsfp))
 
#define sam_open(fn, mode)   (hts_open((fn), (mode)))
 
#define sam_close(fp)   hts_close(fp)
 

Typedefs

typedef htsFile samFile
 
typedef int(* bam_plp_auto_f )(void *data, bam1_t *b)
 
typedef struct __bam_plp_t * bam_plp_t
 
typedef struct __bam_mplp_t * bam_mplp_t
 

Functions

bam_hdr_t * bam_hdr_init (void)
 
bam_hdr_t * bam_hdr_read (BGZF *fp)
 
int bam_hdr_write (BGZF *fp, const bam_hdr_t *h)
 
void bam_hdr_destroy (bam_hdr_t *h)
 
int bam_name2id (bam_hdr_t *h, const char *ref)
 
bam_hdr_t * bam_hdr_dup (const bam_hdr_t *h0)
 
bam1_t * bam_init1 (void)
 
void bam_destroy1 (bam1_t *b)
 
int bam_read1 (BGZF *fp, bam1_t *b)
 
int bam_write1 (BGZF *fp, const bam1_t *b)
 
bam1_t * bam_copy1 (bam1_t *bdst, const bam1_t *bsrc)
 
bam1_t * bam_dup1 (const bam1_t *bsrc)
 
int bam_cigar2qlen (int n_cigar, const uint32_t *cigar)
 
int bam_cigar2rlen (int n_cigar, const uint32_t *cigar)
 
int32_t bam_endpos (const bam1_t *b)
 
int bam_str2flag (const char *str)
 
char * bam_flag2str (int flag)
 
int bam_index_build (const char *fn, int min_shift)
 
hts_idx_t * sam_index_load (htsFile *fp, const char *fn)
 
hts_itr_t * sam_itr_queryi (const hts_idx_t *idx, int tid, int beg, int end)
 
hts_itr_t * sam_itr_querys (const hts_idx_t *idx, bam_hdr_t *hdr, const char *region)
 
int sam_open_mode (char *mode, const char *fn, const char *format)
 
bam_hdr_t * sam_hdr_parse (int l_text, const char *text)
 
bam_hdr_t * sam_hdr_read (samFile *fp)
 
int sam_hdr_write (samFile *fp, const bam_hdr_t *h)
 
int sam_parse1 (kstring_t *s, bam_hdr_t *h, bam1_t *b)
 
int sam_format1 (const bam_hdr_t *h, const bam1_t *b, kstring_t *str)
 
int sam_read1 (samFile *fp, bam_hdr_t *h, bam1_t *b)
 
int sam_write1 (samFile *fp, const bam_hdr_t *h, const bam1_t *b)
 
uint8_t * bam_aux_get (const bam1_t *b, const char tag[2])
 
int32_t bam_aux2i (const uint8_t *s)
 
double bam_aux2f (const uint8_t *s)
 
char bam_aux2A (const uint8_t *s)
 
char * bam_aux2Z (const uint8_t *s)
 
void bam_aux_append (bam1_t *b, const char tag[2], char type, int len, uint8_t *data)
 
int bam_aux_del (bam1_t *b, uint8_t *s)
 
bam_plp_t bam_plp_init (bam_plp_auto_f func, void *data)
 
void bam_plp_destroy (bam_plp_t iter)
 
int bam_plp_push (bam_plp_t iter, const bam1_t *b)
 
const bam_pileup1_t * bam_plp_next (bam_plp_t iter, int *_tid, int *_pos, int *_n_plp)
 
const bam_pileup1_t * bam_plp_auto (bam_plp_t iter, int *_tid, int *_pos, int *_n_plp)
 
void bam_plp_set_maxcnt (bam_plp_t iter, int maxcnt)
 
void bam_plp_reset (bam_plp_t iter)
 
bam_mplp_t bam_mplp_init (int n, bam_plp_auto_f func, void **data)
 
void bam_mplp_init_overlaps (bam_mplp_t iter)
 
void bam_mplp_destroy (bam_mplp_t iter)
 
void bam_mplp_set_maxcnt (bam_mplp_t iter, int maxcnt)
 
int bam_mplp_auto (bam_mplp_t iter, int *_tid, int *_pos, int *n_plp, const bam_pileup1_t **plp)
 

Macro Definition Documentation

#define BAM_CBACK   9
#define BAM_CDEL   2
#define BAM_CDIFF   8
#define BAM_CEQUAL   7
#define BAM_CHARD_CLIP   5
#define bam_cigar_gen (   l,
  o 
)    ((l)<<BAM_CIGAR_SHIFT|(o))
#define BAM_CIGAR_MASK   0xf
#define bam_cigar_op (   c)    ((c)&BAM_CIGAR_MASK)
#define bam_cigar_opchr (   c)    (BAM_CIGAR_STR[bam_cigar_op(c)])
#define bam_cigar_oplen (   c)    ((c)>>BAM_CIGAR_SHIFT)
#define BAM_CIGAR_SHIFT   4
#define BAM_CIGAR_STR   "MIDNSHP=XB"
#define BAM_CIGAR_TYPE   0x3C1A7
#define bam_cigar_type (   o)    (BAM_CIGAR_TYPE>>((o)<<1)&3)
#define BAM_CINS   1
#define BAM_CMATCH   0
#define BAM_CPAD   6
#define BAM_CREF_SKIP   3
#define BAM_CSOFT_CLIP   4
#define BAM_FDUP   1024

optical or PCR duplicate

#define BAM_FMREVERSE   32

the mate is mapped to the reverse strand

#define BAM_FMUNMAP   8

the mate is unmapped

#define BAM_FPAIRED   1

the read is paired in sequencing, no matter whether it is mapped in a pair

#define BAM_FPROPER_PAIR   2

the read is mapped in a proper pair

#define BAM_FQCFAIL   512

QC failure

#define BAM_FREAD1   64

this is read1

#define BAM_FREAD2   128

this is read2

#define BAM_FREVERSE   16

the read is mapped to the reverse strand

#define BAM_FSECONDARY   256

not primary alignment

#define BAM_FSUPPLEMENTARY   2048

supplementary alignment

#define BAM_FUNMAP   4

the read itself is unmapped; conflicts with BAM_FPROPER_PAIR

#define bam_get_aux (   b)    ((b)->data + ((b)->core.n_cigar<<2) + (b)->core.l_qname + (((b)->core.l_qseq + 1)>>1) + (b)->core.l_qseq)

Get auxiliary data

Parameters
b: pointer to an alignment
Returns
pointer to the concatenated auxiliary data
#define bam_get_cigar (   b)    ((uint32_t*)((b)->data + (b)->core.l_qname))

Get the CIGAR array

Parameters
b: pointer to an alignment
Returns
pointer to the CIGAR array

In the CIGAR array, each element is a 32-bit integer. The lower 4 bits give the CIGAR operation and the higher 28 bits hold the length of that operation.

#define bam_get_l_aux (   b)    ((b)->l_data - ((b)->core.n_cigar<<2) - (b)->core.l_qname - (b)->core.l_qseq - (((b)->core.l_qseq + 1)>>1))

Get length of auxiliary data

Parameters
b: pointer to an alignment
Returns
length of the concatenated auxiliary data
#define bam_get_qname (   b)    ((char*)(b)->data)

Get the name of the query

Parameters
b: pointer to an alignment
Returns
pointer to the name string, null terminated
#define bam_get_qual (   b)    ((b)->data + ((b)->core.n_cigar<<2) + (b)->core.l_qname + (((b)->core.l_qseq + 1)>>1))

Get query quality

Parameters
b: pointer to an alignment
Returns
pointer to quality string
#define bam_get_seq (   b)    ((b)->data + ((b)->core.n_cigar<<2) + (b)->core.l_qname)

Get query sequence

Parameters
b: pointer to an alignment
Returns
pointer to sequence

Each base is encoded in 4 bits: 1 for A, 2 for C, 4 for G, 8 for T and 15 for N. Two bases are packed into one byte, with the base in the higher 4 bits having the smaller coordinate on the read. It is recommended to use the bam_seqi() macro to get a base.

#define bam_index_load (   fn)    hts_idx_load((fn), HTS_FMT_BAI)
#define bam_is_mrev (   b)    (((b)->core.flag&BAM_FMREVERSE) != 0)

Get whether the query's mate is on the reverse strand

Parameters
b: pointer to an alignment
Returns
boolean true if query's mate on the reverse strand
#define bam_is_rev (   b)    (((b)->core.flag&BAM_FREVERSE) != 0)

Get whether the query is on the reverse strand

Parameters
b: pointer to an alignment
Returns
boolean true if query is on the reverse strand
#define bam_itr_destroy (   iter)    hts_itr_destroy(iter)

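The string must be freed by the user (note: this comment appears to be mis-attached; bam_itr_destroy() yields no string, and the remark most likely belongs to bam_flag2str(), which returns a char* — confirm against the header)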

#define bam_itr_next (   htsfp,
  itr,
  r 
)    hts_itr_next((htsfp)->fp.bgzf, (itr), (r), 0)
#define bam_itr_queryi (   idx,
  tid,
  beg,
  end 
)    sam_itr_queryi(idx, tid, beg, end)
#define bam_itr_querys (   idx,
  hdr,
  region 
)    sam_itr_querys(idx, hdr, region)
#define bam_seqi (   s,
  i 
)    ((s)[(i)>>1] >> ((~(i)&1)<<2) & 0xf)

Get a base on read

Parameters
s: Query sequence returned by bam_get_seq()
i: The i-th position, 0-based
Returns
4-bit integer representing the base.
#define sam_close (   fp)    hts_close(fp)
#define sam_itr_destroy (   iter)    hts_itr_destroy(iter)
#define sam_itr_next (   htsfp,
  itr,
  r 
)    hts_itr_next((htsfp)->fp.bgzf, (itr), (r), (htsfp))
#define sam_open (   fn,
  mode 
)    (hts_open((fn), (mode)))

Typedef Documentation

typedef struct __bam_mplp_t* bam_mplp_t
typedef int(* bam_plp_auto_f)(void *data, bam1_t *b)
typedef struct __bam_plp_t* bam_plp_t
typedef htsFile samFile

Function Documentation

char bam_aux2A ( const uint8_t *  s)
double bam_aux2f ( const uint8_t *  s)
int32_t bam_aux2i ( const uint8_t *  s)
char* bam_aux2Z ( const uint8_t *  s)
void bam_aux_append ( bam1_t *  b,
const char  tag[2],
char  type,
int  len,
uint8_t *  data 
)
int bam_aux_del ( bam1_t *  b,
uint8_t *  s 
)
uint8_t* bam_aux_get ( const bam1_t *  b,
const char  tag[2] 
)
int bam_cigar2qlen ( int  n_cigar,
const uint32_t *  cigar 
)
int bam_cigar2rlen ( int  n_cigar,
const uint32_t *  cigar 
)
bam1_t* bam_copy1 ( bam1_t *  bdst,
const bam1_t *  bsrc 
)
void bam_destroy1 ( bam1_t *  b)
bam1_t* bam_dup1 ( const bam1_t *  bsrc)
int32_t bam_endpos ( const bam1_t *  b)

Calculate the rightmost base position of an alignment on the reference genome.

Parameters
b: pointer to an alignment
Returns
the coordinate of the first base after the alignment, 0-based

For a mapped read, this is just b->core.pos + bam_cigar2rlen. For an unmapped read (either according to its flags or if it has no cigar string), we return b->core.pos + 1 by convention.

char* bam_flag2str ( int  flag)

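returns negative value on error (note: this comment appears to be mis-attached; bam_flag2str() returns a char*, and the remark most likely belongs to bam_str2flag(), which returns int — confirm against the header)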

void bam_hdr_destroy ( bam_hdr_t *  h)
bam_hdr_t* bam_hdr_dup ( const bam_hdr_t *  h0)
bam_hdr_t* bam_hdr_init ( void  )
bam_hdr_t* bam_hdr_read ( BGZF *  fp)
int bam_hdr_write ( BGZF *  fp,
const bam_hdr_t *  h 
)
int bam_index_build ( const char *  fn,
int  min_shift 
)
bam1_t* bam_init1 ( void  )
int bam_mplp_auto ( bam_mplp_t  iter,
int *  _tid,
int *  _pos,
int *  n_plp,
const bam_pileup1_t **  plp 
)
void bam_mplp_destroy ( bam_mplp_t  iter)
bam_mplp_t bam_mplp_init ( int  n,
bam_plp_auto_f  func,
void **  data 
)
void bam_mplp_init_overlaps ( bam_mplp_t  iter)

bam_mplp_init_overlaps() - if called, mpileup will detect overlapping read pairs and for each base pair set the base quality of the lower-quality base to zero, thus effectively discarding it from calling. If the two bases are identical, the quality of the other base is increased to the sum of their qualities (capped at 200), otherwise it is multiplied by 0.8.

void bam_mplp_set_maxcnt ( bam_mplp_t  iter,
int  maxcnt 
)
int bam_name2id ( bam_hdr_t *  h,
const char *  ref 
)
const bam_pileup1_t* bam_plp_auto ( bam_plp_t  iter,
int *  _tid,
int *  _pos,
int *  _n_plp 
)
void bam_plp_destroy ( bam_plp_t  iter)
bam_plp_t bam_plp_init ( bam_plp_auto_f  func,
void *  data 
)

bam_plp_init() - sets an iterator over multiple. Parameter func: see mplp_func in bam_plcmd.c in samtools for an example; expected return status: 0 on success, -1 on end, < -1 on non-recoverable errors. Parameter data: user data to pass to func.

const bam_pileup1_t* bam_plp_next ( bam_plp_t  iter,
int *  _tid,
int *  _pos,
int *  _n_plp 
)
int bam_plp_push ( bam_plp_t  iter,
const bam1_t *  b 
)
void bam_plp_reset ( bam_plp_t  iter)
void bam_plp_set_maxcnt ( bam_plp_t  iter,
int  maxcnt 
)
int bam_read1 ( BGZF *  fp,
bam1_t *  b 
)
int bam_str2flag ( const char *  str)
int bam_write1 ( BGZF *  fp,
const bam1_t *  b 
)
int sam_format1 ( const bam_hdr_t *  h,
const bam1_t *  b,
kstring_t *  str 
)
bam_hdr_t* sam_hdr_parse ( int  l_text,
const char *  text 
)
bam_hdr_t* sam_hdr_read ( samFile *  fp)
int sam_hdr_write ( samFile *  fp,
const bam_hdr_t *  h 
)
hts_idx_t* sam_index_load ( htsFile *  fp,
const char *  fn 
)
hts_itr_t* sam_itr_queryi ( const hts_idx_t *  idx,
int  tid,
int  beg,
int  end 
)
hts_itr_t* sam_itr_querys ( const hts_idx_t *  idx,
bam_hdr_t *  hdr,
const char *  region 
)
int sam_open_mode ( char *  mode,
const char *  fn,
const char *  format 
)
int sam_parse1 ( kstring_t *  s,
bam_hdr_t *  h,
bam1_t *  b 
)
int sam_read1 ( samFile *  fp,
bam_hdr_t *  h,
bam1_t *  b 
)
int sam_write1 ( samFile *  fp,
const bam_hdr_t *  h,
const bam1_t *  b 
)