Gamgee
You miserable little maggot. I'll stove your head in!
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
sam_header.h
Go to the documentation of this file.
1 /*
2 Copyright (c) 2013-2014 Genome Research Ltd.
3 Author: James Bonfield <jkb@sanger.ac.uk>
4 
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7 
8  1. Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 
11  2. Redistributions in binary form must reproduce the above copyright notice,
12 this list of conditions and the following disclaimer in the documentation
13 and/or other materials provided with the distribution.
14 
15  3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
16 Institute nor the names of its contributors may be used to endorse or promote
17 products derived from this software without specific prior written permission.
18 
19 THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
23 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30 
43 /*
44  * TODO.
45  *
46  * - Sort order (parse to struct, enum type, updating funcs)
47  * - Removal of lines.
48  * - Updating of lines
49  */
50 
51 #ifndef _SAM_HDR_H_
52 #define _SAM_HDR_H_
53 
54 #ifdef __cplusplus
55 extern "C" {
56 #endif
57 
58 #ifdef HAVE_CONFIG_H
59 #include "io_lib_config.h"
60 #endif
61 
62 #include <stdarg.h>
63 
64 #include "cram/string_alloc.h"
65 #include "cram/pooled_alloc.h"
66 
67 #include "htslib/khash.h"
68 #include "htslib/kstring.h"
69 
70 // For structure assignment. Eg kstring_t s = KS_INITIALIZER;
71 #define KS_INITIALIZER {0,0,0}
72 
73 // For initialisation elsewhere. Eg KS_INIT(x->str);
74 #define KS_INIT(ks) ((ks)->l = 0, (ks)->m = 0, (ks)->s = NULL)
75 
76 // Frees only the string buffer (ks->s); the kstring_t struct itself is not freed.
77 #define KS_FREE(ks) do { if ((ks)->s) free((ks)->s); } while(0)
78 
79 /*
80  * Proposed new SAM header parsing
81 
82 1 @SQ ID:foo LN:100
83 2 @SQ ID:bar LN:200
84 3 @SQ ID:ram LN:300 UR:xyz
85 4 @RG ID:r ...
86 5 @RG ID:s ...
87 
88 Hash table for 2-char @keys without dup entries.
89 If dup lines, we form a circular linked list. Ie hash keys = {RG, SQ}.
90 
91 HASH("SQ")--\
92  |
93  (3) <-> 1 <-> 2 <-> 3 <-> (1)
94 
95 HASH("RG")--\
96  |
97  (5) <-> 4 <-> 5 <-> (4)
98 
99 Items stored in the hash values also form their own linked lists:
100 Ie SQ->ID(foo)->LN(100)
101  SQ->ID(bar)->LN(200)
102  SQ->ID(ram)->LN(300)->UR(xyz)
103  RG->ID(r)
104  */
105 
113 typedef struct SAM_hdr_tag_s { // one tag of a header line, e.g. ID(foo) or LN(100)
114  struct SAM_hdr_tag_s *next; // next tag in this line's linked list
115  char *str;
116  int len;
117 } SAM_hdr_tag;
118 
134 typedef struct SAM_hdr_item_s { // one header line; lines of the same @type are linked together
135  struct SAM_hdr_item_s *next; // circular
136  struct SAM_hdr_item_s *prev; // circular, opposite direction to next
137  SAM_hdr_tag *tag; // first tag
138  int order; // 0 upwards
139 } SAM_hdr_type;
140 
142 typedef struct { // one parsed @SQ line
143  char *name;
144  uint32_t len;
145  SAM_hdr_type *ty;
146  SAM_hdr_tag *tag;
147 } SAM_SQ;
148 
150 typedef struct { // one parsed @RG line
151  char *name;
152  SAM_hdr_type *ty;
153  SAM_hdr_tag *tag;
154  int name_len;
155  int id; // numerical ID
156 } SAM_RG;
157 
159 typedef struct { // one parsed @PG line
160  char *name;
161  SAM_hdr_type *ty;
162  SAM_hdr_tag *tag;
163  int name_len;
164  int id; // numerical ID
165  int prev_id; // -1 if none
166 } SAM_PG;
167 
168 KHASH_MAP_INIT_INT(sam_hdr, SAM_hdr_type*) // NOTE(review): instantiation needed by khash_t(sam_hdr) below; value type reconstructed - confirm
169 KHASH_MAP_INIT_STR(m_s2i, int)
170 
183 typedef struct {
184  kstring_t text; // concatenated text, indexed by SAM_hdr_tag
185  khash_t(sam_hdr) *h;
186  string_alloc_t *str_pool; // pool of SAM_hdr_tag->str strings
187  pool_alloc_t *type_pool; // pool of SAM_hdr_type structs
188  pool_alloc_t *tag_pool; // pool of SAM_hdr_tag structs
189 
190  // @SQ lines / references
191  int nref; // number of @SQ lines
192  SAM_SQ *ref; // array of parsed @SQ lines
193  khash_t(m_s2i) *ref_hash;
194 
195  // @RG lines / read-groups
196  int nrg; // number of @RG lines
197  SAM_RG *rg; // array of parsed @RG lines
198  khash_t(m_s2i) *rg_hash;
199 
200  // @PG lines / programs
201  int npg; // number of @PG lines
202  int npg_end; // number of terminating @PG lines
203  int npg_end_alloc; // size of pg_end field
204  SAM_PG *pg; // array of parsed @PG lines
205  khash_t(m_s2i) *pg_hash;
206  int *pg_end; // @PG chain termination IDs
207 
208  // @cond internal
209  char ID_buf[1024]; // temporary buffer
210  int ID_cnt;
211  int ref_count; // number of uses of this SAM_hdr
212  // @endcond
213 } SAM_hdr;
214 
221 SAM_hdr *sam_hdr_new(void);
222 
231 SAM_hdr *sam_hdr_parse_(const char *hdr, int len);
232 
233 
238 SAM_hdr *sam_hdr_dup(SAM_hdr *hdr);
239 
240 
246 void sam_hdr_incr_ref(SAM_hdr *hdr);
247 
248 
257 void sam_hdr_decr_ref(SAM_hdr *hdr);
258 
259 
268 void sam_hdr_free(SAM_hdr *hdr);
269 
274 int sam_hdr_length(SAM_hdr *hdr);
275 
280 char *sam_hdr_str(SAM_hdr *hdr);
281 
295 int sam_hdr_add_lines(SAM_hdr *sh, const char *lines, int len);
296 
306 int sam_hdr_add(SAM_hdr *sh, const char *type, ...);
307 
324 int sam_hdr_vadd(SAM_hdr *sh, const char *type, va_list ap, ...);
325 
333 SAM_hdr_type *sam_hdr_find(SAM_hdr *hdr, char *type,
334  char *ID_key, char *ID_value);
335 
348 char *sam_hdr_find_line(SAM_hdr *hdr, char *type,
349  char *ID_key, char *ID_value);
350 
362 SAM_hdr_tag *sam_hdr_find_key(SAM_hdr *sh,
363  SAM_hdr_type *type,
364  char *key,
365  SAM_hdr_tag **prev);
366 
379 int sam_hdr_update(SAM_hdr *hdr, SAM_hdr_type *type, ...);
380 
386 int sam_hdr_rebuild(SAM_hdr *hdr);
387 
392 int sam_hdr_name2ref(SAM_hdr *hdr, const char *ref);
393 
400 SAM_RG *sam_hdr_find_rg(SAM_hdr *hdr, const char *rg);
401 
412 int sam_hdr_link_pg(SAM_hdr *hdr);
413 
414 
431 int sam_hdr_add_PG(SAM_hdr *sh, const char *name, ...);
432 
442 char *stringify_argv(int argc, char *argv[]);
443 
444 #ifdef __cplusplus
445 }
446 #endif
447 
448 #endif /* _SAM_HDR_H_ */
#define KHASH_MAP_INIT_INT(name, khval_t)
Definition: khash.h:583
struct SAM_hdr_item_s * next
Definition: sam_header.h:135
SAM_hdr_tag * tag
Definition: sam_header.h:153
struct SAM_hdr_tag_s * next
Definition: sam_header.h:114
int sam_hdr_vadd(SAM_hdr *sh, const char *type, va_list ap,...)
Definition: sam_header.c:418
int npg
Number of @PG lines.
Definition: sam_header.h:201
SAM_hdr_type * ty
Definition: sam_header.h:161
uint32_t len
Definition: sam_header.h:144
SAM_SQ * ref
Array of parsed @SQ lines.
Definition: sam_header.h:192
SAM_hdr_tag * tag
Definition: sam_header.h:162
char * stringify_argv(int argc, char *argv[])
Definition: sam_header.c:1190
SAM_hdr_tag * sam_hdr_find_key(SAM_hdr *sh, SAM_hdr_type *type, char *key, SAM_hdr_tag **prev)
Definition: sam_header.c:668
int npg_end
Number of terminating @PG lines.
Definition: sam_header.h:202
Definition: string_alloc.h:52
struct SAM_hdr_item_s SAM_hdr_type
void sam_hdr_incr_ref(SAM_hdr *hdr)
Definition: sam_header.c:932
char * name
Definition: sam_header.h:151
int name_len
Definition: sam_header.h:163
int npg_end_alloc
Size of pg_end field.
Definition: sam_header.h:203
SAM_RG * sam_hdr_find_rg(SAM_hdr *hdr, const char *rg)
Definition: sam_header.c:1040
Definition: sam_header.h:183
SAM_hdr_tag * tag
Definition: sam_header.h:146
Definition: sam_header.h:113
#define KHASH_MAP_INIT_STR(name, khval_t)
Definition: khash.h:614
char * str
Definition: sam_header.h:115
int sam_hdr_link_pg(SAM_hdr *hdr)
Definition: sam_header.c:1061
char * sam_hdr_str(SAM_hdr *hdr)
Definition: sam_header.c:1021
int prev_id
Definition: sam_header.h:165
int nrg
Number of @RG lines.
Definition: sam_header.h:196
SAM_PG * pg
Array of parsed @PG lines.
Definition: sam_header.h:204
Definition: sam_header.h:159
Definition: sam_header.h:150
pool_alloc_t * type_pool
Pool of SAM_hdr_type structs.
Definition: sam_header.h:187
kstring_t text
concatenated text, indexed by SAM_hdr_tag
Definition: sam_header.h:184
SAM_hdr_tag * tag
Definition: sam_header.h:137
Definition: bgzf.h:69
SAM_hdr_type * ty
Definition: sam_header.h:152
void sam_hdr_decr_ref(SAM_hdr *hdr)
Definition: sam_header.c:944
struct SAM_hdr_tag_s SAM_hdr_tag
pool_alloc_t * tag_pool
Pool of SAM_hdr_tag structs.
Definition: sam_header.h:188
int sam_hdr_rebuild(SAM_hdr *hdr)
Definition: sam_header.c:749
void sam_hdr_free(SAM_hdr *hdr)
Definition: sam_header.c:956
char * sam_hdr_find_line(SAM_hdr *hdr, char *type, char *ID_key, char *ID_value)
Definition: sam_header.c:631
int len
Definition: sam_header.h:116
char * name
Definition: sam_header.h:160
Definition: sam_header.h:142
#define khash_t(name)
Definition: khash.h:422
Definition: sam_header.h:134
int order
Definition: sam_header.h:138
SAM_RG * rg
Array of parsed @RG lines.
Definition: sam_header.h:197
char * name
Definition: sam_header.h:143
int sam_hdr_add(SAM_hdr *sh, const char *type,...)
Definition: sam_header.c:412
int * pg_end
@PG chain termination IDs
Definition: sam_header.h:206
int name_len
Definition: sam_header.h:154
SAM_hdr * sam_hdr_parse_(const char *hdr, int len)
Definition: sam_header.c:887
int sam_hdr_update(SAM_hdr *hdr, SAM_hdr_type *type,...)
Definition: sam_header.c:700
int sam_hdr_length(SAM_hdr *hdr)
Definition: sam_header.c:1017
int id
Definition: sam_header.h:164
int sam_hdr_add_PG(SAM_hdr *sh, const char *name,...)
Definition: sam_header.c:1143
Definition: pooled_alloc.h:43
SAM_hdr * sam_hdr_dup(SAM_hdr *hdr)
Definition: sam_header.c:920
struct SAM_hdr_item_s * prev
Definition: sam_header.h:136
int sam_hdr_name2ref(SAM_hdr *hdr, const char *ref)
Definition: sam_header.c:1029
int sam_hdr_add_lines(SAM_hdr *sh, const char *lines, int len)
Definition: sam_header.c:272
int id
Definition: sam_header.h:155
string_alloc_t * str_pool
Pool of SAM_hdr_tag->str strings.
Definition: sam_header.h:186
SAM_hdr_type * ty
Definition: sam_header.h:145
SAM_hdr * sam_hdr_new(void)
Definition: sam_header.c:818
SAM_hdr_type * sam_hdr_find(SAM_hdr *hdr, char *type, char *ID_key, char *ID_value)
Definition: sam_header.c:561
int nref
Number of @SQ lines.
Definition: sam_header.h:191