Gamgee
You miserable little maggot. I'll stove your head in!
|
#include <stdarg.h>
#include "cram/string_alloc.h"
#include "cram/pooled_alloc.h"
#include "htslib/khash.h"
#include "htslib/kstring.h"
Go to the source code of this file.
Classes | |
struct | SAM_hdr_tag_s |
struct | SAM_hdr_item_s |
struct | SAM_SQ |
struct | SAM_RG |
struct | SAM_PG |
struct | SAM_hdr |
Macros | |
#define | KS_INITIALIZER {0,0,0} |
#define | KS_INIT(ks) ((ks)->l = 0, (ks)->m = 0, (ks)->s = NULL) |
#define | KS_FREE(ks) do { if ((ks)->s) free((ks)->s); } while(0) |
Typedefs | |
typedef struct SAM_hdr_tag_s | SAM_hdr_tag |
typedef struct SAM_hdr_item_s | SAM_hdr_type |
Functions | |
SAM_hdr * | sam_hdr_new (void) |
SAM_hdr * | sam_hdr_parse_ (const char *hdr, int len) |
SAM_hdr * | sam_hdr_dup (SAM_hdr *hdr) |
void | sam_hdr_incr_ref (SAM_hdr *hdr) |
void | sam_hdr_decr_ref (SAM_hdr *hdr) |
void | sam_hdr_free (SAM_hdr *hdr) |
int | sam_hdr_length (SAM_hdr *hdr) |
char * | sam_hdr_str (SAM_hdr *hdr) |
int | sam_hdr_add_lines (SAM_hdr *sh, const char *lines, int len) |
int | sam_hdr_add (SAM_hdr *sh, const char *type,...) |
int | sam_hdr_vadd (SAM_hdr *sh, const char *type, va_list ap,...) |
SAM_hdr_type * | sam_hdr_find (SAM_hdr *hdr, char *type, char *ID_key, char *ID_value) |
char * | sam_hdr_find_line (SAM_hdr *hdr, char *type, char *ID_key, char *ID_value) |
SAM_hdr_tag * | sam_hdr_find_key (SAM_hdr *sh, SAM_hdr_type *type, char *key, SAM_hdr_tag **prev) |
int | sam_hdr_update (SAM_hdr *hdr, SAM_hdr_type *type,...) |
int | sam_hdr_rebuild (SAM_hdr *hdr) |
int | sam_hdr_name2ref (SAM_hdr *hdr, const char *ref) |
SAM_RG * | sam_hdr_find_rg (SAM_hdr *hdr, const char *rg) |
int | sam_hdr_link_pg (SAM_hdr *hdr) |
int | sam_hdr_add_PG (SAM_hdr *sh, const char *name,...) |
char * | stringify_argv (int argc, char *argv[]) |
SAM header parsing.
These functions can be shared between SAM, BAM and CRAM file formats as all three internally use the same string encoding for header fields.
Consider using the scram() generic API and calling scram_get_header() to obtain the format-specific pointer to the SAM_hdr struct.
#define KS_FREE | ( | ks | ) | do { if ((ks)->s) free((ks)->s); } while(0) |
#define KS_INIT | ( | ks | ) | ((ks)->l = 0, (ks)->m = 0, (ks)->s = NULL) |
#define KS_INITIALIZER {0,0,0} |
typedef struct SAM_hdr_tag_s SAM_hdr_tag |
A single key:value pair on a header line
These form a linked list and hold strings. The strings are allocated from a string_alloc_t pool referenced in the master SAM_hdr structure. Do not attempt to free, malloc or manipulate these strings directly.
typedef struct SAM_hdr_item_s SAM_hdr_type |
The parsed version of the SAM header string.
Each header type (SQ, RG, HD, etc) points to its own SAM_hdr_type struct via the main hash table h in the SAM_hdr struct.
These in turn consist of circular bi-directional linked lists (ie rings) to hold the multiple instances of the same header type code. For example if we have 5 @SQ lines the primary hash table will key on @SQ pointing to the first SAM_hdr_type and that in turn will be part of a ring of 5 elements.
For each SAM_hdr_type structure we also point to a SAM_hdr_tag structure which holds the tokenised attributes; the tab separated key:value pairs per line.
int sam_hdr_add | ( | SAM_hdr * | sh, |
const char * | type, | ||
... | |||
) |
Adds a single line to a SAM header.
Specify type and one or more key,value pairs, ending with the NULL key. Eg. sam_hdr_add(h, "SQ", "ID", "foo", "LN", "100", NULL).
int sam_hdr_add_lines | ( | SAM_hdr * | sh, |
const char * | lines, | ||
int | len | ||
) |
Appends a formatted line to an existing SAM header.
Line is a full SAM header record, eg "@SQ\tSN:foo\tLN:100", with optional new-line. If it contains more than 1 line then multiple lines will be added in order.
Len is the length of the text data, or 0 if unknown (in which case it should be null terminated).
int sam_hdr_add_PG | ( | SAM_hdr * | sh, |
const char * | name, | ||
... | |||
) |
Adds a @PG line.
If we wish complete control over this use sam_hdr_add() directly. This function uses that, but attempts to do a lot of tedious house work for you too.
Call it as per sam_hdr_add() with a series of key,value pairs ending in NULL.
void sam_hdr_decr_ref | ( | SAM_hdr * | hdr | ) |
Decrements a reference count on hdr.
This permits multiple files to share the same header, all calling sam_hdr_free when done, without causing errors for other open files.
If the reference count hits zero then the header is automatically freed. This makes it a synonym for sam_hdr_free().
Produces a duplicate copy of hdr and returns it.
SAM_hdr_type* sam_hdr_find | ( | SAM_hdr * | hdr, |
char * | type, | ||
char * | ID_key, | ||
char * | ID_value | ||
) |
Returns NULL if no type/ID is found
SAM_hdr_tag* sam_hdr_find_key | ( | SAM_hdr * | sh, |
SAM_hdr_type * | type, | ||
char * | key, | ||
SAM_hdr_tag ** | prev | ||
) |
Looks for a specific key in a single sam header line.
If prev is non-NULL it also fills this out with the previous tag, to permit use in key removal. *prev is set to NULL when the tag is the first key in the list. When a tag isn't found, prev (if non NULL) will be the last tag in the existing list.
char* sam_hdr_find_line | ( | SAM_hdr * | hdr, |
char * | type, | ||
char * | ID_key, | ||
char * | ID_value | ||
) |
As per sam_hdr_find(), but returns a complete line of formatted text for a specific header type/ID combination. If ID is NULL then it returns the first line of the specified type.
The returned string is malloced and should be freed by the calling function with free().
Looks up a read-group by name and returns a pointer to the start of the associated tag list.
void sam_hdr_free | ( | SAM_hdr * | hdr | ) |
Deallocates all storage used by a SAM_hdr struct.
This also decrements the header reference count. If after decrementing it is still non-zero then the header is assumed to be in use by another caller and the free is not done.
This is a synonym for sam_hdr_decr_ref().
void sam_hdr_incr_ref | ( | SAM_hdr * | hdr | ) |
Increments a reference count on hdr.
This permits multiple files to share the same header, all calling sam_hdr_free when done, without causing errors for other open files.
int sam_hdr_length | ( | SAM_hdr * | hdr | ) |
Returns the current length of the SAM_hdr in text form.
Call sam_hdr_rebuild() first if editing has taken place.
int sam_hdr_link_pg | ( | SAM_hdr * | hdr | ) |
Fixes any PP links in headers.
If the entries are in order then this doesn't need doing, but in case our header is out of order this goes through the sh->pg[] array setting the prev_id field.
int sam_hdr_name2ref | ( | SAM_hdr * | hdr, |
const char * | ref | ||
) |
Looks up a reference sequence by name and returns the numerical ID.
SAM_hdr* sam_hdr_new | ( | void | ) |
Creates an empty SAM header, ready to be populated.
SAM_hdr* sam_hdr_parse_ | ( | const char * | hdr, |
int | len | ||
) |
Tokenises a SAM header into a hash table.
Also extracts a few bits on specific data types, such as @RG lines.
int sam_hdr_rebuild | ( | SAM_hdr * | hdr | ) |
Reconstructs the kstring from the header hash table.
char* sam_hdr_str | ( | SAM_hdr * | hdr | ) |
Returns the string form of the SAM_hdr.
Call sam_hdr_rebuild() first if editing has taken place.
int sam_hdr_update | ( | SAM_hdr * | hdr, |
SAM_hdr_type * | type, | ||
... | |||
) |
Adds or updates tag key,value pairs in a header line.
Eg for adding M5 tags to @SQ lines or updating the sort order for the @HD line (although use the sam_hdr_sort_order() function for HD manipulation, which is a wrapper around this function).
Specify multiple key,value pairs ending in NULL.
int sam_hdr_vadd | ( | SAM_hdr * | sh, |
const char * | type, | ||
va_list | ap, | ||
... | |||
) |
Adds a single line to a SAM header.
This is much like sam_hdr_add() but with the additional va_list argument. This is followed by specifying type and one or more key,value pairs, ending with the NULL key.
Eg. sam_hdr_vadd(h, "SQ", args, "ID", "foo", "LN", "100", NULL).
The purpose of the additional va_list parameter is to permit other varargs functions to call this while including their own additional parameters; an example is in sam_hdr_add_PG().
char* stringify_argv | ( | int | argc, |
char * | argv[] | ||
) |
A function to help with construction of CL tags in records. Takes an argc, argv pair and returns a single space-separated string. This string should be deallocated by the calling function.