Gamgee
You miserable little maggot. I'll stove your head in!
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Classes | Macros | Functions
cram_io.c File Reference
#include <stdio.h>
#include <errno.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <zlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <math.h>
#include <ctype.h>
#include "cram/cram.h"
#include "cram/os.h"
#include "cram/md5.h"
#include "cram/open_trace_file.h"
#include "cram/rANS_static.h"
#include "htslib/hfile.h"
#include "htslib/bgzf.h"
#include "htslib/faidx.h"

Classes

struct  cram_job
 

Macros

#define RP(...)
 
#define TRIAL_SPAN   50
 
#define NTRIALS   3
 
#define MAXDELTA   0.20
 
#define MAXFAILS   4
 

Functions

int itf8_decode (cram_fd *fd, int32_t *val_p)
 
int itf8_encode (cram_fd *fd, int32_t val)
 
int itf8_get (char *cp, int32_t *val_p)
 
int itf8_put (char *cp, int32_t val)
 
int ltf8_put (char *cp, int64_t val)
 
int ltf8_get (char *cp, int64_t *val_p)
 
int ltf8_decode (cram_fd *fd, int64_t *val_p)
 
int itf8_put_blk (cram_block *blk, int val)
 
int int32_decode (cram_fd *fd, int32_t *val)
 
int int32_encode (cram_fd *fd, int32_t val)
 
int int32_get (cram_block *b, int32_t *val)
 
int int32_put (cram_block *b, int32_t val)
 
char * zlib_mem_inflate (char *cdata, size_t csize, size_t *size)
 
cram_block * cram_new_block (enum cram_content_type content_type, int content_id)
 
cram_block * cram_read_block (cram_fd *fd)
 
int cram_write_block (cram_fd *fd, cram_block *b)
 
void cram_free_block (cram_block *b)
 
int cram_uncompress_block (cram_block *b)
 
int cram_compress_block (cram_fd *fd, cram_block *b, cram_metrics *metrics, int method, int level)
 
cram_metrics * cram_new_metrics (void)
 
char * cram_block_method2str (enum cram_block_method m)
 
char * cram_content_type2str (enum cram_content_type t)
 
int paranoid_fclose (FILE *fp)
 
void refs_free (refs_t *r)
 
int refs2id (refs_t *r, SAM_hdr *h)
 
int cram_set_header (cram_fd *fd, SAM_hdr *hdr)
 
void expand_cache_path (char *path, char *dir, char *fn)
 
void mkdir_prefix (char *path, int mode)
 
void cram_ref_incr (refs_t *r, int id)
 
void cram_ref_decr (refs_t *r, int id)
 
ref_entry * cram_ref_load (refs_t *r, int id)
 
char * cram_get_ref (cram_fd *fd, int id, int start, int end)
 
int cram_load_reference (cram_fd *fd, char *fn)
 
cram_container * cram_new_container (int nrec, int nslice)
 
void cram_free_container (cram_container *c)
 
cram_container * cram_read_container (cram_fd *fd)
 
int cram_write_container (cram_fd *fd, cram_container *c)
 
int cram_flush_container (cram_fd *fd, cram_container *c)
 
void * cram_flush_thread (void *arg)
 
int cram_flush_container_mt (cram_fd *fd, cram_container *c)
 
cram_block_compression_hdr * cram_new_compression_header (void)
 
void cram_free_compression_header (cram_block_compression_hdr *hdr)
 
void cram_free_slice_header (cram_block_slice_hdr *hdr)
 
void cram_free_slice (cram_slice *s)
 
cram_slice * cram_new_slice (enum cram_content_type type, int nrecs)
 
cram_slice * cram_read_slice (cram_fd *fd)
 
cram_file_def * cram_read_file_def (cram_fd *fd)
 
int cram_write_file_def (cram_fd *fd, cram_file_def *def)
 
void cram_free_file_def (cram_file_def *def)
 
SAM_hdr * cram_read_SAM_hdr (cram_fd *fd)
 
int cram_write_SAM_hdr (cram_fd *fd, SAM_hdr *hdr)
 
cram_fd * cram_open (const char *filename, const char *mode)
 
cram_fd * cram_dopen (hFILE *fp, const char *filename, const char *mode)
 
int cram_seek (cram_fd *fd, off_t offset, int whence)
 
int cram_flush (cram_fd *fd)
 
int cram_close (cram_fd *fd)
 
int cram_eof (cram_fd *fd)
 
int cram_set_option (cram_fd *fd, enum cram_option opt,...)
 
int cram_set_voption (cram_fd *fd, enum cram_option opt, va_list args)
 

Macro Definition Documentation

#define MAXDELTA   0.20
#define MAXFAILS   4
#define NTRIALS   3
#define RP(...)
#define TRIAL_SPAN   50

Function Documentation

char* cram_block_method2str ( enum cram_block_method  m)
int cram_close ( cram_fd *  fd)

Closes a CRAM file.

Returns
Returns 0 on success; -1 on failure
int cram_compress_block ( cram_fd *  fd,
cram_block *  b,
cram_metrics *  metrics,
int  method,
int  level 
)

Compresses a block.

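Compresses a block using one of two different zlib strategies. If we only want one choice, set strat2 to be -1. (Note: strat2 is not among the parameters in the signature shown here, so this remark appears to describe an earlier interface.)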

The logic here is that sometimes Z_RLE does a better job than Z_FILTERED or Z_DEFAULT_STRATEGY on quality data. If so, we'd rather use it as it is significantly faster.

Returns
Returns 0 on success; -1 on failure
char* cram_content_type2str ( enum cram_content_type  t)
cram_fd* cram_dopen ( struct hFILE *  fp,
const char *  filename,
const char *  mode 
)

Opens an existing stream for reading or writing.

Returns
Returns file handle on success; NULL on failure.
int cram_eof ( cram_fd *  fd)

Checks for end of file on a cram_fd stream.

Returns
Returns 0 if not at end of file; 1 if we hit an expected EOF (end of range or EOF block); 2 for other EOF (end of stream without EOF block).
int cram_flush ( cram_fd *  fd)
int cram_flush_container ( cram_fd *  fd,
cram_container *  c 
)

Flushes a container to disk.

Flushes a completely or partially full container to disk, writing container structure, header and blocks. This also calls the encoder functions.

Returns
Returns 0 on success; -1 on failure
int cram_flush_container_mt ( cram_fd *  fd,
cram_container *  c 
)
void* cram_flush_thread ( void *  arg)
void cram_free_block ( cram_block *  b)

Frees a CRAM block, deallocating internal data too.

void cram_free_compression_header ( cram_block_compression_hdr *  hdr)
void cram_free_container ( cram_container *  c)
void cram_free_file_def ( cram_file_def *  def)

Frees a cram_file_def structure.

void cram_free_slice ( cram_slice *  s)

Frees a slice

void cram_free_slice_header ( cram_block_slice_hdr *  hdr)

Slices and slice headers

Frees a slice header

char* cram_get_ref ( cram_fd *  fd,
int  id,
int  start,
int  end 
)

Returns a portion of a reference sequence from start to end inclusive.

The returned pointer is owned by the cram_fd fd and should not be freed by the caller. It is valid only until the next call to cram_get_ref with the same fd parameter (so is thread-safe if given multiple files).

To return the entire reference sequence, specify start as 1 and end as 0.

Returns
Returns reference on success; NULL on failure
int cram_load_reference ( cram_fd *  fd,
char *  fn 
)

Reference sequence handling

Loads a reference set from fn and stores in the cram_fd.

Returns
Returns 0 on success; -1 on failure
cram_block* cram_new_block ( enum cram_content_type  content_type,
int  content_id 
)

CRAM blocks - the dynamically growable data block. We have code to create, update, (un)compress and read/write.

These are derived from the deflate_interlaced.c blocks, but with the CRAM extension of content types and IDs.

Allocates a new cram_block structure with a specified content_type and id.

Returns
Returns block pointer on success; NULL on failure
cram_block_compression_hdr* cram_new_compression_header ( void  )

Compression headers; the first part of the container

Creates a new blank container compression header

Returns
Returns header ptr on success; NULL on failure
cram_container* cram_new_container ( int  nrec,
int  nslice 
)

Containers

Creates a new container, specifying the maximum number of slices and records permitted.

Returns
Returns cram_container ptr on success; NULL on failure
cram_metrics* cram_new_metrics ( void  )
cram_slice* cram_new_slice ( enum cram_content_type  type,
int  nrecs 
)

Creates a new empty slice in memory, for subsequent writing to disk.

Returns
Returns cram_slice ptr on success; NULL on failure
cram_fd* cram_open ( const char *  filename,
const char *  mode 
)

The top-level cram opening, closing and option handling

Opens a CRAM file for read (mode "rb") or write ("wb").

The filename may be "-" to indicate stdin or stdout.

Returns
Returns file handle on success; NULL on failure.
cram_block* cram_read_block ( cram_fd *  fd)

Reads a block from a cram file.

Returns
Returns cram_block pointer on success; NULL on failure
cram_container* cram_read_container ( cram_fd *  fd)

Reads a container header.

Returns
Returns cram_container on success; NULL on failure, or when no container is left (fd->err == 0).
cram_file_def* cram_read_file_def ( cram_fd *  fd)

CRAM file definition (header)

Reads a CRAM file definition structure.

Returns
Returns file_def ptr on success; NULL on failure
SAM_hdr* cram_read_SAM_hdr ( cram_fd *  fd)

SAM header I/O

Reads the SAM header from the first CRAM data block.

Also performs minimal parsing to extract read-group and sample information.

Returns
Returns SAM hdr ptr on success; NULL on failure
cram_slice* cram_read_slice ( cram_fd *  fd)

Loads an entire slice.

FIXME: In 1.0 the native unit of slices within CRAM is broken, as slices contain references to objects in other slices. To work around this while keeping the slice-oriented outer loop, we read all slices and stitch them together into a fake large slice instead.

Returns
Returns cram_slice ptr on success; NULL on failure
void cram_ref_decr ( refs_t *  r,
int  id 
)
void cram_ref_incr ( refs_t *  r,
int  id 
)
ref_entry* cram_ref_load ( refs_t *  r,
int  id 
)
int cram_seek ( cram_fd *  fd,
off_t  offset,
int  whence 
)
int cram_set_header ( cram_fd *  fd,
SAM_hdr *  hdr 
)

Attaches a header to a cram_fd.

This should be used when creating a new cram_fd for writing where we already have a SAM_hdr constructed (e.g. from a file we've read in).

Returns
Returns 0 on success; -1 on failure
int cram_set_option ( cram_fd *  fd,
enum cram_option  opt,
  ... 
)

Sets options on the cram_fd.

See CRAM_OPT_* definitions in cram_structs.h. Use this immediately after opening.

Returns
Returns 0 on success; -1 on failure
int cram_set_voption ( cram_fd *  fd,
enum cram_option  opt,
va_list  args 
)

Sets options on the cram_fd.

See CRAM_OPT_* definitions in cram_structs.h. Use this immediately after opening.

Returns
Returns 0 on success; -1 on failure
int cram_uncompress_block ( cram_block *  b)

Uncompresses a CRAM block, if compressed.

Returns
Returns 0 on success; -1 on failure
int cram_write_block ( cram_fd *  fd,
cram_block *  b 
)

Writes a CRAM block.

Returns
Returns 0 on success; -1 on failure
int cram_write_container ( cram_fd *  fd,
cram_container *  h 
)

Writes a container structure.

Returns
Returns 0 on success; -1 on failure
int cram_write_file_def ( cram_fd *  fd,
cram_file_def *  def 
)

Writes a cram_file_def structure to cram_fd.

Returns
Returns 0 on success; -1 on failure
int cram_write_SAM_hdr ( cram_fd *  fd,
SAM_hdr *  hdr 
)

Writes a CRAM SAM header.

Returns
Returns 0 on success; -1 on failure
void expand_cache_path ( char *  path,
char *  dir,
char *  fn 
)
int int32_decode ( cram_fd *  fd,
int32_t *  val 
)
int int32_encode ( cram_fd *  fd,
int32_t  val 
)
int int32_get ( cram_block *  b,
int32_t *  val 
)
int int32_put ( cram_block *  b,
int32_t  val 
)
int itf8_decode ( cram_fd *  fd,
int32_t *  val 
)

Reads an integer in ITF-8 encoding from 'fd' and stores it in *val.

Returns
Returns the number of bytes read on success; -1 on failure
int itf8_encode ( cram_fd *  fd,
int32_t  val 
)
int itf8_get ( char *  cp,
int32_t *  val_p 
)
int itf8_put ( char *  cp,
int32_t  val 
)
int itf8_put_blk ( cram_block *  blk,
int  val 
)

Pushes a value in ITF8 format onto the end of a block.

This shouldn't be used for high-volume data as it is not the fastest method.

Returns
Returns the number of bytes written
int ltf8_decode ( cram_fd *  fd,
int64_t *  val_p 
)
int ltf8_get ( char *  cp,
int64_t *  val_p 
)
int ltf8_put ( char *  cp,
int64_t  val 
)
void mkdir_prefix ( char *  path,
int  mode 
)
int paranoid_fclose ( FILE *  fp)
int refs2id ( refs_t *  r,
SAM_hdr *  bfd 
)

Generates a lookup table in refs based on the SQ headers in SAM_hdr.

Indexes references by the order they appear in a BAM file. This may not necessarily be the same order they appear in the fasta reference file.

Returns
Returns 0 on success; -1 on failure
void refs_free ( refs_t *  r)
char* zlib_mem_inflate ( char *  cdata,
size_t  csize,
size_t *  size 
)

Uncompress a memory block using Zlib.

Returns
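Returns char * rather than an integer status: presumably a pointer to the uncompressed data on success (with its length stored in *size) and NULL on failure — confirm against the implementation.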