Gamgee
You miserable little maggot. I'll stove your head in!
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
cram_structs.h
Go to the documentation of this file.
1 /*
2 Copyright (c) 2012-2013 Genome Research Ltd.
3 Author: James Bonfield <jkb@sanger.ac.uk>
4 
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7 
8  1. Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 
11  2. Redistributions in binary form must reproduce the above copyright notice,
12 this list of conditions and the following disclaimer in the documentation
13 and/or other materials provided with the distribution.
14 
15  3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
16 Institute nor the names of its contributors may be used to endorse or promote
17 products derived from this software without specific prior written permission.
18 
19 THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
23 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30 
31 #ifndef _CRAM_STRUCTS_H_
32 #define _CRAM_STRUCTS_H_
33 
34 #ifdef __cplusplus
35 extern "C" {
36 #endif
37 
38 /*
39  * Defines in-memory structs for the basic file-format objects in the
40  * CRAM format.
41  *
42  * The basic file format is:
43  * File-def SAM-hdr Container Container ...
44  *
45  * Container:
46  * Service-block data-block data-block ...
47  *
48  * Multiple blocks in a container are grouped together as slices,
49  * also sometimes referred to as landmarks in the spec.
50  */
51 
52 
53 #include <stdint.h>
54 
55 #include "cram/thread_pool.h"
56 #include "cram/string_alloc.h"
57 #include "htslib/khash.h"
58 
59 // Generic hash-map integer -> integer
60 KHASH_MAP_INIT_INT(m_i2i, int)
61 
62 // Generic hash-set integer -> (existence)
63 KHASH_SET_INIT_INT(s_i2i)
64 
65 // For brevity
66 typedef unsigned char uc;
67 
68 /*
69  * A union for the preservation map. Required for khash.
 *
 * A value is either an int (i) or a char pointer (p); both members share
 * the same storage, so only the member last written may be read.
 * NOTE(review): which preservation-map keys use which member is not visible
 * here -- confirm against the code that fills the map.
70  */
71 typedef union {
72  int i; // integer-valued entry
73  char *p; // pointer-valued entry
74 } pmap_t;
75 
76 // Generates static functions here which isn't ideal, but we have no way
77 // currently to declare the kh_map_t structure here without also declaring a
78 // duplicate in the .c files due to the nature of the KHASH macros.
80 
81 struct hFILE;
82 
// NOTE(review): presumably the default number of sequences per slice and
// slices per container -- confirm where these are consumed.
83 #define SEQS_PER_SLICE 10000
84 #define SLICE_PER_CNT 1
85 
// 20 characters, i.e. five groups of four: CGTN AGTN ACTN ACGN ACGT.
// Each group leaves out one of A, C, G, T, N in turn.
86 #define CRAM_SUBST_MATRIX "CGTNAGTNACTNACGNACGT"
87 
// Number of counters held directly in cram_stats.freqs[].
88 #define MAX_STAT_VAL 1024
89 //#define MAX_STAT_VAL 16
/*
 * Value statistics: a fixed array of MAX_STAT_VAL counters plus an
 * integer -> integer hash (m_i2i, declared with KHASH_MAP_INIT_INT above).
 * NOTE(review): presumably values below MAX_STAT_VAL are counted in freqs[]
 * and all others in h -- confirm against the code that updates this struct.
 */
90 typedef struct {
91  int freqs[MAX_STAT_VAL]; // MAX_STAT_VAL counters
92  khash_t(m_i2i) *h; // integer -> integer hash
93  int nsamp; // total number of values added
94  int nvals; // total number of unique values added
95 } cram_stats;
96 
97 /* NB: matches java impl, not the spec */
99  E_NULL = 0,
101  E_GOLOMB = 2,
105  E_BETA = 6,
106  E_SUBEXP = 7,
109 };
110 
112  E_INT = 1,
113  E_LONG = 2,
114  E_BYTE = 3,
117 };
118 
119 /* External IDs used by this implementation (only assumed during writing) */
121  DS_CORE = 0,
122  DS_aux = 1, // aux_blk
127  DS_aux_FZ = 6, // also ZM:B
128  DS_aux_oq = 7, // other qualities
129  DS_aux_os = 8, // other sequences
130  DS_aux_oz = 9, // other strings
132  DS_RN, // name_blk
133  DS_QS, // qual_blk
134  DS_IN, // base_blk
135  DS_SC, // soft_blk
136 
137  DS_BF, // start loop
161 
162  DS_TN, // end loop
163 
168 
169  DS_TC, // CRAM v1.0 tags
170  DS_TM, // test
171  DS_TV, // test
172 
174 };
175 
176 /* "File Definition Structure" */
/*
 * In-memory form of the file definition: a 4-byte magic, one byte each for
 * the major and minor version, and a 20-byte file identifier.
 */
177 typedef struct {
178  char magic[4]; // 4 bytes of magic
179  uint8_t major_version;
180  uint8_t minor_version;
181  char file_id[20]; // Filename or SHA1 checksum
182 } cram_file_def;
183 
/*
 * Split a packed version number v: the major version is everything above
 * the low 8 bits, the minor version is the low 8 bits.
 */
184 #define CRAM_MAJOR_VERS(v) ((v) >> 8)
185 #define CRAM_MINOR_VERS(v) ((v) & 0xff)
186 
187 struct cram_slice;
188 
190  ERROR = -1,
191  RAW = 0,
192  GZIP = 1,
193  BZIP2 = 2,
194  LZMA = 3,
195  RANS = 4, // Generic; either order
196  RANS0 = 4,
197  RANS1 = 10, // Not externalised; stored as RANS (generic)
198  GZIP_RLE = 11, // NB: not externalised in CRAM
199 };
200 
202  CT_ERROR = -1,
206  UNMAPPED_SLICE = 3, // CRAM V1.0 only
207  EXTERNAL = 4,
208  CORE = 5,
209 };
210 
211 /* Compression metrics */
/*
 * Bookkeeping for compression-method trials: trial counters, aggregate
 * sizes per method, the method/strategy chosen from the trials, per-method
 * revision counters and per-method *_extra values.
 * NOTE(review): several members of this struct are absent from this listing
 * (the gutter numbers jump); only the visible ones are described.
 */
212 typedef struct {
213  // number of trials and time to next trial
214  int trial;
216
217  // aggregate sizes during trials
220  int sz_rans0;
221  int sz_rans1;
222  int sz_bzip2;
223  int sz_lzma;
224 
225  // resultant method from trials
226  int method;
227  int strat;
228 
229  // Revisions of method, to allow culling of continually failing ones.
235  int lzma_cnt;
237
 // NOTE(review): the meaning and units of the *_extra values are not
 // visible in this file -- confirm against the code that uses them.
238  double gz_rle_extra;
239  double gz_def_extra;
240  double rans0_extra;
241  double rans1_extra;
242  double bzip2_extra;
243  double lzma_extra;
244 } cram_metrics;
245 
246 /* Block */
/*
 * One block: its compression method, content type and id, size fields,
 * CRC and data buffer, followed by the cursor state used for bit I/O.
 */
247 typedef struct {
248  enum cram_block_method method, orig_method;
249  enum cram_content_type content_type;
250  int32_t content_id;
251  int32_t comp_size;
252  int32_t uncomp_size;
253  uint32_t crc32;
254  int32_t idx; /* offset into data */
255  unsigned char *data;
256 
257  // For bit I/O
 // NOTE(review): presumably alloc is the allocated size of data and
 // byte/bit the current position within it -- confirm.
258  size_t alloc;
259  size_t byte;
260  int bit;
261 } cram_block;
262 
263 struct cram_codec; /* defined in cram_codecs.h */
264 struct cram_map;
265 
/*
 * CRAM_MAP_HASH is the number of entries in the rec/tag encoding map arrays.
 * CRAM_MAP(a,b) combines two values as a*3+b and masks the result down to an
 * index below CRAM_MAP_HASH. The mask form relies on CRAM_MAP_HASH being a
 * power of two.
 */
266 #define CRAM_MAP_HASH 32
267 #define CRAM_MAP(a,b) (((a)*3+(b))&(CRAM_MAP_HASH-1))
268 
269 /* Compression header block */
270 typedef struct {
271  int32_t ref_seq_id;
272  int32_t ref_seq_start;
273  int32_t ref_seq_span;
274  int32_t num_records;
275  int32_t num_landmarks;
276  int32_t *landmark;
277 
278  /* Flags from preservation map */
284  int AP_delta;
285  // indexed by ref-base and subst. code
286  char substitution_matrix[5][4];
287 
288  // TD Dictionary as a concatenated block
289  cram_block *TD_blk; // Tag Dictionary
290  int nTL; // number of TL entries in TD
291  unsigned char **TL; // array of size nTL, pointer into TD_blk.
292  khash_t(m_s2i) *TD_hash; // Keyed on TD strings, map to TL[] indices
293  string_alloc_t *TD_keys; // Pooled keys for TD hash.
294 
295  khash_t(map) *preservation_map;
296  struct cram_map *rec_encoding_map[CRAM_MAP_HASH];
297  struct cram_map *tag_encoding_map[CRAM_MAP_HASH];
298 
299  struct cram_codec *codecs[DS_END];
300 
301  char *uncomp; // A single block of uncompressed data
302  size_t uncomp_size, uncomp_alloc;
303 
304  unsigned int data_series; // See cram_fields enum below
306 
/*
 * One entry of an encoding map: a key, the offset/size of its data within a
 * single block of memory, the codec for it, and a link to the next entry in
 * the same hash chain.
 * NOTE(review): listing line 309 is missing here; the index at the end of
 * this page lists "enum cram_encoding encoding" at that line.
 */
307 typedef struct cram_map {
308  int key; /* 0xe0 + 3 bytes */
310  int offset; /* Offset into a single block of memory */
311  int size; /* Size */
312  struct cram_codec *codec;
313  struct cram_map *next; // for noddy internal hash
314 } cram_map;
315 
316 /* Mapped or unmapped slice header block */
317 typedef struct {
318  enum cram_content_type content_type;
319  int32_t ref_seq_id; /* if content_type == MAPPED_SLICE */
320  int32_t ref_seq_start; /* if content_type == MAPPED_SLICE */
321  int32_t ref_seq_span; /* if content_type == MAPPED_SLICE */
322  int32_t num_records;
323  int32_t record_counter;
324  int32_t num_blocks;
327  int32_t ref_base_id; /* if content_type == MAPPED_SLICE */
328  unsigned char md5[16];
330 
331 struct ref_entry;
332 
333 /*
334  * Container.
335  *
336  * Conceptually a container is split into slices, and slices into blocks.
337  * However on disk it's just a list of blocks and we need to query the
338  * block types to identify the start/end points of the slices.
339  *
340  * OR... are landmarks the start/end points of slices?
341  */
342 typedef struct {
343  int32_t length;
344  int32_t ref_seq_id;
345  int32_t ref_seq_start;
346  int32_t ref_seq_span;
347  int32_t record_counter;
348  int64_t num_bases;
349  int32_t num_records;
350  int32_t num_blocks;
351  int32_t num_landmarks;
352  int32_t *landmark;
353 
354  /* Size of container header above */
355  size_t offset;
356 
357  /* Compression header is always the first block? */
360 
361  /* For construction purposes */
362  int max_slice, curr_slice; // maximum number of slices
363  int max_rec, curr_rec; // current and max recs per slice
364  int max_c_rec, curr_c_rec; // current and max recs per container
365  int slice_rec; // rec no. for start of this slice
366  int curr_ref; // current ref ID. -2 for no previous
367  int last_pos; // last record position
368  struct cram_slice **slices, *slice;
369  int pos_sorted; // boolean, 1=>position sorted data
370  int max_apos; // maximum position, used if pos_sorted==0
371  int last_slice; // number of reads in last slice (0 for 1st)
372  int multi_seq; // true if packing multi seqs per cont/slice
373  int unsorted; // true is AP_delta is 0.
374 
375  /* Copied from fd before encoding, to allow multi-threading */
376  int ref_start, first_base, last_base, ref_id, ref_end;
377  char *ref;
378  //struct ref_entry *ref;
379 
380  /* For multi-threading */
382 
383  /* Statistics for encoding */
385 
386  khash_t(s_i2i) *tags_used; // set of tag types in use, for tag encoding map
387  int *refs_used; // array of frequency of ref seq IDs
388 
389  uint32_t crc32; // CRC32
391 
392 /*
393  * A single cram record
 *
 * Variable-length data is not stored in the record itself: name, aux, seq,
 * qual, cigar and feature are indices into buffers held by the slice `s`
 * (see the cram_qname/cram_seq/cram_qual/cram_aux/cram_cigar macros).
 * The two-letter codes in the member comments (BF, CF, RL, ...) name the
 * data series the value belongs to.
394  */
395 typedef struct {
396  struct cram_slice *s; // Filled out by cram_decode only
397 
398  int32_t ref_id; // fixed for all recs in slice?
399  int32_t flags; // BF
400  int32_t cram_flags; // CF
401  int32_t len; // RL
402  int32_t apos; // AP
403  int32_t rg; // RG
404  int32_t name; // RN; idx to s->name_blk
405  int32_t name_len;
406  int32_t mate_line; // index to another cram_record
407  int32_t mate_ref_id;
408  int32_t mate_pos; // NP
409  int32_t tlen; // TS
410 
411  // Auxiliary data
412  int32_t ntags; // TC
413  int32_t aux; // idx to s->aux_blk
414  int32_t aux_size; // total size of packed ntags in aux_blk
415 #ifndef TN_external
416  int32_t TN_idx; // TN; idx to s->TN;
417 #else
418  int32_t tn; // idx to s->tn_blk
419 #endif
420  int TL;
421 
422  int32_t seq; // idx to s->seqs_blk
423  int32_t qual; // idx to s->qual_blk
424  int32_t cigar; // idx to s->cigar
425  int32_t ncigar;
426  int32_t aend; // alignment end
427  int32_t mqual; // MQ
428 
429  int32_t feature; // idx to s->features
430  int32_t nfeature; // number of features
431  int32_t mate_flags; // MF
432 } cram_record;
433 
434 // Accessor macros as an analogue of the bam ones
// Each macro takes `c`, a pointer to a cram_record whose `s` member points
// at the owning slice, and resolves one of the record's indices against the
// matching slice buffer:
//   cram_qname / cram_seq / cram_qual / cram_aux
//       -> address inside the name_blk / seqs_blk / qual_blk / aux_blk data
//   cram_seqi(c,i)  -> the i-th byte starting at cram_seq(c)
//   cram_name_len   -> c->name_len
//   cram_strand / cram_mstrand
//       -> 1 if BAM_FREVERSE / BAM_FMREVERSE is set in c->flags, else 0
//          (the BAM_* flags are not defined in this file)
//   cram_cigar      -> address of the record's first entry in s->cigar
// `c` may be evaluated more than once, so it must have no side effects.
435 #define cram_qname(c) (&(c)->s->name_blk->data[(c)->name])
436 #define cram_seq(c) (&(c)->s->seqs_blk->data[(c)->seq])
437 #define cram_qual(c) (&(c)->s->qual_blk->data[(c)->qual])
438 #define cram_aux(c) (&(c)->s->aux_blk->data[(c)->aux])
439 #define cram_seqi(c,i) (cram_seq((c))[(i)])
440 #define cram_name_len(c) ((c)->name_len)
441 #define cram_strand(c) (((c)->flags & BAM_FREVERSE) != 0)
442 #define cram_mstrand(c) (((c)->flags & BAM_FMREVERSE) != 0)
// Uses the macro parameter `c` throughout; an earlier form referred to an
// unrelated identifier `cr`, which only compiled if the caller happened to
// have a variable of that name in scope.
443 #define cram_cigar(c) (&((c)->s->cigar)[(c)->cigar])
444 
445 /*
446  * A feature is a base difference, used for the sequence reference encoding.
447  * (We generate these internally when writing CRAM.)
 *
 * Every variant starts with the same two int members, pos and code, so
 * those two can be read through any variant. The union is anonymous, so a
 * variant is reached directly from the struct, e.g. f->X.base.
 * NOTE(review): the variant names (X, B, b, Q, S, I, i, D, N, P, H)
 * presumably mirror the feature codes stored in `code` -- confirm.
448  */
449 typedef struct {
450  union {
451  struct {
452  int pos;
453  int code;
454  int base; // substitution code
455  } X;
456  struct {
457  int pos;
458  int code;
459  int base; // actual base & qual
460  int qual;
461  } B;
462  struct {
463  int pos;
464  int code;
465  int seq_idx; // index to s->seqs_blk
466  int len;
467  } b;
468  struct {
469  int pos;
470  int code;
471  int qual;
472  } Q;
473  struct {
474  int pos;
475  int code;
476  int len;
477  int seq_idx; // soft-clip multiple bases
478  } S;
479  struct {
480  int pos;
481  int code;
482  int len;
483  int seq_idx; // insertion multiple bases
484  } I;
485  struct {
486  int pos;
487  int code;
488  int base; // insertion single base
489  } i;
490  struct {
491  int pos;
492  int code;
493  int len;
494  } D;
495  struct {
496  int pos;
497  int code;
498  int len;
499  } N;
500  struct {
501  int pos;
502  int code;
503  int len;
504  } P;
505  struct {
506  int pos;
507  int code;
508  int len;
509  } H;
510  };
511 } cram_feature;
512 
513 /*
514  * A slice is really just a set of blocks, but it
515  * is the logical unit for decoding a number of
516  * sequences.
 *
 * NOTE(review): many members of this struct are absent from this listing
 * (the gutter numbers jump), including the ones that the section comments
 * below introduce; only the visible members are described.
517  */
518 typedef struct cram_slice {
523
524  /* State used during encoding/decoding */
526
527  /* Identifier used for auto-assigning read names */
528  uint64_t id;
529 
530  /* Array of decoded cram records */
532
533  /* Dynamically growing buffers for data pointed
534  * to by the crecs[] array.
535  */
536  uint32_t *cigar;
537  uint32_t cigar_alloc;
538  uint32_t ncigar;
539 
542  int afeatures; // allocated size of features
543 
544 #ifndef TN_external
545  // TN field (Tag Name)
546  uint32_t *TN;
547  int nTN, aTN; // used and allocated size for TN[]
548 #else
549  cram_block *tn_blk;
550  int tn_id;
551 #endif
552 
553  // For variable sized elements which are always external blocks.
568
569  string_alloc_t *pair_keys; // Pooled keys for pair hash.
570  khash_t(m_s2i) *pair[2]; // for identifying read-pairs in this slice.
571 
572  char *ref; // slice of current reference
573  int ref_start; // start position of current reference;
574  int ref_end; // end position of current reference;
575  int ref_id;
576 } cram_slice;
577 
578 /*-----------------------------------------------------------------------------
579  * Consider moving reference handling to cram_refs.[ch]
580  */
581 // from fa.fai / samtools faidx files
// One reference sequence: its name, a filename, its length and offset, a
// use count for sharing, and the sequence data itself.
// NOTE(review): listing lines 587-588 are missing here; the index at the end
// of this page lists "int bases_per_line" and "int line_length" there.
582 typedef struct ref_entry {
583  char *name;
584  char *fn;
585  int64_t length;
586  int64_t offset;
589  int64_t count; // for shared references so we know to dealloc seq
590  char *seq;
591 } ref_entry;
592 
594 
595 // References structure.
// Holds the set of known reference entries (by name and by ID), the file
// currently open, a count of sharers, and a mutex guarding updates.
596 typedef struct {
597  string_alloc_t *pool; // String pool for holding filenames and SN vals
598 
599  khash_t(refs) *h_meta; // ref_entry*, index by name
600  ref_entry **ref_id; // ref_entry*, index by ID
601  int nref; // number of ref_entry
602 
603  char *fn; // current file opened
604  BGZF *fp; // and the open handle (BGZF*) to go with it.
605 
606  int count; // how many cram_fd sharing this refs struct
607 
608  pthread_mutex_t lock; // Mutex for multi-threaded updating
609  ref_entry *last; // Last queried sequence
610  int last_id; // Used in cram_ref_decr_locked to delay free
611 } refs_t;
612 
613 /*-----------------------------------------------------------------------------
614  * CRAM index
615  *
616  * Detect format by number of entries per line.
617  * 5 => 1.0 (refid, start, nseq, C offset, slice)
618  * 6 => 1.1 (refid, start, span, C offset, S offset, S size)
619  *
620  * Indices are stored in a nested containment list, which is trivial to set
621  * up as the indices are on sorted data so we're appending to the nclist
622  * in sorted order. Basically if a slice entirely fits within a previous
623  * slice then we append to that slice's list. This is done recursively.
624  *
625  * Lists are sorted on two dimensions: ref id + slice coords.
 *
 * In the member comments below, "1.0" / "1.1" mark which index format
 * supplies the value.
626  */
627 typedef struct cram_index {
628  int nslice, nalloc; // nslice: total number of slices; nalloc: presumably the allocated size of e[] -- confirm
629  struct cram_index *e; // array of size nslice
630 
631  int refid; // 1.0 1.1
632  int start; // 1.0 1.1
633  int end; // 1.1
634  int nseq; // 1.0 - undocumented
635  int slice; // 1.0 landmark index, 1.1 landmark value
636  int len; // 1.1 - size of slice in bytes
637  int64_t offset; // 1.0 1.1
638 } cram_index;
639 
// A (refid, start, end) triple.
// NOTE(review): presumably the region a reader is restricted to; whether
// start/end are inclusive or 0/1-based is not visible here -- confirm.
640 typedef struct {
641  int refid;
642  int start;
643  int end;
644 } cram_range;
645 
646 /*-----------------------------------------------------------------------------
647  */
648 /* CRAM File handle */
649 
// Node of a singly linked list, chained through `next`.
// NOTE(review): listing line 651 is missing here; the index at the end of
// this page lists "bam_seq_t ** bams" at that line.
650 typedef struct spare_bams {
652  struct spare_bams *next;
653 } spare_bams;
654 
/*
 * State for one open CRAM file: the underlying hFILE, mode and version, the
 * cached reference, compression level and options, lookup tables, the index,
 * and the mutexes used with the thread pool.
 * NOTE(review): many members are absent from this listing (the gutter
 * numbers jump), including the ones that some section comments below
 * introduce; only the visible members are described.
 */
655 typedef struct cram_fd {
656  struct hFILE *fp;
657  int mode; // 'r' or 'w'
658  int version;
661
662  char *prefix;
665  int err;
666 
667  // Most recent compression header decoded
668  //cram_block_compression_hdr *comp_hdr;
669  //cram_block_slice_hdr *slice_hdr;
670 
671  // Current container being processed.
673
674  // positions for encoding or decoding
676
677  // cached reference portion
678  refs_t *refs; // ref meta-data structure
679  char *ref, *ref_free; // current portion held in memory
680  int ref_id;
682  int ref_end;
683  char *ref_fn; // reference fasta filename
684 
685  // compression level and metrics
686  int level;
688
689  // options
690  int decode_md; // Whether to export MD and NM tags
691  int verbose;
695  int no_ref;
697  int use_bz2;
698  int use_rans;
699  int use_lzma;
701  unsigned int required_fields; // NOTE(review): presumably a mask of the CRAM_* data-series bits -- confirm
703
704  // lookup tables, stored here so we can be trivially multi-threaded
705  unsigned int bam_flag_swap[0x1000]; // cram -> bam flags
706  unsigned int cram_flag_swap[0x1000];// bam -> cram flags
707  unsigned char L1[256]; // ACGT{*} ->0123{4}
708  unsigned char L2[256]; // ACGTN{*}->01234{5}
709  char cram_sub_matrix[32][32]; // base substitution codes
710 
711  int index_sz;
712  cram_index *index; // array, sizeof index_sz
714  int eof;
715  int last_slice; // number of recs encoded in last slice
717  int unsorted;
718  int empty_container; // Marker for EOF block
719 
720  // thread pool
721  int own_pool;
724  pthread_mutex_t metrics_lock;
725  pthread_mutex_t ref_lock;
727  pthread_mutex_t bam_list_lock;
728  void *job_pending;
729  int ooc; // out of containers.
730 } cram_fd;
731 
732 // Translation of required fields to cram data series
734  CRAM_BF = 0x00000001,
735  CRAM_AP = 0x00000002,
736  CRAM_FP = 0x00000004,
737  CRAM_RL = 0x00000008,
738  CRAM_DL = 0x00000010,
739  CRAM_NF = 0x00000020,
740  CRAM_BA = 0x00000040,
741  CRAM_QS = 0x00000080,
742  CRAM_FC = 0x00000100,
743  CRAM_FN = 0x00000200,
744  CRAM_BS = 0x00000400,
745  CRAM_IN = 0x00000800,
746  CRAM_RG = 0x00001000,
747  CRAM_MQ = 0x00002000,
748  CRAM_TL = 0x00004000,
749  CRAM_RN = 0x00008000,
750  CRAM_NS = 0x00010000,
751  CRAM_NP = 0x00020000,
752  CRAM_TS = 0x00040000,
753  CRAM_MF = 0x00080000,
754  CRAM_CF = 0x00100000,
755  CRAM_RI = 0x00200000,
756  CRAM_RS = 0x00400000,
757  CRAM_PD = 0x00800000,
758  CRAM_HC = 0x01000000,
759  CRAM_SC = 0x02000000,
760  CRAM_BB = 0x04000000,
761  CRAM_BB_len = 0x08000000,
762  CRAM_QQ = 0x10000000,
763  CRAM_QQ_len = 0x20000000,
764  CRAM_aux= 0x40000000,
765  CRAM_ALL= 0x7fffffff,
766 };
767 
768 // A CIGAR opcode, but not necessarily the implications of it. Eg FC/FP may
769 // encode a base difference, but we don't need to know what it is for CIGAR.
770 // If we have a soft-clip or insertion, we do need SC/IN though to know how
771 // long that array is.
// CRAM_CIGAR is the union of the data-series bits listed below.
772 #define CRAM_CIGAR (CRAM_FN | CRAM_FP | CRAM_FC | CRAM_DL | CRAM_IN | \
773  CRAM_SC | CRAM_HC | CRAM_PD | CRAM_RS | CRAM_RL | CRAM_BF)
774 
// CRAM_SEQ is everything in CRAM_CIGAR plus the bits listed here. CRAM_RL
// is already part of CRAM_CIGAR, so naming it again is harmless.
775 #define CRAM_SEQ (CRAM_CIGAR | CRAM_BA | CRAM_QS | CRAM_BS | \
776  CRAM_RL | CRAM_AP | CRAM_BB | CRAM_QQ)
777 
778 /* BF bitfields */
779 /* Corrected in 1.1. Use bam_flag_swap[bf] and BAM_* macros for 1.0 & 1.1 */
/* One bit per flag, from CRAM_FDUP (value 1) up to CRAM_FPAIRED (value 256). */
780 #define CRAM_FPAIRED 256
781 #define CRAM_FPROPER_PAIR 128
782 #define CRAM_FUNMAP 64
783 #define CRAM_FREVERSE 32
784 #define CRAM_FREAD1 16
785 #define CRAM_FREAD2 8
786 #define CRAM_FSECONDARY 4
787 #define CRAM_FQCFAIL 2
788 #define CRAM_FDUP 1
789 
// One-character string forms of small integer IDs, written as octal escapes:
// "\001" is 1 ... "\007" is 7, "\010" is 8, "\011" is 9.
// These line up with the visible DS_aux* enumerators (DS_aux = 1,
// DS_aux_FZ = 6, DS_aux_oq = 7, DS_aux_os = 8, DS_aux_oz = 9).
790 #define DS_aux_S "\001"
791 #define DS_aux_OQ_S "\002"
792 #define DS_aux_BQ_S "\003"
793 #define DS_aux_BD_S "\004"
794 #define DS_aux_BI_S "\005"
795 #define DS_aux_FZ_S "\006"
796 #define DS_aux_oq_S "\007"
797 #define DS_aux_os_S "\010"
798 #define DS_aux_oz_S "\011"
799 
// Two single-bit flag values.
// NOTE(review): presumably the bits of the mate flags (MF, see
// cram_record.mate_flags) -- confirm.
800 #define CRAM_M_REVERSE 1
801 #define CRAM_M_UNMAP 2
802 
803 
804 /* CF bitfields */
/* Bits 0-2 of the CF value (cram_record.cram_flags is annotated CF). */
805 #define CRAM_FLAG_PRESERVE_QUAL_SCORES (1<<0)
806 #define CRAM_FLAG_DETACHED (1<<1)
807 #define CRAM_FLAG_MATE_DOWNSTREAM (1<<2)
808 
809 #ifdef __cplusplus
810 }
811 #endif
812 
813 #endif /* _CRAM_STRUCTS_H_ */
int32_t mate_ref_id
Definition: cram_structs.h:407
int lzma_cnt
Definition: cram_structs.h:235
Definition: cram_structs.h:755
uint8_t minor_version
Definition: cram_structs.h:180
#define KHASH_MAP_INIT_INT(name, khval_t)
Definition: khash.h:583
cram_fields
Definition: cram_structs.h:733
Definition: cram_structs.h:144
cram_block * aux_BD_blk
Definition: cram_structs.h:562
Definition: cram_structs.h:114
Definition: cram_structs.h:102
Definition: cram_structs.h:128
int eof
Definition: cram_structs.h:714
int32_t len
Definition: cram_structs.h:401
char * fn
Definition: cram_structs.h:603
int32_t cram_flags
Definition: cram_structs.h:400
Definition: cram_structs.h:192
Definition: cram_structs.h:762
unsigned int required_fields
Definition: cram_structs.h:701
Definition: cram_structs.h:763
enum cram_encoding encoding
Definition: cram_structs.h:309
Definition: cram_structs.h:152
int ref_id
Definition: cram_structs.h:680
Definition: cram_structs.h:170
int bzip2_cnt
Definition: cram_structs.h:234
size_t offset
Definition: cram_structs.h:355
int ignore_md5
Definition: cram_structs.h:696
int32_t num_content_ids
Definition: cram_structs.h:325
int32_t ref_base_id
Definition: cram_structs.h:327
int embed_ref
Definition: cram_structs.h:694
Definition: cram_structs.h:138
int32_t * block_content_ids
Definition: cram_structs.h:326
int max_apos
Definition: cram_structs.h:525
Definition: cram_structs.h:121
char * ref
Definition: cram_structs.h:377
Definition: cram_structs.h:190
int seq_idx
Definition: cram_structs.h:465
int32_t ncigar
Definition: cram_structs.h:425
Definition: cram_structs.h:146
int32_t aux
Definition: cram_structs.h:413
int nslice
Definition: cram_structs.h:628
Definition: cram_structs.h:126
int unmapped_qs_included
Definition: cram_structs.h:280
int qual
Definition: cram_structs.h:460
Definition: hfile.h:42
ref_entry * last
Definition: cram_structs.h:609
int64_t count
Definition: cram_structs.h:589
int32_t mate_flags
Definition: cram_structs.h:431
Definition: cram_structs.h:160
int32_t aend
Definition: cram_structs.h:426
int64_t offset
Definition: cram_structs.h:637
Definition: cram_structs.h:206
struct cram_index cram_index
cram_external_type
Definition: cram_structs.h:111
Definition: string_alloc.h:52
int nfeatures
Definition: cram_structs.h:541
int end
Definition: cram_structs.h:643
Definition: cram_structs.h:749
int bases_per_line
Definition: cram_structs.h:587
Definition: cram_structs.h:736
Definition: cram_structs.h:743
cram_block * name_blk
Definition: cram_structs.h:554
int strat
Definition: cram_structs.h:227
int sz_rans1
Definition: cram_structs.h:221
int use_rans
Definition: cram_structs.h:698
Definition: cram_structs.h:156
cram_block * aux_blk
Definition: cram_structs.h:559
Definition: cram_structs.h:761
Definition: cram_structs.h:745
Definition: cram_structs.h:101
int refid
Definition: cram_structs.h:631
int sz_gz_rle
Definition: cram_structs.h:218
int32_t num_landmarks
Definition: cram_structs.h:351
char * ref_free
Definition: cram_structs.h:679
uint32_t ncigar
Definition: cram_structs.h:538
int shared_ref
Definition: cram_structs.h:700
cram_block * comp_hdr_block
Definition: cram_structs.h:359
Definition: cram_structs.h:130
char * seq
Definition: cram_structs.h:590
int nTL
Definition: cram_structs.h:290
struct ref_entry ref_entry
int slice_rec
Definition: cram_structs.h:365
Definition: cram_structs.h:169
int len
Definition: cram_structs.h:466
Definition: cram_structs.h:757
int32_t num_records
Definition: cram_structs.h:349
Definition: cram_structs.h:449
int32_t mqual
Definition: cram_structs.h:427
Definition: cram_structs.h:154
int nsamp
Definition: cram_structs.h:93
struct cram_map cram_map
Definition: cram_structs.h:124
int32_t ref_seq_id
Definition: cram_structs.h:319
int afeatures
Definition: cram_structs.h:542
Definition: cram_structs.h:198
int32_t ntags
Definition: cram_structs.h:412
int refid
Definition: cram_structs.h:641
Definition: cram_structs.h:112
Definition: cram_structs.h:105
int32_t mate_pos
Definition: cram_structs.h:408
int index_sz
Definition: cram_structs.h:711
int32_t record_counter
Definition: cram_structs.h:347
int nref
Definition: cram_structs.h:601
cram_block * qual_blk
Definition: cram_structs.h:556
int sz_lzma
Definition: cram_structs.h:223
Definition: cram_structs.h:208
cram_block ** block
Definition: cram_structs.h:521
uint32_t * cigar
Definition: cram_structs.h:536
Definition: cram_structs.h:140
int offset
Definition: cram_structs.h:310
int32_t tlen
Definition: cram_structs.h:409
Definition: cram_structs.h:108
int use_bz2
Definition: cram_structs.h:697
Definition: cram_structs.h:753
int32_t feature
Definition: cram_structs.h:429
Definition: cram_structs.h:135
Definition: sam_header.h:183
int32_t ref_seq_start
Definition: cram_structs.h:272
int revised_method
Definition: cram_structs.h:236
int sz_bzip2
Definition: cram_structs.h:222
Definition: cram_structs.h:212
int32_t nfeature
Definition: cram_structs.h:430
int32_t ref_seq_span
Definition: cram_structs.h:321
off_t first_container
Definition: cram_structs.h:713
Definition: cram_structs.h:167
Definition: cram_structs.h:759
Definition: cram_structs.h:153
Definition: cram_structs.h:113
SAM_hdr * header
Definition: cram_structs.h:660
int32_t ref_seq_start
Definition: cram_structs.h:320
int32_t * landmark
Definition: cram_structs.h:276
Definition: thread_pool.h:81
int seqs_per_slice
Definition: cram_structs.h:692
int no_ref
Definition: cram_structs.h:695
int count
Definition: cram_structs.h:606
size_t alloc
Definition: cram_structs.h:258
int multi_seq
Definition: cram_structs.h:372
Definition: cram_structs.h:165
#define KHASH_MAP_INIT_STR(name, khval_t)
Definition: khash.h:614
t_pool * pool
Definition: cram_structs.h:722
Definition: cram_structs.h:754
int32_t name_len
Definition: cram_structs.h:405
int pos
Definition: cram_structs.h:452
int slice
Definition: cram_structs.h:635
int ref_start
Definition: cram_structs.h:681
#define CRAM_MAP_HASH
Definition: cram_structs.h:266
Definition: cram_structs.h:125
bam_seq_t ** bams
Definition: cram_structs.h:381
Definition: cram_structs.h:158
double lzma_extra
Definition: cram_structs.h:243
Definition: cram_codecs.h:111
cram_block * aux_oz_blk
Definition: cram_structs.h:567
int ref_end
Definition: cram_structs.h:682
int32_t idx
Definition: cram_structs.h:254
int read_names_included
Definition: cram_structs.h:283
int key
Definition: cram_structs.h:308
Definition: cram_structs.h:171
double bzip2_extra
Definition: cram_structs.h:242
int32_t seq
Definition: cram_structs.h:422
Definition: cram_structs.h:127
bam_seq_t ** bams
Definition: cram_structs.h:651
cram_block * seqs_blk
Definition: cram_structs.h:555
Definition: cram_structs.h:123
Definition: cram_structs.h:756
int last_slice
Definition: cram_structs.h:715
cram_range range
Definition: cram_structs.h:702
cram_block * hdr_block
Definition: cram_structs.h:520
Definition: cram_structs.h:760
int32_t TN_idx
Definition: cram_structs.h:416
Definition: cram_structs.h:518
Definition: cram_structs.h:115
int32_t name
Definition: cram_structs.h:404
Definition: cram_structs.h:100
cram_block * base_blk
Definition: cram_structs.h:557
int32_t * landmark
Definition: cram_structs.h:352
unsigned char ** TL
Definition: cram_structs.h:291
int use_lzma
Definition: cram_structs.h:699
char cram_sub_matrix[32][32]
Definition: cram_structs.h:709
cram_block * aux_OQ_blk
Definition: cram_structs.h:560
int i
Definition: cram_structs.h:72
int line_length
Definition: cram_structs.h:588
Definition: cram_structs.h:177
int nvals
Definition: cram_structs.h:94
string_alloc_t * pool
Definition: cram_structs.h:597
int32_t qual
Definition: cram_structs.h:423
int nalloc
Definition: cram_structs.h:628
cram_block_method
Definition: cram_structs.h:189
Definition: sam.h:170
Definition: cram_structs.h:151
Definition: cram_structs.h:149
int code
Definition: cram_structs.h:453
uint32_t * TN
Definition: cram_structs.h:546
Definition: cram_structs.h:734
int32_t ref_id
Definition: cram_structs.h:398
#define H(x, y, z)
Definition: md5.c:53
cram_block_compression_hdr * comp_hdr
Definition: cram_structs.h:358
Definition: cram_structs.h:197
int32_t num_records
Definition: cram_structs.h:322
Definition: cram_structs.h:173
Definition: cram_structs.h:103
Definition: cram_structs.h:164
Definition: cram_structs.h:204
Definition: cram_structs.h:742
refs_t * refs
Definition: cram_structs.h:678
cram_block_slice_hdr * hdr
Definition: cram_structs.h:519
int ref_start
Definition: cram_structs.h:573
Definition: cram_structs.h:202
int32_t mate_line
Definition: cram_structs.h:406
int64_t length
Definition: cram_structs.h:585
Definition: cram_structs.h:750
cram_index * index
Definition: cram_structs.h:712
Definition: cram_structs.h:764
struct cram_slice * s
Definition: cram_structs.h:396
int32_t aux_size
Definition: cram_structs.h:414
int empty_container
Definition: cram_structs.h:718
Definition: cram_structs.h:133
Definition: cram_structs.h:141
int unmapped_placed
Definition: cram_structs.h:281
cram_block * aux_BI_blk
Definition: cram_structs.h:563
string_alloc_t * pair_keys
Definition: cram_structs.h:569
Definition: cram_structs.h:747
int aTN
Definition: cram_structs.h:547
Definition: cram_structs.h:758
char * uncomp
Definition: cram_structs.h:301
Definition: cram_structs.h:129
Definition: cram_structs.h:640
int base
Definition: cram_structs.h:454
int last_apos
Definition: cram_structs.h:525
int32_t length
Definition: cram_structs.h:343
Definition: cram_structs.h:142
int last_slice
Definition: cram_structs.h:371
cram_DS_ID
Definition: cram_structs.h:120
Definition: cram_structs.h:735
uint32_t cigar_alloc
Definition: cram_structs.h:537
int sz_rans0
Definition: cram_structs.h:220
int start
Definition: cram_structs.h:642
Definition: cram_structs.h:765
char * prefix
Definition: cram_structs.h:662
int own_pool
Definition: cram_structs.h:721
Definition: cram_structs.h:270
Definition: cram_structs.h:596
unsigned char uc
Definition: cram_structs.h:66
int max_rec
Definition: cram_structs.h:363
int * refs_used
Definition: cram_structs.h:387
Definition: cram_structs.h:196
int sz_gz_def
Definition: cram_structs.h:219
Definition: cram_structs.h:147
double rans1_extra
Definition: cram_structs.h:241
int64_t offset
Definition: cram_structs.h:586
int unsorted
Definition: cram_structs.h:717
Definition: cram_structs.h:193
Definition: cram_structs.h:195
char * ref
Definition: cram_structs.h:572
Definition: cram_structs.h:395
int ref_id
Definition: cram_structs.h:575
int slices_per_container
Definition: cram_structs.h:693
Definition: cram_structs.h:247
double gz_rle_extra
Definition: cram_structs.h:238
int32_t comp_size
Definition: cram_structs.h:251
int gz_rle_cnt
Definition: cram_structs.h:230
cram_metrics * m[DS_END]
Definition: cram_structs.h:687
int32_t content_id
Definition: cram_structs.h:250
unsigned char * data
Definition: cram_structs.h:255
struct cram_codec * codec
Definition: cram_structs.h:312
int AP_delta
Definition: cram_structs.h:284
uint8_t major_version
Definition: cram_structs.h:179
int last_base
Definition: cram_structs.h:675
Definition: cram_structs.h:203
Definition: cram_structs.h:582
int ref_start
Definition: cram_structs.h:376
unsigned int cram_flag_swap[0x1000]
Definition: cram_structs.h:706
cram_record * crecs
Definition: cram_structs.h:531
int32_t num_blocks
Definition: cram_structs.h:324
struct cram_fd cram_fd
int32_t flags
Definition: cram_structs.h:399
Definition: cram_structs.h:122
int mapped_qs_included
Definition: cram_structs.h:279
struct cram_map * next
Definition: cram_structs.h:313
Definition: cram_structs.h:751
cram_block * soft_blk
Definition: cram_structs.h:558
int32_t record_counter
Definition: cram_structs.h:323
int next_trial
Definition: cram_structs.h:215
Definition: cram_structs.h:143
int bit
Definition: cram_structs.h:260
cram_block ** block_by_id
Definition: cram_structs.h:522
Definition: cram_structs.h:106
int max_c_rec
Definition: cram_structs.h:364
Definition: cram_structs.h:90
int max_apos
Definition: cram_structs.h:370
int32_t ref_seq_id
Definition: cram_structs.h:271
int last_id
Definition: cram_structs.h:610
char * ref
Definition: cram_structs.h:679
int max_slice
Definition: cram_structs.h:362
int unsorted
Definition: cram_structs.h:373
Definition: cram_structs.h:205
int len
Definition: cram_structs.h:636
Definition: cram_structs.h:740
Definition: cram_structs.h:104
int ref_end
Definition: cram_structs.h:574
Definition: cram_structs.h:134
Definition: cram_structs.h:71
cram_block * TD_blk
Definition: cram_structs.h:289
int32_t ref_seq_start
Definition: cram_structs.h:345
char * name
Definition: cram_structs.h:583
Definition: cram_structs.h:194
Definition: cram_structs.h:166
pthread_mutex_t ref_lock
Definition: cram_structs.h:725
int decode_md
Definition: cram_structs.h:690
int32_t ref_seq_span
Definition: cram_structs.h:273
size_t byte
Definition: cram_structs.h:259
Definition: cram_structs.h:342
Definition: cram_structs.h:139
Definition: cram_structs.h:150
#define khash_t(name)
Definition: khash.h:422
Definition: cram_structs.h:148
Definition: cram_structs.h:207
Definition: cram_structs.h:627
Definition: cram_structs.h:744
Definition: cram_structs.h:116
struct spare_bams * next
Definition: cram_structs.h:652
Definition: cram_structs.h:738
void * job_pending
Definition: cram_structs.h:728
unsigned int bam_flag_swap[0x1000]
Definition: cram_structs.h:705
struct spare_bams spare_bams
ref_entry ** ref_id
Definition: cram_structs.h:600
cram_feature * features
Definition: cram_structs.h:540
unsigned char L1[256]
Definition: cram_structs.h:707
unsigned int data_series
Definition: cram_structs.h:304
Definition: cram_structs.h:159
struct cram_slice ** slices
Definition: cram_structs.h:368
char * ref_fn
Definition: cram_structs.h:683
char * p
Definition: cram_structs.h:73
Definition: cram_structs.h:99
Definition: cram_structs.h:650
Definition: cram_structs.h:132
int32_t uncomp_size
Definition: cram_structs.h:252
int qs_included
Definition: cram_structs.h:282
Definition: cram_structs.h:145
Definition: cram_structs.h:317
int slice_num
Definition: cram_structs.h:664
Definition: cram_structs.h:191
Definition: thread_pool.h:108
cram_encoding
Definition: cram_structs.h:98
size_t uncomp_size
Definition: cram_structs.h:302
Definition: cram_structs.h:137
int64_t num_bases
Definition: cram_structs.h:348
int trial
Definition: cram_structs.h:214
pthread_mutex_t metrics_lock
Definition: cram_structs.h:724
int32_t num_blocks
Definition: cram_structs.h:350
struct hFILE * fp
Definition: cram_structs.h:656
int curr_ref
Definition: cram_structs.h:366
Definition: cram_structs.h:739
cram_block * aux_oq_blk
Definition: cram_structs.h:565
cram_block * aux_os_blk
Definition: cram_structs.h:566
int nseq
Definition: cram_structs.h:634
int last_pos
Definition: cram_structs.h:367
int32_t num_records
Definition: cram_structs.h:274
int version
Definition: cram_structs.h:658
#define KHASH_SET_INIT_INT(name)
Definition: khash.h:575
t_results_queue * rqueue
Definition: cram_structs.h:723
uint32_t crc32
Definition: cram_structs.h:253
Definition: cram_structs.h:746
int32_t ref_seq_id
Definition: cram_structs.h:344
int nTN
Definition: cram_structs.h:547
double gz_def_extra
Definition: cram_structs.h:239
int TL
Definition: cram_structs.h:420
int level
Definition: cram_structs.h:686
int pos_sorted
Definition: cram_structs.h:369
BGZF * fp
Definition: cram_structs.h:604
cram_file_def * file_def
Definition: cram_structs.h:659
Definition: cram_structs.h:157
struct cram_slice cram_slice
#define MAX_STAT_VAL
Definition: cram_structs.h:88
Definition: cram_structs.h:748
Definition: cram_structs.h:752
int end
Definition: cram_structs.h:633
int gz_def_cnt
Definition: cram_structs.h:231
pthread_mutex_t lock
Definition: cram_structs.h:608
cram_content_type
Definition: cram_structs.h:201
khash_t(m_s2i)*pair[2]
unsigned char L2[256]
Definition: cram_structs.h:708
pthread_mutex_t bam_list_lock
Definition: cram_structs.h:727
cram_block * aux_BQ_blk
Definition: cram_structs.h:561
int rans1_cnt
Definition: cram_structs.h:233
int mode
Definition: cram_structs.h:657
cram_block * aux_FZ_blk
Definition: cram_structs.h:564
Definition: cram_structs.h:655
int32_t cigar
Definition: cram_structs.h:424
int multi_seq
Definition: cram_structs.h:716
uint64_t id
Definition: cram_structs.h:528
struct cram_index * e
Definition: cram_structs.h:629
int verbose
Definition: cram_structs.h:691
int record_counter
Definition: cram_structs.h:663
int err
Definition: cram_structs.h:665
int size
Definition: cram_structs.h:311
Definition: cram_structs.h:162
Definition: cram_structs.h:107
Definition: cram_structs.h:741
int32_t ref_seq_span
Definition: cram_structs.h:346
int32_t apos
Definition: cram_structs.h:402
string_alloc_t * TD_keys
Definition: cram_structs.h:293
Definition: cram_structs.h:737
uint32_t crc32
Definition: cram_structs.h:389
double rans0_extra
Definition: cram_structs.h:240
spare_bams * bl
Definition: cram_structs.h:726
Definition: bgzf.h:49
int32_t num_landmarks
Definition: cram_structs.h:275
int method
Definition: cram_structs.h:226
char * fn
Definition: cram_structs.h:584
#define I(x, y, z)
Definition: md5.c:54
int first_base
Definition: cram_structs.h:675
Definition: cram_structs.h:131
cram_container * ctr
Definition: cram_structs.h:672
int32_t rg
Definition: cram_structs.h:403
int start
Definition: cram_structs.h:632
int rans0_cnt
Definition: cram_structs.h:232
Definition: cram_structs.h:307
Definition: cram_structs.h:155
int ooc
Definition: cram_structs.h:729