Gamgee
You miserable little maggot. I'll stove your head in!
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
cram_io.h
Go to the documentation of this file.
1 /*
2 Copyright (c) 2012-2014 Genome Research Ltd.
3 Author: James Bonfield <jkb@sanger.ac.uk>
4 
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7 
8  1. Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 
11  2. Redistributions in binary form must reproduce the above copyright notice,
12 this list of conditions and the following disclaimer in the documentation
13 and/or other materials provided with the distribution.
14 
15  3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
16 Institute nor the names of its contributors may be used to endorse or promote
17 products derived from this software without specific prior written permission.
18 
19 THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
23 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30 
42 #ifndef _CRAM_IO_H_
43 #define _CRAM_IO_H_
44 
45 #ifdef __cplusplus
46 extern "C" {
47 #endif
48 
49 #define ITF8_MACROS
50 
51 #include <stdint.h>
52 #include <cram/misc.h>
53 
/*
 * Combine two values into a single integer key: 'a' shifted left by 8 bits,
 * OR-ed with 'b'.  Each argument is evaluated exactly once.
 */
61 #define CRAM_KEY(a,b) (((a)<<8)|((b)))
62 
70 int itf8_decode(cram_fd *fd, int32_t *val);
71 
72 #ifndef ITF8_MACROS
73 
80 int itf8_get(char *cp, int32_t *val_p);
81 
88 int itf8_put(char *cp, int32_t val);
89 
90 #else
91 
92 /*
93  * Macro implementations of the above
94  */
/*
 * itf8_get(c,v): decode one variable-length integer from the bytes at 'c'
 * and store it in *(v).  The value of the expression is the number of bytes
 * consumed (1 to 5).
 *
 * The first byte selects the length:
 *     < 0x80 -> 1 byte      < 0xc0 -> 2 bytes     < 0xe0 -> 3 bytes
 *     < 0xf0 -> 4 bytes     otherwise 5 bytes
 * For the 2-4 byte forms the bytes are concatenated big-endian and the
 * length-marker bits are masked off (0x3fff, 0x1fffff, 0x0fffffff).  In the
 * 5-byte form only the low 4 bits of the first and of the last byte are used.
 *
 * 'c' is evaluated several times, so it must not have side effects.
 *
 * 'uc' is not defined in this header.
 * NOTE(review): presumably an unsigned char typedef supplied via
 * cram/misc.h -- confirm.
 * NOTE(review): if 'uc' promotes to int, the <<24 and <<28 shifts in the
 * 4- and 5-byte branches can shift into the sign bit -- confirm this is
 * acceptable for the supported compilers.
 */
95 #define itf8_get(c,v) (((uc)(c)[0]<0x80)?(*(v)=(uc)(c)[0],1):(((uc)(c)[0]<0xc0)?(*(v)=(((uc)(c)[0]<<8)|(uc)(c)[1])&0x3fff,2):(((uc)(c)[0]<0xe0)?(*(v)=(((uc)(c)[0]<<16)|((uc)(c)[1]<<8)|(uc)(c)[2])&0x1fffff,3):(((uc)(c)[0]<0xf0)?(*(v)=(((uc)(c)[0]<<24)|((uc)(c)[1]<<16)|((uc)(c)[2]<<8)|(uc)(c)[3])&0x0fffffff,4):(*(v)=(((uc)(c)[0]&0x0f)<<28)|((uc)(c)[1]<<20)|((uc)(c)[2]<<12)|((uc)(c)[3]<<4)|((uc)(c)[4]&0x0f),5)))))
96 
/*
 * itf8_put(c,v): encode 'v' into the bytes at 'c' using the shortest form
 * that holds it.  The value of the expression is the number of bytes written
 * (1 to 5), so 'c' must have room for up to 5 bytes.
 *
 * Form selection tests which bits of 'v' are set:
 *     none outside 0x7f      -> 1 byte
 *     none outside 0x3fff    -> 2 bytes, first byte OR-ed with 0x80
 *     none outside 0x1fffff  -> 3 bytes, first byte OR-ed with 0xc0
 *     none outside 0xfffffff -> 4 bytes, first byte OR-ed with 0xe0
 *     otherwise              -> 5 bytes, first byte OR-ed with 0xf0; the
 *                               last byte carries only the low 4 bits of 'v'
 *
 * Both 'c' and 'v' are evaluated several times, so neither may have side
 * effects.
 */
97 #define itf8_put(c,v) ((!((v)&~0x7f))?((c)[0]=(v),1):(!((v)&~0x3fff))?((c)[0]=((v)>>8)|0x80,(c)[1]=(v)&0xff,2):(!((v)&~0x1fffff))?((c)[0]=((v)>>16)|0xc0,(c)[1]=((v)>>8)&0xff,(c)[2]=(v)&0xff,3):(!((v)&~0xfffffff))?((c)[0]=((v)>>24)|0xe0,(c)[1]=((v)>>16)&0xff,(c)[2]=((v)>>8)&0xff,(c)[3]=(v)&0xff,4):((c)[0]=0xf0|(((v)>>28)&0xff),(c)[1]=((v)>>20)&0xff,(c)[2]=((v)>>12)&0xff,(c)[3]=((v)>>4)&0xff,(c)[4]=(v)&0xf,5))
98 
/*
 * itf8_size(v): number of bytes (1 to 5) that itf8_put() writes for 'v'.
 * Uses the same bit tests as itf8_put(): 0x7f, 0x3fff, 0x1fffff, 0xfffffff.
 * 'v' is evaluated up to four times.
 *
 * This macro exists only in the ITF8_MACROS branch; the non-macro branch of
 * this header declares itf8_get() and itf8_put() but no itf8_size().
 */
99 #define itf8_size(v) ((!((v)&~0x7f))?1:(!((v)&~0x3fff))?2:(!((v)&~0x1fffff))?3:(!((v)&~0xfffffff))?4:5)
100 
101 #endif
102 
111 int itf8_put_blk(cram_block *blk, int val);
112 
129 cram_block *cram_new_block(enum cram_content_type content_type,
130  int content_id);
131 
139 
146 int cram_write_block(cram_fd *fd, cram_block *b);
147 
150 void cram_free_block(cram_block *b);
151 
158 char *zlib_mem_inflate(char *cdata, size_t csize, size_t *size);
159 
167 
181 int cram_compress_block(cram_fd *fd, cram_block *b, cram_metrics *metrics,
182  int method, int level);
183 
187 
188 /* --- Accessor macros for manipulating blocks on a byte by byte basis --- */
189 
190 /*
 * Block size and data pointer.
 * Both expand to the struct member itself, so they can be assigned to as
 * well as read (BLOCK_APPEND does "BLOCK_SIZE(b) += l").
 */
191 #define BLOCK_SIZE(b) ((b)->byte)
192 #define BLOCK_DATA(b) ((b)->data)
193 
194 /*
 * Returns the address one past the end of the block, i.e. &data[byte].
 * This is the position the BLOCK_APPEND* macros write to; it is based on the
 * 'byte' count, not on the allocated capacity.
 */
195 #define BLOCK_END(b) (&(b)->data[(b)->byte])
196 
197 /*
 * Request block to be at least 'l' bytes long.
 *
 * Loops until (b)->alloc is strictly greater than 'l'.  Each pass sets alloc
 * to 1024 if it was 0, otherwise to alloc*1.5, and then realloc()s
 * (b)->data to the new alloc.  'b' and 'l' are re-evaluated on every pass.
 *
 * NOTE(review): the realloc() result is assigned straight to (b)->data with
 * no NULL check.  On allocation failure the previous buffer is leaked and
 * the macro has no way to report the error to its caller.
 * NOTE(review): the growth step is a floating-point multiply.  If alloc is
 * an integer type, alloc == 1 never grows (1*1.5 converts back to 1) --
 * confirm alloc is only ever 0 or large enough to grow.
 */
198 #define BLOCK_RESIZE(b,l) \
199  do { \
200  while((b)->alloc <= (l)) { \
201  (b)->alloc = (b)->alloc ? (b)->alloc*1.5 : 1024; \
202  (b)->data = realloc((b)->data, (b)->alloc); \
203  } \
204  } while(0)
205 
206 /*
 * Ensure the block can hold at least another 'l' bytes.
 * Expands to BLOCK_RESIZE(b, BLOCK_SIZE(b) + l), so the capacity requested
 * is measured from the current 'byte' count.
 */
207 #define BLOCK_GROW(b,l) BLOCK_RESIZE((b), BLOCK_SIZE((b)) + (l))
208 
209 /*
 * Append string 's' of length 'l'.
 *
 * Grows the block by 'l', memcpy()s 'l' bytes from 's' to BLOCK_END(b), then
 * advances the block's byte count by 'l'.  Exactly 'l' bytes are copied; no
 * terminator is added.  'b' and 'l' are each evaluated more than once.
 */
210 #define BLOCK_APPEND(b,s,l) \
211  do { \
212  BLOCK_GROW((b),(l)); \
213  memcpy(BLOCK_END((b)), (s), (l)); \
214  BLOCK_SIZE((b)) += (l); \
215  } while (0)
216 
217 /*
 * Append a single character 'c'.
 * Grows the block by one byte, stores 'c' at data[byte] and increments the
 * byte count.  'b' is evaluated more than once.
 */
218 #define BLOCK_APPEND_CHAR(b,c) \
219  do { \
220  BLOCK_GROW((b),1); \
221  (b)->data[(b)->byte++] = (c); \
222  } while (0)
223 
224 /*
 * Append a single unsigned integer as decimal text.
 *
 * Reserves 11 more bytes, has append_uint() write the digits of 'i' starting
 * at data[byte], then advances the byte count by the number of bytes
 * append_uint() produced (its return pointer minus the start pointer).
 *
 * NOTE(review): the expansion declares a local named 'cp'.  An 'i' argument
 * that itself refers to a variable called 'cp' would bind to this local
 * instead of the caller's -- confirm no call site does that.
 */
225 #define BLOCK_APPEND_UINT(b,i) \
226  do { \
227  unsigned char *cp; \
228  BLOCK_GROW((b),11); \
229  cp = &(b)->data[(b)->byte]; \
230  (b)->byte += append_uint(cp, (i)) - cp; \
231  } while (0)
232 
/*
 * Write the decimal representation of 'i' at 'cp' and return the address
 * one past the last digit written.
 *
 * No NUL terminator is appended and no leading zeros are produced; zero is
 * written as the single character '0'.  At most 10 bytes are written, so
 * the 11 bytes reserved by BLOCK_APPEND_UINT are always sufficient.
 *
 * The value is formatted as unsigned: a negative 'i' is rendered as its
 * value modulo 2^32 (for example -1 gives "4294967295").  The previous
 * goto-based implementation emitted non-digit bytes for negative input.
 * Output for every non-negative 'i' is unchanged.
 */
static inline unsigned char *append_uint(unsigned char *cp, int32_t i) {
    unsigned char digits[10];   /* UINT32_MAX has 10 decimal digits */
    uint32_t u = (uint32_t)i;   /* well-defined modulo-2^32 conversion */
    int n = 0;

    /* Collect digits least-significant first; do/while covers u == 0. */
    do {
        digits[n++] = (unsigned char)('0' + u % 10);
        u /= 10;
    } while (u != 0);

    /* Emit them most-significant first. */
    while (n > 0) {
        *cp++ = digits[--n];
    }

    return cp;
}
270 
/*
 * Set both comp_size and uncomp_size of the block to its current byte count
 * (BLOCK_SIZE).
 *
 * NOTE(review): the expansion is a bare assignment expression, not wrapped
 * in parentheses or do { } while (0); use it only as a full statement.
 */
271 #define BLOCK_UPLEN(b) \
272  (b)->comp_size = (b)->uncomp_size = BLOCK_SIZE((b))
273 
285 int cram_load_reference(cram_fd *fd, char *fn);
286 
296 int refs2id(refs_t *r, SAM_hdr *bfd);
297 
298 void refs_free(refs_t *r);
299 
313 char *cram_get_ref(cram_fd *fd, int id, int start, int end);
314 void cram_ref_incr(refs_t *r, int id);
315 void cram_ref_decr(refs_t *r, int id);
328 cram_container *cram_new_container(int nrec, int nslice);
330 
338 
346 
359 
360 
373 
376 
377 
385 
387 void cram_free_slice(cram_slice *s);
388 
396 cram_slice *cram_new_slice(enum cram_content_type type, int nrecs);
397 
411 
412 
413 
426 
434 
437 
438 
454 
461 int cram_write_SAM_hdr(cram_fd *fd, SAM_hdr *hdr);
462 
463 
477 cram_fd *cram_open(const char *filename, const char *mode);
478 
485 cram_fd *cram_dopen(struct hFILE *fp, const char *filename, const char *mode);
486 
493 int cram_close(cram_fd *fd);
494 
495 /*
496  * Seek within a CRAM file.
497  *
498  * Returns 0 on success
499  * -1 on failure
500  */
501 int cram_seek(cram_fd *fd, off_t offset, int whence);
502 
503 /*
504  * Flushes a CRAM file.
505  * Useful for when writing to stdout without wishing to close the stream.
506  *
507  * Returns 0 on success
508  * -1 on failure
509  */
510 int cram_flush(cram_fd *fd);
511 
519 int cram_eof(cram_fd *fd);
520 
530 int cram_set_option(cram_fd *fd, enum cram_option opt, ...);
531 
541 int cram_set_voption(cram_fd *fd, enum cram_option opt, va_list args);
542 
554 int cram_set_header(cram_fd *fd, SAM_hdr *hdr);
555 
556 
557 #ifdef __cplusplus
558 }
559 #endif
560 
561 #endif /* _CRAM_IO_H_ */
int cram_write_SAM_hdr(cram_fd *fd, SAM_hdr *hdr)
Definition: cram_io.c:3391
char * zlib_mem_inflate(char *cdata, size_t csize, size_t *size)
Definition: cram_io.c:528
cram_metrics * cram_new_metrics(void)
Definition: cram_io.c:1376
int cram_write_container(cram_fd *fd, cram_container *h)
Definition: cram_io.c:2697
void cram_free_container(cram_container *c)
Definition: cram_io.c:2518
int cram_write_block(cram_fd *fd, cram_block *b)
Definition: cram_io.c:840
int cram_flush(cram_fd *fd)
Definition: cram_io.c:3859
SAM_hdr * cram_read_SAM_hdr(cram_fd *fd)
Definition: cram_io.c:3263
Definition: hfile.h:42
#define itf8_get(c, v)
Definition: cram_io.h:95
int cram_compress_block(cram_fd *fd, cram_block *b, cram_metrics *metrics, int method, int level)
Definition: cram_io.c:1057
char * cram_get_ref(cram_fd *fd, int id, int start, int end)
Definition: cram_io.c:2237
#define itf8_put(c, v)
Definition: cram_io.h:97
cram_block * cram_read_block(cram_fd *fd)
Definition: cram_io.c:767
void cram_free_block(cram_block *b)
Definition: cram_io.c:884
cram_container * cram_new_container(int nrec, int nslice)
Definition: cram_io.c:2465
cram_block * cram_new_block(enum cram_content_type content_type, int content_id)
Definition: cram_io.c:744
cram_fd * cram_open(const char *filename, const char *mode)
Definition: cram_io.c:3686
cram_fd * cram_dopen(struct hFILE *fp, const char *filename, const char *mode)
Definition: cram_io.c:3711
Definition: sam_header.h:183
Definition: cram_structs.h:212
int cram_flush_container(cram_fd *fd, cram_container *c)
Definition: cram_io.c:2789
enum @17 mode
int cram_seek(cram_fd *fd, off_t offset, int whence)
Definition: cram_io.c:3830
cram_slice * cram_new_slice(enum cram_content_type type, int nrecs)
Definition: cram_io.c:3057
Definition: cram_structs.h:518
Definition: cram_structs.h:177
cram_block_method
Definition: cram_structs.h:189
int cram_set_header(cram_fd *fd, SAM_hdr *hdr)
Definition: cram_io.c:1804
char * cram_block_method2str(enum cram_block_method m)
Definition: cram_io.c:1389
void cram_free_compression_header(cram_block_compression_hdr *hdr)
Definition: cram_io.c:2904
Definition: cram_structs.h:270
Definition: cram_structs.h:596
int cram_uncompress_block(cram_block *b)
Definition: cram_io.c:895
int cram_write_file_def(cram_fd *fd, cram_file_def *def)
Definition: cram_io.c:3242
Definition: cram_structs.h:247
cram_block_compression_hdr * cram_new_compression_header(void)
Definition: cram_io.c:2878
int cram_flush_container_mt(cram_fd *fd, cram_container *c)
Definition: cram_io.c:2852
int itf8_decode(cram_fd *fd, int32_t *val)
Definition: cram_io.c:106
void cram_free_slice(cram_slice *s)
Definition: cram_io.c:2967
int itf8_put_blk(cram_block *blk, int val)
Definition: cram_io.c:459
Definition: cram_structs.h:342
char * cram_content_type2str(enum cram_content_type t)
Definition: cram_io.c:1403
cram_option
Definition: hts.h:140
cram_slice * cram_read_slice(cram_fd *fd)
Definition: cram_io.c:3120
Definition: cram_structs.h:317
int cram_close(cram_fd *fd)
Definition: cram_io.c:3878
int cram_set_voption(cram_fd *fd, enum cram_option opt, va_list args)
Definition: cram_io.c:4010
cram_container * cram_read_container(cram_fd *fd)
Definition: cram_io.c:2560
int cram_eof(cram_fd *fd)
Definition: cram_io.c:3980
int cram_load_reference(cram_fd *fd, char *fn)
Definition: cram_io.c:2430
void cram_ref_incr(refs_t *r, int id)
Definition: cram_io.c:2036
cram_file_def * cram_read_file_def(cram_fd *fd)
Definition: cram_io.c:3208
void cram_free_slice_header(cram_block_slice_hdr *hdr)
Definition: cram_io.c:2955
cram_content_type
Definition: cram_structs.h:201
Definition: cram_structs.h:655
int refs2id(refs_t *r, SAM_hdr *bfd)
Definition: cram_io.c:1711
void cram_ref_decr(refs_t *r, int id)
Definition: cram_io.c:2068
void cram_free_file_def(cram_file_def *def)
Definition: cram_io.c:3246
void refs_free(refs_t *r)
Definition: cram_io.c:1462
int cram_set_option(cram_fd *fd, enum cram_option opt,...)
Definition: cram_io.c:3992