/* pbwt.h - declarations for the PBWT codec (pbc_*, pbs_*) and PBF file access (pbf_*). */
#ifndef PBWT_H
#define PBWT_H
#include <stdint.h>
// Full PBWT codec state, created by pbc_init() and used by pbc_enc()/pbc_dec().
typedef struct {
    int32_t m;   // presumably the number of columns given to pbc_init() - TODO confirm
    int32_t l;   // length of the encoded string left in u by pbc_enc()
    int32_t *S0; // NOTE(review): not described in this header - see the implementation
    int32_t *S;  // NOTE(review): not described in this header - see the implementation
    uint8_t *u;  // result buffer: encoded string after pbc_enc(), decoded string after pbc_dec()
} pbc_t;
// One element of the column-subset array handed to pbs_dec().
// NOTE(review): the field meanings are not spelled out in this header; from the
// pbs_dec() comment, r looks like the argument of S() and i like the column
// index to decode - confirm against the implementation.
typedef struct {
    uint32_t r, i;
} pbs_dat_t;
// PBF file handler. struct pbf_s is only declared (not defined) in this header,
// so callers can hold it by pointer only.
typedef struct pbf_s pbf_t;
#ifdef __cplusplus
extern "C" {
#endif
/***********************************
* File-based high-level functions *
***********************************/
/**
 * Open a PBF file for writing
 *
 * @param fn     file name; NULL or "-" for stdout
 * @param m      number of columns
 * @param g      number of groups. NOTE(review): pbf_write() describes one
 *               group as a g-by-m matrix, so g may really be the number of
 *               rows per group - confirm against the implementation.
 * @param shift  keep S every 1<<shift rows
 *
 * @return PBF file handler. NOTE(review): the result on failure is not
 *         documented in this header - confirm against the implementation.
 */
pbf_t *pbf_open_w(const char *fn, int m, int g, int shift);
/**
 * Open a PBF file for reading
 *
 * @param fn  file name; NULL or "-" for stdin
 *
 * @return PBF file handler. NOTE(review): the result on failure is not
 *         documented in this header - confirm against the implementation.
 */
pbf_t *pbf_open_r(const char *fn);
/**
 * Close a PBF file handler and deallocate its memory
 *
 * @param pb  PBF file handler
 *
 * @return NOTE(review): the meaning of the int result is not documented in
 *         this header - confirm against the implementation.
 */
int pbf_close(pbf_t *pb);
/**
 * Write one group to a PBF file
 *
 * @param pb  PBF file handler
 * @param a   g-by-m matrix to encode. NOTE(review): presumably g row pointers,
 *            each to m entries - confirm against the implementation.
 *
 * @return NOTE(review): the meaning of the int result is not documented in
 *         this header - confirm against the implementation.
 */
int pbf_write(pbf_t *pb, uint8_t *const*a);
/**
 * Read one group from a PBF file
 *
 * @param pb  PBF file handler
 *
 * @return g-by-m matrix of decoded bits if pbf_subset() has not been called,
 *         or a g-by-n_sub matrix if it has been called.
 *         NOTE(review): the result at end of file or on error, and how long
 *         the returned matrix stays valid, are not documented here - confirm.
 */
const uint8_t **pbf_read(pbf_t *pb);
/**
 * Seek to a specified row
 *
 * @param pb  PBF file handler
 * @param k   seek to the k-th row. NOTE(review): whether k is 0- or 1-based is
 *            not stated in this header - confirm.
 *
 * @return NOTE(review): the meaning of the int result is not documented in
 *         this header - confirm against the implementation.
 */
int pbf_seek(pbf_t *pb, uint64_t k);
/**
 * Specify a subset of columns to decode
 *
 * After this call, pbf_read() is documented to return a g-by-n_sub matrix
 * instead of the full g-by-m matrix.
 *
 * @param pb     PBF file handler
 * @param n_sub  number of columns to decode; 0 for decoding all columns
 * @param sub    indices of the columns to decode (presumably n_sub entries)
 *
 * @return NOTE(review): the meaning of the int result is not documented in
 *         this header - confirm against the implementation.
 */
int pbf_subset(pbf_t *pb, int n_sub, int *sub);
/**
 * Get g for a PBF file handler
 *
 * NOTE(review): presumably the same quantity as the g argument of
 * pbf_open_w() - confirm against the implementation.
 *
 * @param pb  PBF file handler
 */
int pbf_get_g(const pbf_t *pb);
/**
 * Get m for a PBF file handler
 *
 * NOTE(review): presumably the number of columns, as in pbf_open_w() -
 * confirm against the implementation.
 *
 * @param pb  PBF file handler
 */
int pbf_get_m(const pbf_t *pb);
/**
 * Get n for a PBF file handler
 *
 * NOTE(review): what n counts is not documented anywhere in this header -
 * confirm against the implementation.
 *
 * @param pb  PBF file handler
 */
int pbf_get_n(const pbf_t *pb);
/**
 * Get shift for a PBF file handler
 *
 * NOTE(review): presumably the same quantity as the shift argument of
 * pbf_open_w() - confirm against the implementation.
 *
 * @param pb  PBF file handler
 */
int pbf_get_shift(const pbf_t *pb);
/***********************
* Low-level functions *
***********************/
/**
 * Initialize a PBWT codec with m columns
 *
 * @param m  number of columns
 *
 * @return pointer to a pbc_t struct. It should be freed with free().
 */
pbc_t *pbc_init(int m);
/**
 * Encode a bit string
 *
 * The function itself returns nothing. The transformed run-length encoded
 * string is kept in pb->u, and pb->l gives the length of the encoded string.
 *
 * @param pb  codec
 * @param a   bit string. NOTE(review): presumably one entry per column -
 *            confirm against the implementation.
 */
void pbc_enc(pbc_t *pb, const uint8_t *a);
/**
 * Decode a transformed run-length encoded string
 *
 * The function itself returns nothing. The decoded string is stored in pb->u.
 *
 * @param pb  codec
 * @param b   encoded string generated by pbc_enc()
 */
void pbc_dec(pbc_t *pb, const uint8_t *b);
/**
 * Decode a subset of columns without decoding all columns
 *
 * @param m      total number of columns
 * @param n_sub  number of columns to decode
 * @param sub    S(sub[i].r)=sub[i].S gives the column index to decode.
 *               NOTE(review): pbs_dat_t as declared in this header has the
 *               fields r and i but no S, so sub[i].S presumably means
 *               sub[i].i - confirm against the implementation.
 * @param u      encoded string generated by pbc_enc()
 * @param a      NOTE(review): not documented; presumably the output buffer
 *               that receives the decoded bits - confirm.
 */
void pbs_dec(int m, int n_sub, pbs_dat_t *sub, const uint8_t *u, uint8_t *a);
#ifdef __cplusplus
}
#endif
#endif