-
Notifications
You must be signed in to change notification settings - Fork 10
/
bgt.h
129 lines (105 loc) · 3 KB
/
bgt.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#ifndef BGT_H
#define BGT_H
#include "vcf.h"
#include "pbwt.h"
#include "fmf.h"
// Flag constants. Each value is a distinct single bit, so they can be OR-ed together.
// NOTE(review): presumably the values accepted by bgtm_set_flag() and kept in bgtm_t.flag -- confirm.
#define BGT_F_SET_AC 0x0001
#define BGT_F_NO_GT 0x0002
#define BGT_F_CNT_AL 0x0004
#define BGT_F_CNT_HAP 0x0008
// Capacity of the per-group arrays (gan, gac) in bgt_info_t.
#define BGT_MAX_GROUPS 32
// NOTE(review): not referenced in this header; presumably an upper bound on alleles -- confirm against users.
#define BGT_MAX_ALLELES 64
// NOTE(review): sentinel, not referenced in this header; presumably means "use all samples" -- confirm.
// The parentheses keep the negative literal safe when the macro is expanded inside an expression.
#define BGT_SET_ALL_SAMPLES (-1)
// Per-file handle: groups the objects that belong to one BGT data set.
// NOTE(review): presumably created by bgt_open() and released by bgt_close() -- confirm.
typedef struct {
char *prefix; // NOTE(review): presumably the prefix given to bgt_open() -- confirm ownership
fmf_t *f; // fmf_t object associated with this file (contents not visible in this header)
bcf_hdr_t *h0; // site-only BCF header
hts_idx_t *idx; // BCF index
int32_t *mgs; // pointer to int32_t "mgs" value(s); length and meaning are not visible here
} bgt_file_t;
// Reader state attached to a single bgt_file_t.
// NOTE(review): presumably built by bgt_reader_init() and freed by
// bgt_reader_destroy() -- confirm against the implementation.
typedef struct {
    const bgt_file_t *f;  // file handle this reader refers to (never modified through here)
    pbf_t *pb;            // pbf_t handle (declared in pbwt.h, presumably)
    BGZF *bcf;            // BGZF stream
    bcf1_t *b0;           // BCF record carrying site information only
    hts_itr_t *itr;       // iterator; NOTE(review): presumably set up by bgt_set_region() -- confirm

    // Opaque BED object plus an int that travels with it.
    // NOTE(review): presumably the (bed, excl) pair passed to bgt_set_bed() -- confirm.
    const void *bed;
    int bed_excl;

    int n_out;            // NOTE(review): presumably the element count of `out` -- confirm
    int n_groups;
    int mgs_def;
    int *out;

    uint32_t *group;
    uint32_t *gtag;

    bcf_hdr_t *h_out;     // BCF header; NOTE(review): presumably the one used for output -- confirm
    const void *h_al;     // allele hash table; filled in by bgtm, not by this reader
} bgt_t;
// One record as seen while reading.
// NOTE(review): "links" presumably means both members are borrowed pointers that this
// struct does not own -- confirm before freeing anything through them.
typedef struct { // during reading, these are all links
const bcf1_t *b0; // read-only pointer to a BCF record
const uint8_t *a[2]; // two read-only byte pointers; what each one addresses is not visible in this header
} bgt_rec_t;
// Count summary: two overall counters plus per-group counterparts.
// NOTE(review): ac/an presumably stand for allele count / allele number -- confirm.
typedef struct {
    int32_t ac[2];                   // overall pair of counts
    int32_t an;                      // overall total
    int32_t n_groups;                // NOTE(review): presumably how many gan/gac slots are in use -- confirm
    int32_t gan[BGT_MAX_GROUPS];     // per-group counterpart of `an`
    int32_t gac[BGT_MAX_GROUPS][2];  // per-group counterpart of `ac`
} bgt_info_t;
// Description of one allele.
// NOTE(review): see bgt_al_parse() / bgt_al_format() / bgt_al_from_bcf() for how
// instances are presumably filled and printed -- confirm field semantics there.
typedef struct {
    kstring_t chr;  // string buffer; presumably the chromosome/contig name
    char *al;       // allele string; ownership is not visible in this header
    int rid;        // presumably the numeric reference/contig id
    int pos;        // presumably the position; 0- vs 1-based is not visible here
    int rlen;       // presumably the length on the reference
} bgt_allele_t;
// One haplotype-count entry.
// NOTE(review): presumably the element type of the array returned by bgtm_hapcnt() -- confirm.
typedef struct {
    uint64_t hap;  // 64-bit haplotype value; encoding is not visible in this header
    int tot;       // NOTE(review): presumably the total over `cnt` -- confirm
    int *cnt;      // pointer to int counters; length and ownership are not visible here
} bgt_hapcnt_t;
// State of a reader that spans several bgt_t readers at once.
// NOTE(review): presumably built by bgtm_reader_init() and freed by
// bgtm_reader_destroy(); the per-field notes below are inferred from names and
// types only -- confirm against the implementation.
typedef struct {
    // Scalar configuration and counters.
    int n_bgt;             // presumably the number of entries in `bgt` (and `r`)
    int n_out;
    int n_groups;
    int flag;              // presumably an OR of BGT_F_* bits, see bgtm_set_flag()
    uint64_t n_gt_read;

    // Sample / group bookkeeping.
    uint64_t *sample_idx;
    uint32_t *group;
    int32_t *mgs;
    int32_t mgs_def;       // presumably the value given to bgtm_set_mgs()

    // Underlying readers and their current records.
    bgt_t **bgt;
    bgt_rec_t *r;

    kexpr_t *site_flt;     // presumably the expression given to bgtm_set_flt_site()
    bcf_hdr_t *h_out;
    uint8_t *a[2];         // two byte buffers (writable, unlike bgt_rec_t.a)

    // Table output.
    int n_fields;          // presumably the number of entries in `fields`
    kexpr_t **fields;
    kstring_t tbl_line;

    // Allele bookkeeping.
    int n_aal;             // presumably the number of entries in `aal`
    bgt_allele_t *aal;
    void *h_al;            // opaque; presumably the allele hash table shared with bgt_t.h_al
    int *alcnt;
    uint64_t *hap;
} bgtm_t;
#ifdef __cplusplus
extern "C" {
#endif

// Global int defined by the implementation. It is declared inside the
// extern "C" region so that C++ translation units reference it with C language
// linkage, the same as the functions below.
// NOTE(review): its meaning is not visible in this header -- see the definition.
extern int bgt_no_file;

// ---- File handle -----------------------------------------------------------
// bgt_open() yields a bgt_file_t* for `prefix`; bgt_close() takes one back.
// NOTE(review): failure behaviour (e.g. NULL on error) is not visible here -- confirm.
bgt_file_t *bgt_open(const char *prefix);
void bgt_close(bgt_file_t *bgt);

// ---- Reader over one file (bgt_t) ------------------------------------------
// NOTE(review): the meaning of the int return values is not visible in this
// header -- consult the implementation before relying on a convention.
bgt_t *bgt_reader_init(const bgt_file_t *bf);
void bgt_reader_destroy(bgt_t *bgt);
void bgt_set_bed(bgt_t *bgt, const void *bed, int excl);
int bgt_set_region(bgt_t *bgt, const char *reg);
int bgt_set_start(bgt_t *bgt, int64_t n);
int bgt_read(bgt_t *bgt, bcf1_t *b);

// ---- Reader over several files (bgtm_t) ------------------------------------
// bgtm_reader_init() takes `n_files` file handles in `fns`.
// NOTE(review): as above, int return conventions are not visible in this header.
bgtm_t *bgtm_reader_init(int n_files, bgt_file_t *const *fns);
void bgtm_reader_destroy(bgtm_t *bm);
void bgtm_set_flag(bgtm_t *bm, int flag);
int bgtm_set_flt_site(bgtm_t *bm, const char *expr);
void bgtm_set_bed(bgtm_t *bm, const void *bed, int excl);
int bgtm_set_region(bgtm_t *bm, const char *reg);
int bgtm_set_start(bgtm_t *bm, int64_t n);
int bgtm_set_table(bgtm_t *bm, const char *fmt);
int bgtm_set_alleles(bgtm_t *bm, const char *expr, const fmf_t *f, const char *fn); // call this AFTER bgtm_set_region()
int bgtm_set_mgs(bgtm_t *bm, int mgs_def);
int bgtm_add_group(bgtm_t *bm, const char *expr);
int bgtm_add_allele(bgtm_t *bm, const char *al);
int bgtm_prepare(bgtm_t *bm);
int bgtm_test_mgs(const bgtm_t *bm);
int bgtm_read(bgtm_t *bm, bcf1_t *b);

// Haplotype / allele count reporting. bgtm_hapcnt() writes a count to *n_hap
// and returns an array of bgt_hapcnt_t.
// NOTE(review): bgtm_hapcnt_print_destroy() presumably frees `hc`, and the
// returned char* strings are presumably owned by the caller -- confirm.
bgt_hapcnt_t *bgtm_hapcnt(const bgtm_t *bm, int *n_hap);
char *bgtm_hapcnt_print_destroy(const bgtm_t *bm, int n_hap, bgt_hapcnt_t *hc);
char *bgtm_alcnt_print(const bgtm_t *bm);

// ---- bgt_allele_t helpers --------------------------------------------------
// Convert between a textual allele, a bgt_allele_t and a BCF record.
// NOTE(review): the text format and the roles of `a` vs `r` in
// bgt_al_from_bcf() are not visible in this header -- confirm.
int bgt_al_parse(const char *al, bgt_allele_t *a);
void bgt_al_format(const bgt_allele_t *a, kstring_t *s);
void bgt_al_from_bcf(const bcf_hdr_t *h, const bcf1_t *b, bgt_allele_t *a, bgt_allele_t *r);

#ifdef __cplusplus
}
#endif
#endif