-
Notifications
You must be signed in to change notification settings - Fork 13
/
bseq.c
76 lines (71 loc) · 2.04 KB
/
bseq.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#include <zlib.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "bseq.h"
#include "kseq.h"
KSEQ_INIT(gzFile, gzread)
unsigned char seq_nt6_table[256] = {
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 1, 5, 2, 5, 5, 5, 3, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 1, 5, 2, 5, 5, 5, 3, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
};
struct bseq_file_s {
gzFile fp;
kseq_t *ks;
};
bseq_file_t *bseq_open(const char *fn)
{
bseq_file_t *fp;
gzFile f;
f = fn && strcmp(fn, "-")? gzopen(fn, "r") : gzdopen(fileno(stdin), "r");
if (f == 0) return 0;
fp = calloc(1, sizeof(bseq_file_t));
fp->fp = f;
fp->ks = kseq_init(fp->fp);
return fp;
}
void bseq_close(bseq_file_t *fp)
{
kseq_destroy(fp->ks);
gzclose(fp->fp);
free(fp);
}
bseq1_t *bseq_read(bseq_file_t *fp, int chunk_size, int keep_comment, int *n_)
{
int size = 0, m, n;
bseq1_t *seqs;
kseq_t *ks = fp->ks;
m = n = 0; seqs = 0;
while (kseq_read(ks) >= 0) {
bseq1_t *s;
if (n >= m) {
m = m? m<<1 : 256;
seqs = realloc(seqs, m * sizeof(bseq1_t));
}
s = &seqs[n];
s->name = strdup(ks->name.s);
s->comment = (ks->comment.s && keep_comment)? strdup(ks->comment.s) : 0;
s->seq = strdup(ks->seq.s);
s->qual = ks->qual.l? strdup(ks->qual.s) : 0;
s->l_seq = ks->seq.l;
s->aux = 0;
size += seqs[n++].l_seq;
if (size >= chunk_size) break;
}
*n_ = n;
return seqs;
}