-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnucformat.h
137 lines (111 loc) · 3.34 KB
/
nucformat.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
/* nucformat.h - include this header in C kernels that need the
* definitions of the Read struct and the K* structs.
*/
/* Include guard: the header body is processed once per translation unit.
* NOTE(review): identifiers that begin with two underscores are reserved
* for the implementation in C; consider renaming the guard (for example
* NUCFORMAT_H) once it is confirmed that no other file tests this macro.
*/
#ifndef __NUCFORMAT_H__
#define __NUCFORMAT_H__ 1
#include <stdint.h>
#if defined(DEBUG) && (DEBUG == 1)
#include <inttypes.h>
/* Debug-only printf conversion macros (available when DEBUG == 1).
* Both expand to PRIx64, so the value is printed in hexadecimal, e.g.
*   fprintf(stderr,"%"PRINUCREADT"\n",foo)   where foo is a nuc_read_t
* PRINUCKT is presumably the counterpart for nuc_k_t values.
*/
#define PRINUCKT PRIx64
#define PRINUCREADT PRIx64
#endif
/* NOTE: To change the maximum number of bases stored in a Read,
* change only the NUMBER_OF_BASES_CUTOFF definition; likewise, to
* change the bits used per base, modify only the matching
* BITS_PER_BASE_XX definition. The remaining values are derived.
*/
#define NUMBER_OF_BASES_CUTOFF 48
#define BITS_PER_BASE_READ 4
#define BITS_PER_BASE_K 2
/* number of bases that fit in one 64-bit nuc_read_t / nuc_k_t word */
#define BASES_PER_NUC_READ_T (64 / BITS_PER_BASE_READ)
#define BASES_PER_NUC_K_T (64 / BITS_PER_BASE_K)
/* number of nuc_read_t words needed to hold NUMBER_OF_BASES_CUTOFF bases.
* Rounded up: a cutoff that is not a multiple of BASES_PER_NUC_READ_T
* still gets a word for its trailing bases instead of being truncated
* (for the default cutoff of 48 the result is unchanged: 3).
*/
#define READ_ARRAY_LENGTH ((NUMBER_OF_BASES_CUTOFF + BASES_PER_NUC_READ_T - 1) / BASES_PER_NUC_READ_T)
/* NOTE: K_LENGTH (the desired length of k*) is meant to be supplied
* at compile time, e.g. with -DK_LENGTH=XX. When it is missing,
* a default of 7 is used and a warning is emitted.
*/
#if !defined(K_LENGTH)
#warning K_LENGTH NOT DEFINED, USING 7!!
#define K_LENGTH 7
#endif
/* USEPHRED gates the per-base score type and storage (compiled in only
* when it equals 1). When it is not defined at compile time it
* defaults to 0 and a warning is emitted.
*/
#if !defined(USEPHRED)
#warning COMPILING WITHOUT PHRED SCORES!!
#define USEPHRED 0
#endif
/* PACKREAD / PACKK - OR BASE into VECTOR, then shift the result left by
* one base width (BITS_PER_BASE_READ or BITS_PER_BASE_K bits).
* Both arguments and the whole expansion are parenthesized so the
* macros act as a single operand inside larger expressions, e.g.
* PACKREAD(v, b) + 1 or a conditional expression passed as BASE.
* Each argument is evaluated exactly once.
* NOTE(review): because the shift is applied after the OR, the base
* just added does not sit in the low bits of the result - confirm
* that callers rely on this ordering.
*/
#define PACKREAD(VECTOR,BASE) (((VECTOR) | (BASE)) << BITS_PER_BASE_READ)
#define PACKK(VECTOR,BASE) (((VECTOR) | (BASE)) << BITS_PER_BASE_K)
/* Nucleotide stuff:
* Suffix of 4 = 4 bits per base (e.g. A4)
* Suffix of 2 = 2 bits per base (e.g. A2)
*/
/* convert a four bit base to a two bit base by dropping its lowest bit
* (A4 1 -> A2 0, C4 3 -> C2 1, T4 4 -> T2 2, G4 7 -> G2 3).
* The argument is parenthesized so an expression such as
* FOURTOTWO(a | b) shifts the whole value, not only its last operand.
* NOTE(review): X4 (8) maps to 4, which does not fit in two bits.
*/
#define FOURTOTWO(FOUR) ((FOUR) >> 1)
/* convert a char into a four bit base by keeping only its low nibble
* (in ASCII: 'A' -> 1, 'C' -> 3, 'T' -> 4, 'G' -> 7, 'X' -> 8).
* The argument is parenthesized so an expression such as
* CHARTOFOUR(c | 0x20) masks the whole value, not only its last operand.
*/
#define CHARTOFOUR(CHAR) ((CHAR) & 0x0f)
/* four bit base codes; each equals the low nibble of the base's
* upper-case ASCII letter, i.e. the value CHARTOFOUR produces for it
*/
#define A4 0x1 /* 'A' = 0x41 */
#define C4 0x3 /* 'C' = 0x43 */
#define G4 0x7 /* 'G' = 0x47 */
#define T4 0x4 /* 'T' = 0x54 */
#define X4 0x8 /* 'X' = 0x58 */
/* two bit base codes; each equals the matching four bit code
* shifted right by one bit (the FOURTOTWO conversion)
*/
#define A2 0x0
#define C2 0x1
#define G2 0x3
#define T2 0x2
/* nucleotide vector types: 64-bit words that hold packed bases */
typedef uint64_t nuc_k_t;
typedef uint64_t nuc_read_t;
/* per-base score type; only declared when built with USEPHRED == 1 */
#if defined(USEPHRED) && (USEPHRED == 1)
typedef uint8_t score_t;
#endif
/* READ_INDEX - bit-packed index record for a read */
typedef struct READ_INDEX{
    /* set_delimiter - set high on an entry to mark that it, and every
    * entry up to the next high set_delimiter, belong to the same set.
    * This should be set by the sort algorithm.
    */
    uint32_t set_delimiter : 1;
    /* eod_delimiter - NOTE(review): not documented in the original;
    * presumably an end-of-data marker - confirm.
    */
    uint32_t eod_delimiter : 1;
    /* read_index - index of the read.
    * NOTE(review): the three fields total 33 bits, more than a single
    * uint32_t holds, so the struct is likely larger than 4 bytes
    * - confirm this is intended.
    */
    uint32_t read_index : 31;
}READ_INDEX;
/* Read - used by parser to represent a read.
* read_index  - index record for this read (a READ_INDEX).
* read_length - length of the read; NOTE(review): presumably counted in
*               bases and at most NUMBER_OF_BASES_CUTOFF - confirm.
* read        - READ_ARRAY_LENGTH words of type nuc_read_t holding the
*               bases of the read.
* read_score  - one score_t per base up to NUMBER_OF_BASES_CUTOFF;
*               present only when compiled with USEPHRED == 1.
*/
typedef struct Read{
READ_INDEX read_index;
uint8_t read_length;
nuc_read_t read[READ_ARRAY_LENGTH];
#if (USEPHRED == 1)
score_t read_score[NUMBER_OF_BASES_CUTOFF];
#endif
}Read;
/* K - used by hash kernel to represent a hashed k_star sequence.
* read_index - index record (a READ_INDEX) carried with the sequence.
* k_offset   - NOTE(review): presumably the position of the k_star
*              sequence within its read - confirm.
* k_length   - NOTE(review): presumably the length of the sequence in
*              bases - confirm.
* k_hash     - the hashed sequence, stored in one 64-bit nuc_k_t.
*/
typedef struct K{
READ_INDEX read_index;
uint8_t k_offset;
uint8_t k_length;
nuc_k_t k_hash;
}K;
/* K_SMALL - output of sort kernel.
* Carries the same read_index and k_offset members as K, without the
* k_length and k_hash members.
* Note that the struct tag is K_Small while the typedef name is K_SMALL.
*/
typedef struct K_Small{
READ_INDEX read_index;
uint8_t k_offset;
}K_SMALL;
/* ReadMerged - output of merge block.
* read     - the complete Read record.
* k_offset - a k_offset value stored alongside the read;
*            NOTE(review): presumably taken from the matching
*            K / K_SMALL entry - confirm.
*/
typedef struct ReadMerged{
Read read;
uint8_t k_offset;
}ReadMerged;
/* ReadMatch - output of match block.
* a, b - the READ_INDEX records of two reads;
*        NOTE(review): presumably the pair reported as a match - confirm.
*/
typedef struct ReadMatch{
READ_INDEX a;
READ_INDEX b;
}ReadMatch;
/* sizes, in bytes, of the Read and K structs */
#define READSTRUCTSIZE (sizeof(Read))
#define KSTRUCTSIZE (sizeof(K))
#endif /* end nucformat.h */