-
Notifications
You must be signed in to change notification settings - Fork 5
/
libfastk.h
130 lines (103 loc) · 4.41 KB
/
libfastk.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
/*******************************************************************************************
*
* C library routines to access and operate upon FastK histograms, k-mer tables, and profiles
*
* Author: Gene Myers
* Date : November 2020
*
*******************************************************************************************/
#ifndef _LIBFASTK
#define _LIBFASTK
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdint.h>
#include <math.h>
#include <dirent.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <unistd.h>
#include <errno.h>
#include "gene_core.h"
// HISTOGRAM
// Histogram of k-mer occurrence counts over the inclusive count range [low,high]:
// hist[i] is the number of k-mers that occur i times.
// int64 is not declared in this header; presumably it comes from "gene_core.h".
typedef struct
{ int kmer; // Histogram is for k-mers of this length
int unique; // 1 => count of unique k-mers, 0 => count of k-mer instances
int low; // Histogram is for range [low,high]
int high; // Upper (inclusive) end of the histogram range
int64 *hist; // hist[i] for i in [low,high] = # of k-mers occurring i times
             // NOTE(review): indexed by the count i itself, not by i-low, so the pointer is
             // presumably pre-offset by the implementation -- confirm before freeing or
             // reallocating it directly.
} Histogram;
// Histogram routines. Only the signatures are visible in this header; the notes below that
// go beyond the signatures are assumptions to be confirmed against the implementation.

// Load the histogram identified by 'name' and return a pointer to it.
// NOTE(review): presumably 'name' is a file path/root and NULL is returned on failure;
// the result presumably must be released with Free_Histogram -- confirm.
Histogram *Load_Histogram(char *name);
// Update H using the supplied 'low', 'high' and 'unique' values, which parallel the
// Histogram fields of the same names. Returns nothing.
// NOTE(review): presumably re-ranges/re-interprets H in place -- confirm.
void Modify_Histogram(Histogram *H, int low, int high, int unique);
// Write histogram H to the destination identified by 'name'.
// NOTE(review): the meaning of the int return (presumably a status code) is not visible here.
int Write_Histogram(char *name, Histogram *H);
// Release a Histogram.
// NOTE(review): presumably only valid for objects obtained from Load_Histogram -- confirm.
void Free_Histogram(Histogram *H);
// K-MER TABLE
// Handle for a k-mer table: nels unique, sorted k-mers of length kmer, each with a count.
// Only the first three fields are meant for clients; the trailing array is reserved for
// the implementation.
typedef struct
{ int kmer; // Kmer length
int minval; // The minimum count of a k-mer in the table
int64 nels; // # of unique, sorted k-mers in the table
void *private[7]; // Private fields: opaque implementation state, do not read or write.
                  // NOTE(review): 'private' is a keyword in C++, so this header cannot be
                  // included from C++ translation units as written.
} Kmer_Table;
// K-mer table routines. Only the signatures are visible in this header; the notes below that
// go beyond the signatures are assumptions to be confirmed against the implementation.

// Load the k-mer table identified by 'name' and return a handle to it.
// NOTE(review): 'cut_off' presumably is a minimum count threshold (cf. Kmer_Table.minval),
// and NULL is presumably returned on failure -- confirm.
Kmer_Table *Load_Kmer_Table(char *name, int cut_off);
// Release a Kmer_Table handle.
void Free_Kmer_Table(Kmer_Table *T);
// Fetch the k-mer at index i of T as a character string, using/returning 'seq'.
// NOTE(review): presumably i must lie in [0,T->nels) and 'seq' must hold at least
// T->kmer+1 chars; behavior for seq == NULL is not visible here -- confirm.
char *Fetch_Kmer(Kmer_Table *T, int64 i, char *seq);
// Fetch the count associated with the k-mer at index i of T.
// NOTE(review): presumably i must lie in [0,T->nels) -- confirm.
int Fetch_Count(Kmer_Table *T, int64 i);
// Look up the k-mer given as the string 'kseq' in T and return an int64 result.
// NOTE(review): presumably the result is the k-mer's index, with a sentinel (e.g. negative)
// when absent -- confirm.
int64 Find_Kmer(Kmer_Table *T, char *kseq);
// K-MER STREAM
// Cursor over a k-mer table that is read as a stream of entries.
// The fields are grouped, per the original section comments, into: table-wide facts,
// the current position ("visible part"), derived byte sizes, and "hidden parts" that
// hold implementation state. uint8 and int64 are presumably declared in "gene_core.h".
typedef struct
{ int kmer; // Kmer length
int minval; // The minimum count of a k-mer in the stream
int64 nels; // # of elements in entire table
// Current position (visible part)
int64 cidx; // current element index
uint8 *csuf; // current element suffix
int cpre; // current element prefix
// Other useful parameters (entry layout, in bytes)
int ibyte; // # of bytes in prefix
int kbyte; // Kmer encoding in bytes
int tbyte; // Kmer+count entry in bytes
int hbyte; // Kmer suffix in bytes (= kbyte - ibyte)
int pbyte; // Kmer,count suffix in bytes (= tbyte - ibyte)
// Hidden parts (implementation state; clients should not depend on these)
int ixlen; // length of prefix index (= 4^(4*ibyte))
int shift; // shift for inverse mapping
uint8 *table; // The (huge) table in memory
int64 *index; // Prefix compression index
int *inver; // inverse prefix index
int copn; // File currently open
int part; // Thread # of file currently open
int nthr; // # of thread parts
int nlen; // length of path name
char *name; // Path name for table parts (only # missing)
uint8 *ctop; // Ptr top of current table block in buffer
int64 *neps; // Size of each thread part in elements
int clone; // Is this a clone? (cf. Clone_Kmer_Stream)
} Kmer_Stream;
// K-mer stream routines. Only the signatures are visible in this header; the notes below that
// go beyond the signatures are assumptions to be confirmed against the implementation.

// Open a stream over the k-mer table identified by 'name'.
// NOTE(review): presumably returns NULL on failure -- confirm.
Kmer_Stream *Open_Kmer_Stream(char *name);
// Create another stream handle derived from S (see the 'clone' field of Kmer_Stream).
// NOTE(review): what state is shared between S and the clone, and the required order of
// freeing them, is not visible here -- confirm.
Kmer_Stream *Clone_Kmer_Stream(Kmer_Stream *S);
// Release a Kmer_Stream handle.
void Free_Kmer_Stream(Kmer_Stream *S);
// Position S at the first entry of the table.
void First_Kmer_Entry(Kmer_Stream *S);
// Advance S to the next entry.
// NOTE(review): presumably end-of-stream is detected via S->csuf == NULL or
// S->cidx >= S->nels -- confirm.
void Next_Kmer_Entry(Kmer_Stream *S);
// Produce the k-mer at the current position of S as a character string, using/returning 'seq'.
// NOTE(review): buffer size requirement (presumably S->kmer+1) and behavior for
// seq == NULL are not visible here -- confirm.
char *Current_Kmer(Kmer_Stream *S, char *seq);
// Return the count of the entry at the current position of S.
int Current_Count(Kmer_Stream *S);
// Produce the current entry of S in byte form, using/returning 'seq'.
// NOTE(review): presumably the packed entry of S->tbyte bytes -- confirm size and
// behavior for seq == NULL.
uint8 *Current_Entry(Kmer_Stream *S, uint8 *seq);
// Reposition S at the element with index 'idx'.
// NOTE(review): presumably idx must lie in [0,S->nels) -- confirm.
void GoTo_Kmer_Index(Kmer_Stream *S, int64 idx);
// Reposition S according to the k-mer given as the character string 'seq'.
// NOTE(review): the int result presumably reports whether that exact k-mer was found -- confirm.
int GoTo_Kmer_String(Kmer_Stream *S, char *seq);
// Reposition S according to the k-mer given in byte form by 'entry'.
// NOTE(review): the int result presumably reports whether that exact k-mer was found -- confirm.
int GoTo_Kmer_Entry(Kmer_Stream *S, uint8 *entry);
// PROFILES
// Index over a set of per-read k-mer count profiles that are stored in nparts parts.
// Only the named fields are meant for clients; the trailing array is reserved for
// the implementation.
typedef struct
{ int kmer; // Kmer length
int nparts; // # of threads/parts for the profiles
int nreads; // total # of reads in data set
int64 *nbase; // nbase[i] for i in [0,nparts) = id of last read in part i + 1
              // NOTE(review): presumably parsed as (id of last read in part i) + 1,
              // not "part i+1" -- confirm.
int64 *index; // index[i] for i in [0,nreads) = offset in relevant part of
// compressed profile for read i
void *private[4]; // Private fields: opaque implementation state, do not read or write.
                  // NOTE(review): 'private' is a keyword in C++, so this header cannot be
                  // included from C++ translation units as written.
} Profile_Index;
// Profile routines. Only the signatures are visible in this header; the notes below that
// go beyond the signatures are assumptions to be confirmed against the implementation.

// Open the profile set identified by 'name' and return an index handle for it.
// NOTE(review): presumably returns NULL on failure -- confirm.
Profile_Index *Open_Profiles(char *name);
// Create another handle derived from P.
// NOTE(review): what state is shared between P and the clone, and the required order of
// freeing them, is not visible here -- confirm.
Profile_Index *Clone_Profiles(Profile_Index *P);
// Release a Profile_Index handle.
void Free_Profiles(Profile_Index *P);
// Fetch the profile of read 'id' into the caller-supplied array 'profile'.
// NOTE(review): presumably at most 'plen' values are written and the int result is the
// full profile length (so a result > plen means truncation); presumably id must lie in
// [0,P->nreads) -- confirm. Note that id is int64 while nreads is int.
int Fetch_Profile(Profile_Index *P, int64 id, int plen, uint16 *profile);
#endif // _LIBFASTK