-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
eric
committed
Nov 4, 2014
0 parents
commit d14421c
Showing
29 changed files
with
6,972 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
# MAKEFILE SOURCE: | ||
# Author: yanick.rochon@gmail.com | ||
# Date : 2011-08-10 | ||
|
||
# project name (generate executable with this name)
TARGET   = pollux

CC       = gcc
# compiling flags here
CFLAGS   = -std=c99 -I.

LINKER   = gcc -o
# linking flags here
LFLAGS   = -Wall -I. -lm

# change these to set the directories where each kind of file should be
SRCDIR   = source
OBJDIR   = source
BINDIR   = .

SOURCES  := $(wildcard $(SRCDIR)/*.c)
INCLUDES := $(wildcard $(SRCDIR)/*.h)
OBJECTS  := $(SOURCES:$(SRCDIR)/%.c=$(OBJDIR)/%.o)
rm       = rm -f


# Link every object file into the executable.
# The libraries named in LFLAGS (-lm) are placed AFTER the objects: linkers
# that resolve symbols left to right would otherwise discard libm before any
# object has referenced it.
$(BINDIR)/$(TARGET): $(OBJECTS)
	@$(LINKER) $@ $(OBJECTS) $(LFLAGS)
	@echo "Linking complete!"

# Compile each source file into its matching object file.
$(OBJECTS): $(OBJDIR)/%.o : $(SRCDIR)/%.c
	@$(CC) $(CFLAGS) -c $< -o $@
	@echo "Compiled "$<" successfully!"

# Delete the object files. Declared phony so that a file named "clean"
# can never prevent the recipe from running.
.PHONY: clean
clean:
	@$(rm) $(OBJECTS)
	@echo "Cleanup complete!"

# Delete the object files and the executable.
.PHONY: remove
remove: clean
	@$(rm) $(BINDIR)/$(TARGET)
	@echo "Executable removed!"
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
====================================================================== | ||
Pollux | ||
Copyright (C) 2014 Eric Marinier | ||
|
||
This program is free software: you can redistribute it and/or modify | ||
it under the terms of the GNU General Public License as published by | ||
the Free Software Foundation, either version 3 of the License, or | ||
(at your option) any later version. | ||
|
||
This program is distributed in the hope that it will be useful, | ||
but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
GNU General Public License for more details. | ||
|
||
You should have received a copy of the GNU General Public License | ||
along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
====================================================================== | ||
|
||
-- Description -- | ||
|
||
Pollux is a platform independent error corrector which targets errors | ||
produced by second-generation sequencing technologies. | ||
|
||
-- Release -- | ||
|
||
Pollux 1.00 | ||
4 November 2014 | ||
|
||
This is the initial release of Pollux. | ||
|
||
-- Requirements -- | ||
|
||
Pollux requires a 64-bit Unix-based operating system. | ||
|
||
-- Installation -- | ||
|
||
make | ||
|
||
-- Running Pollux -- | ||
|
||
Pollux's command line arguments can be found by running: | ||
./pollux | ||
|
||
Simple correction: | ||
./pollux -i <fastq_reads> | ||
|
||
-- Contact -- | ||
|
||
Brendan McConkey: mcconkey@uwaterloo.ca | ||
Eric Marinier: eric.marinier@uwaterloo.ca | ||
|
||
-- Credits -- | ||
|
||
The Makefile is derived from yanick.rochon@gmail.com (2010-11-05). | ||
(http://stackoverflow.com/users/320700/yanick-rochon) | ||
|
||
The source makes use of data structures provided by Simon Howard. | ||
(http://c-algorithms.sourceforge.net/) | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
/* | ||
Pollux | ||
Copyright (C) 2014 Eric Marinier | ||
This program is free software: you can redistribute it and/or modify | ||
it under the terms of the GNU General Public License as published by | ||
the Free Software Foundation, either version 3 of the License, or | ||
(at your option) any later version. | ||
This program is distributed in the hope that it will be useful, | ||
but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
GNU General Public License for more details. | ||
You should have received a copy of the GNU General Public License | ||
along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
*/ | ||
|
||
#include <stdlib.h> | ||
#include "Correction.h" | ||
|
||
Correction* createCorrection(Reads** reads, unsigned int numReadSets, | ||
KMerHashTable* kmers, unsigned int kmerSize, unsigned int lowKMerThreshold, | ||
char* outputDirectory, CorrectionFunction correctionFunction) | ||
{ | ||
Correction* correction = (Correction*)malloc(sizeof(Correction)); | ||
|
||
correction->reads = reads; | ||
correction->numReadSets = numReadSets; | ||
|
||
correction->kmers = kmers; | ||
correction->kmerSize = kmerSize; | ||
correction->lowKMerThreshold = lowKMerThreshold; | ||
|
||
correction->substitutions = true; | ||
correction->insertions = true; | ||
correction->deletions = true; | ||
correction->homopolymers = true; | ||
|
||
correction->outputDirectory = outputDirectory; | ||
|
||
correction->correctionFunction = correctionFunction; | ||
|
||
return correction; | ||
} | ||
|
||
/**
 * Returns the reads pointer stored in the correction.
 *
 * @param correction The correction to query. Dereferenced unconditionally,
 *      so it must not be NULL.
 *
 * @return The value of the correction's reads field.
 */
Reads** correctionGetReads(Correction* correction)
{
    Reads** const storedReads = correction->reads;

    return storedReads;
}
|
||
/**
 * Returns the number of read sets stored in the correction.
 *
 * @param correction The correction to query. Dereferenced unconditionally,
 *      so it must not be NULL.
 *
 * @return The value of the correction's numReadSets field.
 */
unsigned int correctionGetNumReadSets(Correction* correction)
{
    const unsigned int readSetCount = correction->numReadSets;

    return readSetCount;
}
|
||
/**
 * Returns the k-mer hash table pointer stored in the correction.
 *
 * @param correction The correction to query. Dereferenced unconditionally,
 *      so it must not be NULL.
 *
 * @return The value of the correction's kmers field.
 */
KMerHashTable* correctionGetKMers(Correction* correction)
{
    KMerHashTable* const table = correction->kmers;

    return table;
}
|
||
/**
 * Returns the k-mer size stored in the correction.
 *
 * @param correction The correction to query. Dereferenced unconditionally,
 *      so it must not be NULL.
 *
 * @return The value of the correction's kmerSize field.
 */
unsigned int correctionGetKMerSize(Correction* correction)
{
    const unsigned int size = correction->kmerSize;

    return size;
}
|
||
/**
 * Returns the low k-mer threshold stored in the correction.
 *
 * @param correction The correction to query. Dereferenced unconditionally,
 *      so it must not be NULL.
 *
 * @return The value of the correction's lowKMerThreshold field.
 */
unsigned int correctionGetLowThreshold(Correction* correction)
{
    const unsigned int threshold = correction->lowKMerThreshold;

    return threshold;
}
|
||
/**
 * Returns the correction function pointer stored in the correction.
 *
 * @param correction The correction to query. Dereferenced unconditionally,
 *      so it must not be NULL.
 *
 * @return The value of the correction's correctionFunction field.
 */
CorrectionFunction correctionGetFunction(Correction* correction)
{
    const CorrectionFunction function = correction->correctionFunction;

    return function;
}
|
||
/**
 * Returns the output directory string pointer stored in the correction.
 *
 * The stored pointer itself is returned; the string is not copied.
 *
 * @param correction The correction to query. Dereferenced unconditionally,
 *      so it must not be NULL.
 *
 * @return The value of the correction's outputDirectory field.
 */
char* correctionGetOutputDirectory(Correction* correction)
{
    char* const directory = correction->outputDirectory;

    return directory;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
/* | ||
Pollux | ||
Copyright (C) 2014 Eric Marinier | ||
This program is free software: you can redistribute it and/or modify | ||
it under the terms of the GNU General Public License as published by | ||
the Free Software Foundation, either version 3 of the License, or | ||
(at your option) any later version. | ||
This program is distributed in the hope that it will be useful, | ||
but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
GNU General Public License for more details. | ||
You should have received a copy of the GNU General Public License | ||
along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
*/ | ||
|
||
#include "Reads.h" | ||
#include "KMerHashTable.h" | ||
#include "Utility.h" | ||
|
||
#ifndef CORRECTION_H | ||
#define CORRECTION_H | ||
|
||
#ifdef __cplusplus | ||
extern "C" { | ||
#endif | ||
|
||
typedef struct Correction Correction; | ||
typedef bool (*CorrectionFunction)(struct read* read, Correction* correction); | ||
|
||
struct Correction | ||
{ | ||
Reads** reads; | ||
unsigned int numReadSets; | ||
|
||
KMerHashTable* kmers; | ||
unsigned int kmerSize; | ||
unsigned int lowKMerThreshold; | ||
|
||
// Enabled Corrections: | ||
bool substitutions; | ||
bool insertions; | ||
bool deletions; | ||
bool homopolymers; | ||
|
||
bool filtering; | ||
bool qualityUpdating; | ||
|
||
char* outputDirectory; | ||
|
||
CorrectionFunction correctionFunction; // Correction function pointer. | ||
|
||
}; | ||
|
||
Correction* createCorrection(Reads** reads, unsigned int numReadSets, | ||
KMerHashTable* kmers, unsigned int kmerSize, unsigned int lowKMerThreshold, | ||
char* outputDirectory, CorrectionFunction correctionFunction); | ||
|
||
Reads** correctionGetReads(Correction* correction); | ||
unsigned int correctionGetNumReadSets(Correction* correction); | ||
KMerHashTable* correctionGetKMers(Correction* correction); | ||
unsigned int correctionGetKMerSize(Correction* correction); | ||
unsigned int correctionGetLowThreshold(Correction* correction); | ||
CorrectionFunction correctionGetFunction(Correction* correction); | ||
char* correctionGetOutputDirectory(Correction* correction); | ||
|
||
#ifdef __cplusplus | ||
} | ||
#endif | ||
|
||
#endif /* CORRECTION_H */ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
/* | ||
Pollux | ||
Copyright (C) 2014 Eric Marinier | ||
This program is free software: you can redistribute it and/or modify | ||
it under the terms of the GNU General Public License as published by | ||
the Free Software Foundation, either version 3 of the License, or | ||
(at your option) any later version. | ||
This program is distributed in the hope that it will be useful, | ||
but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
GNU General Public License for more details. | ||
You should have received a copy of the GNU General Public License | ||
along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
*/ | ||
|
||
#include "Counting.h" | ||
#include "Utility.h" | ||
|
||
void getKMerCounts(unsigned long long int* sequence, unsigned int length, | ||
KMerHashTable* kmers, unsigned int kmerSize, unsigned int* counts) | ||
{ | ||
// Variables: | ||
unsigned long long int kmer; | ||
int total = length - kmerSize + 1; | ||
|
||
// Iterate over all k-mers within the read: | ||
for(int i = 0; i < total; i++) | ||
{ | ||
// Get the next k-mer: | ||
kmer = getKMer(sequence, i, i + kmerSize); | ||
|
||
// Get the count: | ||
counts[i] = KMerTableLookup(kmers, kmer); | ||
} | ||
} | ||
|
||
/**
 * Determines whether every entry of counts in the range [start, end) is less
 * than or equal to the threshold.
 *
 * @param counts The array to examine.
 * @param start The first index to examine (inclusive).
 * @param end The index to stop at (exclusive).
 * @param THRESHOLD The largest value an entry may have.
 *
 * @return 1 if no examined entry exceeds the threshold (including when the
 *      range is empty, i.e. start >= end), 0 otherwise.
 */
unsigned int areCountsBelowThreshold(unsigned int* counts, unsigned int start,
        unsigned int end, const unsigned int THRESHOLD)
{
    // The index is unsigned like the bounds, so the comparison never mixes
    // signed and unsigned operands.
    for (unsigned int i = start; i < end; i++)
    {
        if (counts[i] > THRESHOLD)
        {
            return 0;
        }
    }

    return 1;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
/* | ||
Pollux | ||
Copyright (C) 2014 Eric Marinier | ||
This program is free software: you can redistribute it and/or modify | ||
it under the terms of the GNU General Public License as published by | ||
the Free Software Foundation, either version 3 of the License, or | ||
(at your option) any later version. | ||
This program is distributed in the hope that it will be useful, | ||
but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
GNU General Public License for more details. | ||
You should have received a copy of the GNU General Public License | ||
along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
*/ | ||
|
||
#include "KMerHashTable.h" | ||
|
||
#ifndef COUNTING_H | ||
#define COUNTING_H | ||
|
||
#ifdef __cplusplus | ||
extern "C" { | ||
#endif | ||
|
||
/** | ||
* This function fills the passed array with the number of occurrences of each | ||
* k-mer along the entire length of the sequence. | ||
* | ||
* @param sequence The sequence to get the k-mer counts for. | ||
* @param length The length of the sequence. | ||
* @param kmers The k-mer hash table data structure. | ||
* @param kmerSize The length of the kmers. | ||
* @param counts The counts array to fill. There will be (length - kmerSize + 1) | ||
* entries expected to be filled. | ||
*/ | ||
void getKMerCounts(unsigned long long int* sequence, unsigned int length, | ||
KMerHashTable* kmers, unsigned int kmerSize, unsigned int* counts); | ||
|
||
/** | ||
* This function determines whether or not all the entries within the specified | ||
* range of the passed array are less than or equal to the given threshold. | ||
* | ||
* @param counts The array to examine. | ||
* @param start The starting index (inclusive). | ||
* @param end The ending index (exclusive). | ||
* @param THRESHOLD The threshold. Less than or equal to the threshold! | ||
* | ||
* @return Whether or not the values are less than or equal to the threshold. | ||
*/ | ||
unsigned int areCountsBelowThreshold(unsigned int* counts, unsigned int start, | ||
unsigned int end, const unsigned int THRESHOLD); | ||
|
||
|
||
#ifdef __cplusplus | ||
} | ||
#endif | ||
|
||
#endif /* COUNTING_H */ | ||
|
Oops, something went wrong.