Skip to content

Commit

Permalink
Initial commit.
Browse files Browse the repository at this point in the history
  • Loading branch information
eric committed Nov 4, 2014
0 parents commit d14421c
Show file tree
Hide file tree
Showing 29 changed files with 6,972 additions and 0 deletions.
675 changes: 675 additions & 0 deletions LISCENCE

Large diffs are not rendered by default.

45 changes: 45 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# MAKEFILE SOURCE:
# Author: yanick.rochon@gmail.com
# Date : 2011-08-10

# Project name; the linked executable is written to $(BINDIR)/$(TARGET).
TARGET = pollux

# C compiler used for the per-file compile step.
CC = gcc
# Compile flags: C99 mode, and search the current directory for headers.
CFLAGS = -std=c99 -I.

# Link command; "-o" is part of the variable, so the output file name must
# directly follow $(LINKER) wherever it is used.
LINKER = gcc -o
# Link flags. -lm pulls in the math library.
# NOTE(review): -Wall and -I. look like compile-time options; confirm whether
# they were meant to live in CFLAGS instead.
LFLAGS = -Wall -I. -lm

# Change these to set the directories for sources, object files and the binary.
# Object files are currently written next to the sources.
SRCDIR = source
OBJDIR = source
BINDIR = .

# Every .c file in SRCDIR is compiled; OBJECTS maps each one to its .o file.
SOURCES := $(wildcard $(SRCDIR)/*.c)
# List of headers in SRCDIR. Nothing else in the part of the Makefile shown
# here references this variable.
INCLUDES := $(wildcard $(SRCDIR)/*.h)
OBJECTS := $(SOURCES:$(SRCDIR)/%.c=$(OBJDIR)/%.o)
# Removal command used by the clean/remove targets (-f ignores missing files).
rm = rm -f


# Link every object file into the final executable.
# The link flags (which include -lm) come AFTER the object files: linkers that
# resolve symbols left to right, or that drop not-yet-needed libraries, only
# pick up a library's symbols if it appears after the objects that use them.
$(BINDIR)/$(TARGET): $(OBJECTS)
	@$(LINKER) $@ $(OBJECTS) $(LFLAGS)
	@echo "Linking complete!"

# Compile each source file into its object file (static pattern rule).
# Every header in SRCDIR is listed as a prerequisite so that editing a header
# triggers a rebuild; this is coarse (all objects rebuild) but never stale.
# $< is still the .c file because it is the first prerequisite.
$(OBJECTS): $(OBJDIR)/%.o : $(SRCDIR)/%.c $(INCLUDES)
	@$(CC) $(CFLAGS) -c $< -o $@
	@echo "Compiled "$<" successfully!"

# Delete all object files. Declared phony (the special target is spelled
# .PHONY) so the rule still runs if a file named "clean" exists.
.PHONY: clean
clean:
	@$(rm) $(OBJECTS)
	@echo "Cleanup complete!"

# Delete the object files (via clean) and the linked executable. Declared
# phony (the special target is spelled .PHONY) so the rule still runs if a
# file named "remove" exists.
.PHONY: remove
remove: clean
	@$(rm) $(BINDIR)/$(TARGET)
	@echo "Executable removed!"


61 changes: 61 additions & 0 deletions README
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
======================================================================
Pollux
Copyright (C) 2014 Eric Marinier

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.

======================================================================

-- Description --

Pollux is a platform-independent error corrector that targets errors
produced by second-generation sequencing technologies.

-- Release --

Pollux 1.00
4 November 2014

This is the initial release of Pollux.

-- Requirements --

Pollux requires a 64-bit Unix-based operating system.

-- Installation --

make

-- Running Pollux --

Pollux's command line arguments can be found by running:
./pollux

Simple correction:
./pollux -i <fastq_reads>

-- Contact --

Brendan McConkey: mcconkey@uwaterloo.ca
Eric Marinier: eric.marinier@uwaterloo.ca

-- Credits --

The Makefile is derived from one written by yanick.rochon@gmail.com (2010-11-05).
(http://stackoverflow.com/users/320700/yanick-rochon)

The source makes use of data structures provided by Simon Howard.
(http://c-algorithms.sourceforge.net/)


82 changes: 82 additions & 0 deletions source/Correction.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
/*
Pollux
Copyright (C) 2014 Eric Marinier
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

#include <stdlib.h>
#include "Correction.h"

/**
 * Allocates a new Correction and fills it in from the passed settings.
 *
 * All four correction types (substitutions, insertions, deletions and
 * homopolymers) start out enabled. The filtering and qualityUpdating flags
 * are not configurable through this function; they start out zeroed so that
 * they never hold an indeterminate value.
 *
 * The reads, kmers and outputDirectory pointers are stored as passed; nothing
 * is copied by this function.
 *
 * @param reads The read sets to correct.
 * @param numReadSets The number of read sets.
 * @param kmers The k-mer hash table data structure.
 * @param kmerSize The length of the k-mers.
 * @param lowKMerThreshold The low k-mer count threshold.
 * @param outputDirectory The output directory.
 * @param correctionFunction The function used to correct a read.
 *
 * @return The newly allocated Correction, or NULL if the allocation failed.
 */
Correction* createCorrection(Reads** reads, unsigned int numReadSets,
        KMerHashTable* kmers, unsigned int kmerSize, unsigned int lowKMerThreshold,
        char* outputDirectory, CorrectionFunction correctionFunction)
{
    // calloc zero-fills the structure, so members not assigned below
    // (filtering, qualityUpdating) have a defined value.
    Correction* correction = calloc(1, sizeof *correction);

    if(correction == NULL)
    {
        return NULL;
    }

    correction->reads = reads;
    correction->numReadSets = numReadSets;

    correction->kmers = kmers;
    correction->kmerSize = kmerSize;
    correction->lowKMerThreshold = lowKMerThreshold;

    // Every correction type is enabled by default:
    correction->substitutions = true;
    correction->insertions = true;
    correction->deletions = true;
    correction->homopolymers = true;

    correction->outputDirectory = outputDirectory;

    correction->correctionFunction = correctionFunction;

    return correction;
}

/**
 * Accessor for the read sets stored in the passed correction.
 *
 * @param correction The correction to query.
 *
 * @return The stored reads pointer.
 */
Reads** correctionGetReads(Correction* correction)
{
    Reads** readSets = correction->reads;

    return readSets;
}

/**
 * Accessor for the number of read sets stored in the passed correction.
 *
 * @param correction The correction to query.
 *
 * @return The stored number of read sets.
 */
unsigned int correctionGetNumReadSets(Correction* correction)
{
    const unsigned int setCount = correction->numReadSets;

    return setCount;
}

/**
 * Accessor for the k-mer hash table stored in the passed correction.
 *
 * @param correction The correction to query.
 *
 * @return The stored k-mer hash table pointer.
 */
KMerHashTable* correctionGetKMers(Correction* correction)
{
    KMerHashTable* table = correction->kmers;

    return table;
}

/**
 * Accessor for the k-mer size stored in the passed correction.
 *
 * @param correction The correction to query.
 *
 * @return The stored k-mer size.
 */
unsigned int correctionGetKMerSize(Correction* correction)
{
    const unsigned int size = correction->kmerSize;

    return size;
}

/**
 * Accessor for the low k-mer threshold stored in the passed correction.
 *
 * @param correction The correction to query.
 *
 * @return The stored low k-mer threshold.
 */
unsigned int correctionGetLowThreshold(Correction* correction)
{
    const unsigned int threshold = correction->lowKMerThreshold;

    return threshold;
}

/**
 * Accessor for the correction function pointer stored in the passed
 * correction.
 *
 * @param correction The correction to query.
 *
 * @return The stored correction function pointer.
 */
CorrectionFunction correctionGetFunction(Correction* correction)
{
    CorrectionFunction function = correction->correctionFunction;

    return function;
}

/**
 * Accessor for the output directory stored in the passed correction.
 *
 * @param correction The correction to query.
 *
 * @return The stored output directory pointer (not a copy).
 */
char* correctionGetOutputDirectory(Correction* correction)
{
    char* directory = correction->outputDirectory;

    return directory;
}
76 changes: 76 additions & 0 deletions source/Correction.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*
Pollux
Copyright (C) 2014 Eric Marinier
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

#include "Reads.h"
#include "KMerHashTable.h"
#include "Utility.h"

#ifndef CORRECTION_H
#define CORRECTION_H

#ifdef __cplusplus
extern "C" {
#endif

// Forward declaration so that CorrectionFunction can refer to Correction
// before the structure itself is defined.
typedef struct Correction Correction;
// Pointer to a function that takes a read and the correction settings and
// returns a bool.
// NOTE(review): the meaning of the returned bool is not visible in this
// header; confirm against the implementations.
typedef bool (*CorrectionFunction)(struct read* read, Correction* correction);

/**
 * Holds the inputs and settings for a correction: the read sets, the k-mer
 * table with its parameters, the enabled correction types, the output
 * directory and the function used to correct a read.
 */
struct Correction
{
// The read sets. numReadSets is presumably the number of entries in reads
// (TODO confirm against the callers).
Reads** reads;
unsigned int numReadSets;

// The k-mer hash table, the k-mer length and the low k-mer count threshold.
KMerHashTable* kmers;
unsigned int kmerSize;
unsigned int lowKMerThreshold;

// Enabled Corrections (createCorrection() sets all four to true):
bool substitutions;
bool insertions;
bool deletions;
bool homopolymers;

// createCorrection() takes no values for these two flags; code that relies
// on them must set them explicitly.
bool filtering;
bool qualityUpdating;

// Stored as passed to createCorrection(); the string is not copied there.
char* outputDirectory;

CorrectionFunction correctionFunction; // Correction function pointer.

};

/**
 * Allocates a new Correction and stores the passed settings in it. The
 * substitutions, insertions, deletions and homopolymers flags are all set to
 * true. The pointer arguments are stored as passed and are not copied.
 *
 * @param reads The read sets.
 * @param numReadSets The number of read sets.
 * @param kmers The k-mer hash table data structure.
 * @param kmerSize The length of the k-mers.
 * @param lowKMerThreshold The low k-mer count threshold.
 * @param outputDirectory The output directory.
 * @param correctionFunction The correction function pointer.
 *
 * @return The newly allocated Correction.
 */
Correction* createCorrection(Reads** reads, unsigned int numReadSets,
KMerHashTable* kmers, unsigned int kmerSize, unsigned int lowKMerThreshold,
char* outputDirectory, CorrectionFunction correctionFunction);

// Accessors: each one returns the corresponding member of the passed
// Correction unchanged. Pointers are returned as stored, not copied.

// Returns the stored read sets.
Reads** correctionGetReads(Correction* correction);
// Returns the stored number of read sets.
unsigned int correctionGetNumReadSets(Correction* correction);
// Returns the stored k-mer hash table.
KMerHashTable* correctionGetKMers(Correction* correction);
// Returns the stored k-mer size.
unsigned int correctionGetKMerSize(Correction* correction);
// Returns the stored low k-mer threshold (the lowKMerThreshold member).
unsigned int correctionGetLowThreshold(Correction* correction);
// Returns the stored correction function pointer.
CorrectionFunction correctionGetFunction(Correction* correction);
// Returns the stored output directory.
char* correctionGetOutputDirectory(Correction* correction);

#ifdef __cplusplus
}
#endif

#endif /* CORRECTION_H */

54 changes: 54 additions & 0 deletions source/Counting.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/*
Pollux
Copyright (C) 2014 Eric Marinier
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

#include "Counting.h"
#include "Utility.h"

/**
 * Looks up the count of every k-mer in the sequence and writes the counts, in
 * order of k-mer start position, into the passed array.
 *
 * Nothing is written when the sequence is shorter than a single k-mer.
 *
 * @param sequence The sequence to get the k-mer counts for.
 * @param length The length of the sequence.
 * @param kmers The k-mer hash table data structure.
 * @param kmerSize The length of the k-mers.
 * @param counts The array to fill; (length - kmerSize + 1) entries are
 *      written.
 */
void getKMerCounts(unsigned long long int* sequence, unsigned int length,
        KMerHashTable* kmers, unsigned int kmerSize, unsigned int* counts)
{
    // A sequence shorter than one k-mer contains no k-mers. Checking this
    // explicitly keeps the unsigned subtraction below from wrapping around.
    if(length < kmerSize)
    {
        return;
    }

    const unsigned int total = length - kmerSize + 1;

    // Iterate over all k-mers within the read:
    for(unsigned int i = 0; i < total; i++)
    {
        // Get the k-mer spanning [i, i + kmerSize):
        unsigned long long int kmer = getKMer(sequence, i, i + kmerSize);

        // Get the count:
        counts[i] = KMerTableLookup(kmers, kmer);
    }
}

/**
 * Determines whether every entry of counts in the range [start, end) is less
 * than or equal to the threshold.
 *
 * @param counts The array to examine.
 * @param start The first index to examine (inclusive).
 * @param end The index at which to stop (exclusive).
 * @param THRESHOLD The threshold; entries equal to it still pass.
 *
 * @return 1 if no examined entry exceeds the threshold (including when the
 *      range is empty), 0 otherwise.
 */
unsigned int areCountsBelowThreshold(unsigned int* counts, unsigned int start,
        unsigned int end, const unsigned int THRESHOLD)
{
    // The index is unsigned like the bounds, so large start values are never
    // turned into a negative subscript.
    for(unsigned int i = start; i < end; i++)
    {
        if(counts[i] > THRESHOLD)
        {
            return 0;
        }
    }

    return 1;
}
64 changes: 64 additions & 0 deletions source/Counting.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
Pollux
Copyright (C) 2014 Eric Marinier
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

#include "KMerHashTable.h"

#ifndef COUNTING_H
#define COUNTING_H

#ifdef __cplusplus
extern "C" {
#endif

/**
 * This function fills the passed array with the number of occurrences of each
 * k-mer in the sequence, as looked up in the k-mer hash table. Entry i of the
 * array holds the count of the k-mer starting at position i.
 *
 * @param sequence The sequence to get the k-mer counts for.
 * @param length The length of the sequence.
 * @param kmers The k-mer hash table data structure.
 * @param kmerSize The length of the kmers.
 * @param counts The counts array to fill. There will be (length - kmerSize + 1)
 *      entries expected to be filled.
 */
void getKMerCounts(unsigned long long int* sequence, unsigned int length,
KMerHashTable* kmers, unsigned int kmerSize, unsigned int* counts);

/**
 * This function determines whether or not all the entries within the specified
 * range of the passed array are less than or equal to the given threshold.
 * Despite the name, entries equal to the threshold are accepted.
 *
 * @param counts The array to examine.
 * @param start The starting index (inclusive).
 * @param end The ending index (exclusive).
 * @param THRESHOLD The threshold. Less than or equal to the threshold!
 *
 * @return 1 if all the examined values are less than or equal to the
 *      threshold, 0 otherwise.
 */
unsigned int areCountsBelowThreshold(unsigned int* counts, unsigned int start,
unsigned int end, const unsigned int THRESHOLD);


#ifdef __cplusplus
}
#endif

#endif /* COUNTING_H */

Loading

0 comments on commit d14421c

Please sign in to comment.