Skip to content

Commit

Permalink
Merge changes for release 2.2
Browse files Browse the repository at this point in the history
  • Loading branch information
iainrb committed Apr 30, 2015
1 parent 23d0bb7 commit 76d8efc
Show file tree
Hide file tree
Showing 7 changed files with 157 additions and 67 deletions.
41 changes: 27 additions & 14 deletions Egt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ needed for FCR output)
*/

#include <string>
#include <iostream>
#include <sstream>
#include "Egt.h"

using namespace std;
Expand Down Expand Up @@ -193,13 +195,13 @@ void Egt::readHeader(ifstream &file)
// populate instance variables with header values
file.seekg(0); // set read position to zero, if not already there
fileVersion = readInteger(file);
gcVersion = readString(file);
clusterVersion = readString(file);
callVersion = readString(file);
normalizationVersion = readString(file);
dateCreated = readString(file);
gcVersion = readString(file, "GC version");
clusterVersion = readString(file, "Cluster version");
callVersion = readString(file, "Call version");
normalizationVersion = readString(file, "Normalization version");
dateCreated = readString(file, "Date created");
mode = file.get();
manifest = readString(file);
manifest = readString(file, "Manifest name");
}

int Egt::readInteger(ifstream &file)
Expand All @@ -214,7 +216,7 @@ void Egt::readPreface(ifstream &file) {
// read the 'preface' from the body of an EGT file
// assumes file is positioned at start of the body
dataVersion = readInteger(file);
opa = readString(file);
opa = readString(file, "OPA");
snpTotal = readInteger(file);
}

Expand All @@ -226,29 +228,40 @@ void Egt::readSNPNames(ifstream &file, string names[]) {
for (int i=0;i<snpTotal;i++) {
// skip genotype scores
// length of strings is unknown, so must read each one and discard it
readString(file);
readString(file, "genotype score");
}
for (int i=0;i<snpTotal;i++) {
names[i] = readString(file);
stringstream sstream;
sstream << "SNP name at index " << i;
names[i] = readString(file, sstream.str());
}
}

string Egt::readString(ifstream &file) {
string Egt::readString(ifstream &file, string name) {
// EGT string format is as follows:
// - First byte is a *signed* char encoding the string length
// - Subsequent bytes contain the string
// Total bytes read is (length encoded in first byte)+1 -- at most 128
// Second argument is an (optional) identifier used for logging
int length = int(file.get()); // get a single byte
if (not file.good())
if (not file.good()) {
throw("Cannot read length from EGT file, file state is not good");
else if (length <= 0)
} else if (length < 0) {
throw("Illegal string length in EGT file");
} else if (length == 0) {
cerr << "Warning: String '" << name << "' has zero length." << endl;
return "";
}
char * buffer;
buffer = new char[length+1];
buffer[length] = '\0'; // ensure buffer ends with a null character
file.read(buffer, length);
if (not file.good())
throw("Cannot read string from EGT file, file state is not good");
if (not file.good()) {
stringstream sstream;
sstream << "Cannot read string '" << name << \
"' from EGT file, file state is not good";
throw(sstream.str());
}
string result = string(buffer);
delete buffer;
return result;
Expand Down
2 changes: 1 addition & 1 deletion Egt.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ class Egt {
float readFloat(ifstream &file);
void readPreface(ifstream &file);
void readSNPNames(ifstream &file, string names[]);
string readString(ifstream &file);
string readString(ifstream &file, string name="UNKNOWN_NAME");

};

Expand Down
7 changes: 4 additions & 3 deletions Fcr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,13 +169,14 @@ void FcrWriter::write(Egt *egt, Manifest *manifest, ostream *outStream,
else sampleName = gtc->sampleName;
for (unsigned int j = 0; j < manifest->snps.size(); j++) {
string snpName = manifest->snps[j].name;
double x_raw = gtc->xRawIntensity[j];
double y_raw = gtc->yRawIntensity[j];
unsigned short x_raw = gtc->xRawIntensity[j];
unsigned short y_raw = gtc->yRawIntensity[j];
float score = gtc->scores[j];
double x_norm;
double y_norm;
unsigned int norm_id = manifest->normIdMap[manifest->snps[j].normId];
gtc->normalizeIntensity(x_raw, y_raw, x_norm, y_norm, norm_id);
XFormClass xf = gtc->XForm[norm_id];
xf.normalize(x_raw, y_raw, x_norm, y_norm);
// correction of negative intensities, for consistency with GenomeStudio
if (x_norm < epsilon) { x_norm = 0.0; }
if (y_norm < epsilon) { y_norm = 0.0; }
Expand Down
99 changes: 65 additions & 34 deletions Gtc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,56 @@
//
#include "Gtc.h"
#include <cstring>
#include <cstdio>
#include <iostream>
#include <iomanip>
#include <fstream>
#include <sstream>
#include <vector>

using namespace std;

XFormClass::XFormClass(void)
XFormClass::XFormClass(int version, float xOffset, float yOffset,
float xScale, float yScale, float shear, float theta)
{
this->version = version;
this->xOffset = xOffset;
this->yOffset = yOffset;
this->xScale = xScale;
this->yScale = yScale;
this->shear = shear;
this->theta = theta;
}

void XFormClass::normalize(unsigned short x_raw, unsigned short y_raw,
double &x_norm, double &y_norm)
{
// This is the intensity normalization calculation, according to Illumina
double tempx = x_raw - xOffset;
double tempy = y_raw - yOffset;
double cos_theta = cos(theta);
double sin_theta = sin(theta);
double tempx2 = cos_theta * tempx + sin_theta * tempy;
double tempy2 = -sin_theta * tempx + cos_theta * tempy;
double tempx3 = tempx2 - shear * tempy2;
double tempy3 = tempy2;
x_norm = tempx3 / xScale;
y_norm = tempy3 / yScale;
}

string XFormClass::toString(void)
{
int digits = 6;
stringstream sstream;
sstream << "Version: " << version << endl;
sstream << setprecision(digits);
sstream << "XOffset: " << xOffset << endl;
sstream << "YOffset: " << yOffset << endl;
sstream << "XScale: " << xScale << endl;
sstream << "YScale: " << yScale << endl;
sstream << "Shear: " << shear << endl;
sstream << "Theta: " << theta << endl;
return sstream.str();
}

Gtc::Gtc(void)
Expand Down Expand Up @@ -219,48 +260,38 @@ string Gtc::json_dump(void)
return s.str();
}

void Gtc::normalizeIntensity(double x_raw, double y_raw,
double &x_norm, double &y_norm,
unsigned int norm_id) {
// This is the normalization calculation, according to Illumina
XFormClass *XF = &(this->XForm[norm_id]);
double tempx = x_raw - XF->xOffset;
double tempy = y_raw - XF->yOffset;
double cos_theta = cos(XF->theta);
double sin_theta = sin(XF->theta);
double tempx2 = cos_theta * tempx + sin_theta * tempy;
double tempy2 = -sin_theta * tempx + cos_theta * tempy;
double tempx3 = tempx2 - XF->shear * tempy2;
double tempy3 = tempy2;
x_norm = tempx3 / XF->xScale;
y_norm = tempy3 / XF->yScale;
}

void Gtc::readXForm(ifstream &file, int offset)
{
int arrayLen;
int total_reserved = 6; // 'reserved' floats after the xform fields
float reserved;

int version;
float xOffset;
float yOffset;
float xScale;
float yScale;
float shear;
float theta;

ios::pos_type pos = file.tellg();
file.seekg(offset);
file.read((char*)&arrayLen,4);

for (int n=0; n<arrayLen; n++) {
XFormClass X;
file.read((char*)&X.version, 4);
file.read((char*)&X.xOffset, 4);
file.read((char*)&X.yOffset, 4);
file.read((char*)&X.xScale, 4);
file.read((char*)&X.yScale, 4);
file.read((char*)&X.shear, 4);
file.read((char*)&X.theta, 4);
file.read((char*)&reserved, 4);
file.read((char*)&reserved, 4);
file.read((char*)&reserved, 4);
file.read((char*)&reserved, 4);
file.read((char*)&reserved, 4);
file.read((char*)&reserved, 4);
this->XForm.push_back(X);
for (int i=0; i<arrayLen; i++) {
file.read((char*)&version, 4);
file.read((char*)&xOffset, 4);
file.read((char*)&yOffset, 4);
file.read((char*)&xScale, 4);
file.read((char*)&yScale, 4);
file.read((char*)&shear, 4);
file.read((char*)&theta, 4);
XFormClass X = XFormClass(version, xOffset, yOffset, xScale,
yScale, shear, theta);
this->XForm.push_back(X);
for (int j=0; j<total_reserved; j++) {
file.read((char*)&reserved, 4);
}
}
file.seekg(pos);
}
Expand Down
25 changes: 15 additions & 10 deletions Gtc.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,21 @@ using namespace std;

class XFormClass {
public:
XFormClass();
int version;
float xOffset;
float yOffset;
float xScale;
float yScale;
float shear;
float theta;
XFormClass(int version, float xOffset, float yOffset, float xScale,
float yScale, float shear, float theta);
int version;
float xOffset;
float yOffset;
float xScale;
float yScale;
float shear;
float theta;

void normalize(unsigned short x_raw, unsigned short y_raw,
double &x_norm, double &y_norm);

string toString();

};

class BaseCallClass {
Expand Down Expand Up @@ -81,8 +88,6 @@ class Gtc {
double passRate(double cutoff);
double correctedPassRate(double cutoff);

void normalizeIntensity(double x_raw, double y_raw, double &x_norm, double &y_norm, unsigned int norm_id);

string errorMsg;
string filename;
int version; // file version (expected to be 3
Expand Down
7 changes: 4 additions & 3 deletions commands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,11 +218,12 @@ void Commander::commandCreate(string infile, string outfile, bool normalize, str
double xn;
double yn;
int idx = snp->index - 1; // index is zero based in arrays, but starts from 1 in the map file
double x_raw = gtc->xRawIntensity[idx];
double y_raw = gtc->yRawIntensity[idx];
unsigned short x_raw = gtc->xRawIntensity[idx];
unsigned short y_raw = gtc->yRawIntensity[idx];
unsigned int norm_id = manifest->normIdMap[snp->normId];
if (normalize) {
gtc->normalizeIntensity(x_raw, y_raw, xn, yn, norm_id);
XFormClass xf = gtc->XForm[norm_id];
xf.normalize(x_raw, y_raw, xn, yn);
} else {
xn = gtc->xRawIntensity[idx];
yn = gtc->yRawIntensity[idx];
Expand Down
43 changes: 41 additions & 2 deletions test_simtools.h
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ class FcrTest : public TestBase
}
};

class NormalizeTest : public TestBase
class ManifestTest : public TestBase
{
public:

Expand All @@ -257,7 +257,7 @@ class NormalizeTest : public TestBase
TS_TRACE("Finished manifest test");
}

void testNormalize(void)
void testManifestNormalize(void)
{
// compare normalized output with reference file
string infile = "data/mock.bpm.csv";
Expand Down Expand Up @@ -489,5 +489,44 @@ class Win2UnixTest : public TestBase

};

class XFormTest : public TestBase
{
// tests the intensity normalization for writing .sim files

public:

void testXForm(void) {

int version= 1;
float xOffset = 150.0;
float yOffset = 90.0;
float xScale = 12000.0;
float yScale = 8000.0;
float shear = 0.02;
float theta = 0.01;

XFormClass *X;
TS_ASSERT_THROWS_NOTHING(X = new XFormClass(version, xOffset, yOffset,
xScale, yScale, shear, theta)
);
TS_TRACE("Created XFormClass object");

unsigned short x_raw = 400;
unsigned short y_raw = 200;
double x_norm;
double y_norm;
double x_norm_expected = 0.020744799066257; // 14 significant digits
double y_norm_expected = 0.013436817602487;
TS_ASSERT_THROWS_NOTHING(X->normalize(x_raw, y_raw, x_norm, y_norm));
TS_TRACE("Ran intensity normalization");

double epsilon = 1e-14;
TS_ASSERT(abs(x_norm - x_norm_expected) < epsilon);
TS_ASSERT(abs(y_norm - y_norm_expected) < epsilon);
TS_TRACE("Normalized intensities checked against expected values");
}

};

// Putting TestSuite classes in separate files appears not to work
// See Cxx manual section 4.4; possible issue with compiler options

0 comments on commit 76d8efc

Please sign in to comment.