Skip to content
This repository has been archived by the owner on Dec 13, 2017. It is now read-only.

Plotting and chaining #6

Open
wants to merge 30 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,3 +96,26 @@ of a query may not be adjacent.
[hough]: https://en.wikipedia.org/wiki/Hough_transform
[invhash]: https://gist.github.com/lh3/974ced188be2f90422cc
[miniasm]: https://github.com/lh3/miniasm


## Plotting

There are utilities for chaining and plotting in the utils folder.

### Installing

``` cd utils && make ```

### Running example (gorilla vs. GRCh38)

```
cd utils
cd examples
cat susie_ggo_grch38.minimap.txt | ../bin/layout -i chr1,chr2,chr3,chr4,chr5,chr6,chr7,chr8,chr9,chr10,chr11,chr12,chr13,chr14,chr15,chr16,chr17,chr18,chr19,chr20,chr21,chr22,chrX,chrY > susie_ggo_human_layout.txt
Rscript --vanilla ../plot/plotLayout.R -f susie_ggo_human_layout.txt -p gor-vs-human.pdf
```

The resulting figure will look like:

![alt tag](https://github.com/zeeev/minimap/blob/master/utils/example/gor-vs-human.png)

35 changes: 35 additions & 0 deletions utils/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
######################################
# Makefile written by Zev Kronenberg #
# zev.kronenberg@gmail.com #
######################################

CXX=g++
GIT_VERSION := $(shell git describe --abbrev=4 --dirty --always)
CFLAGS = -Wall -DVERSION=\"$(GIT_VERSION)\" -std=c++0x -g
INCLUDE = -I lib/
CPP_FILES = $(wildcard lib/*cpp)
OBJ_FILES = $(addprefix lib/,$(notdir $(CPP_FILES:.cpp=.o)))
BIN_FILES = $(wildcard src/*cpp)
BIN_NAMES = $(notdir $(BIN_FILES:.cpp=))

.PHONY: bins

bins: createBin bin/layout bin/qChain bin/synblocks

bin/synblocks: $(OBJ_FILES)
$(CXX) $(CFLAGS) $(INCLUDE) $(OBJ_FILES) -o bin/synblocks src/synblocks.cpp

bin/layout: $(OBJ_FILES)
$(CXX) $(CFLAGS) $(INCLUDE) $(OBJ_FILES) -o bin/layout src/layout.cpp

bin/qChain: $(OBJ_FILES)
$(CXX) $(CFLAGS) $(INCLUDE) $(OBJ_FILES) -o bin/qChain src/chaining.cpp

src/%.o: src/%.cpp $(BAMTOOLS_LIB)
$(CXX) $(CFLAGS) $(INCLUDE) -c -o $@ $<

createBin:
-mkdir bin

clean:
-cd lib && rm *.o && rm -rf ../bin
92 changes: 92 additions & 0 deletions utils/chain/chain.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#include "chain.hpp"

bool chain::addAlignment(int s, int e, int m){

node * n = new node;
n->matches = double(m);
n->overallScore = double(m);
n->start = s;
n->end = e;
n->index = current_index;

nodes.push_back(n);

current_index++;

return true;
}

bool _endCmp(const node * L, const node * R){
return L->end < R->end;
}

chain::chain(void){
/* setting up the source node */
current_index = -1 ;
addAlignment(-2, -1, 0);
}

chain::~chain(void){
for(std::vector<node *>::iterator it = nodes.begin();
it != nodes.end(); it++){
delete *it;
}
}

bool chain::buildLinks(void){

// sorted by end position
sort(nodes.begin(), nodes.end(), _endCmp);
// adding sink
addAlignment(nodes.back()->end + 1, nodes.back()->end +2, 0);

for(int i = 1 ; i < nodes.size() ; i++){

double maxScore = 0;
for(int j = i - 1; j >= 0; j--){
if(nodes[j]->end <= nodes[i]->start){
double tmp_score = nodes[j]->overallScore ;
if(tmp_score > maxScore){
maxScore = tmp_score;
}
nodes[i]->children.push_back(nodes[j]);
}
}
nodes[i]->overallScore += maxScore;
}

last = nodes.back();

return true;
}

bool chain::traceback(std::vector<int> & alns){

if(last != nodes.front() && last != nodes.back() ){
alns.push_back(last->index);
}

if( last->children.empty() ){
return false;
}

double max = last->children.front()->overallScore;
node * current = last->children.front();

for(std::vector<node *>::iterator it = last->children.begin();
it != last->children.end(); it++){

if((*it)->overallScore > max ){

max = (*it)->overallScore;
current = *it ;
}
}




last = current;

return traceback(alns);
}
36 changes: 36 additions & 0 deletions utils/chain/chain.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#ifndef CHAIN_H
#define CHAIN_H

#include <vector>
#include <algorithm>
#include <iostream>
#include <cmath>
class node{
public:
double matches ;
double overallScore ;
int start ;
int end ;
int index ;

std::vector<node *> children;

};

class chain{
private:
int current_index ;
node * last;
std::vector<node *> nodes;

public:
chain(void);
~chain(void);
bool buildLinks( void );
bool addAlignment(int, int, int );
bool traceback(std::vector<int> & );
};

bool _endCmp(const node *, const node * );

#endif
72 changes: 72 additions & 0 deletions utils/chain/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#include <map>
#include <string>
#include <vector>
#include <iostream>
#include "chain.hpp"
#include "split.hpp"

void processSeqid(std::vector<std::string> & lines,
std::map<std::string, bool> & sentinel){


chain qChain;

std::string seqid;

for(std::vector<std::string>::iterator it = lines.begin();
it != lines.end(); it++){

std::vector<std::string> lineDat = split(*it, "\t");

seqid = lineDat[0];

if(sentinel.find(seqid) != sentinel.end()){
std::cerr << "FATAL: file must be sorted by tName" << std::endl;
exit(1);
}

long int qLen = atol(lineDat[1].c_str());
long int qStart = atol(lineDat[2].c_str());
long int qEnd = atol(lineDat[3].c_str());
long int matchB = atol(lineDat[9].c_str());

qChain.addAlignment(qStart, qEnd, matchB);
}

std::vector<int> indiciesOfAlignments;
qChain.buildLinks();
qChain.traceback(indiciesOfAlignments);

for(std::vector<int>::iterator it = indiciesOfAlignments.begin();
it != indiciesOfAlignments.end() ; it++){
std::cout << lines[*it] << std::endl;
}

sentinel[seqid] = true;
}

int main(int argc, char ** argv)
{
std::string line ;
std::string seqid;
std::vector<std::string> lines;
std::map<std::string, bool> sanity;

while(getline(std::cin, line)){

std::vector<std::string> lineDat = split(line, "\t");

if(lineDat[0] != seqid){
processSeqid(lines, sanity);
lines.clear();
lines.push_back(line);
seqid = lineDat[0];
}
else{
lines.push_back(line);
}
}
processSeqid(lines, sanity);

return 0;
}
23 changes: 23 additions & 0 deletions utils/chain/split.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#include "split.hpp"


std::vector<std::string> &split(const std::string &s, char delim, std::vector<std::string> &elems) {
std::string delims = std::string(1, delim);
tokenize(s, elems, delims);
return elems;
}

std::vector<std::string> split(const std::string &s, char delim) {
std::vector<std::string> elems;
return split(s, delim, elems);
}

std::vector<std::string> &split(const std::string &s, const std::string& delims, std::vector<std::string> &elems) {
tokenize(s, elems, delims);
return elems;
}

std::vector<std::string> split(const std::string &s, const std::string& delims) {
std::vector<std::string> elems;
return split(s, delims, elems);
}
53 changes: 53 additions & 0 deletions utils/chain/split.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#ifndef __SPLIT_H
#define __SPLIT_H

// functions to split a string by a specific delimiter
#include <string>
#include <vector>
#include <sstream>
#include <string.h>

// thanks to Evan Teran, http://stackoverflow.com/questions/236129/how-to-split-a-string/236803#236803

// split a string on a single delimiter character (delim)
std::vector<std::string>& split(const std::string &s, char delim, std::vector<std::string> &elems);
std::vector<std::string> split(const std::string &s, char delim);

// split a string on any character found in the string of delimiters (delims)
std::vector<std::string>& split(const std::string &s, const std::string& delims, std::vector<std::string> &elems);
std::vector<std::string> split(const std::string &s, const std::string& delims);

// from Marius, http://stackoverflow.com/a/1493195/238609
template < class ContainerT >
void tokenize(const std::string& str, ContainerT& tokens,
const std::string& delimiters = " ", const bool trimEmpty = false)
{

std::string::size_type pos, lastPos = 0;
while(true)
{
pos = str.find_first_of(delimiters, lastPos);
if(pos == std::string::npos)
{

pos = str.length();

if(pos != lastPos || !trimEmpty) {
tokens.push_back(typename ContainerT::value_type(str.data()+lastPos, (typename ContainerT::value_type::size_type)pos-lastPos));
}

break;
}
else
{
if(pos != lastPos || !trimEmpty) {
tokens.push_back(typename ContainerT::value_type(str.data()+lastPos, (typename ContainerT::value_type::size_type)pos-lastPos));
}
}

lastPos = pos + 1;
}
};


#endif
Binary file added utils/example/gor-vs-human.pdf
Binary file not shown.
Binary file added utils/example/gor-vs-human.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading