Skip to content

Commit

Permalink
Added export of a guide tree to the Newick format (#11).
Browse files Browse the repository at this point in the history
  • Loading branch information
agudys committed Oct 1, 2018
1 parent b5c8a9d commit 8488429
Show file tree
Hide file tree
Showing 8 changed files with 133 additions and 35 deletions.
40 changes: 37 additions & 3 deletions core/NewickTree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Adam Gudys

#include <stdexcept>
#include <algorithm>
#include <ostream>
#include <sstream>
#include <string.h>

#include <boost/spirit/home/classic.hpp>
Expand All @@ -22,11 +24,10 @@ using namespace quickprobs;



void parseNewickTree(
void NewickTree::parse(
const std::vector<CSequence>& sequences,
const std::string& description,
std::vector<pair<int, int>>& guideTree,
bool verbose)
std::vector<pair<int, int>>& guideTree)
{
if (description.length() == 0) {
throw std::runtime_error("Error while parsing Newick tree: empty description.");
Expand Down Expand Up @@ -66,3 +67,36 @@ void parseNewickTree(
}
}

void NewickTree::store(
const std::vector<CSequence>& sequences,
const std::vector<pair<int, int>>& guideTree,
std::string& description) {

ostringstream oss;
oss << "(";
storeBranch(sequences, guideTree, guideTree.back().first, oss);
oss << ",";
storeBranch(sequences, guideTree, guideTree.back().second, oss);
oss << ");";
description = std::move(oss.str());
}

std::ostream& NewickTree::storeBranch(const std::vector<CSequence>& sequences, const std::vector<pair<int, int>>& guideTree, int index, std::ostream& oss) {

if (index < sequences.size()) {

if (sequences[index].id[0] == '>') {
oss << sequences[index].id.substr(1) << ":1.0";
}
else {
oss << sequences[index].id << ":1.0";
}
}
else {
oss << "(";
storeBranch(sequences, guideTree, guideTree[index].first, oss) << ",";
storeBranch(sequences, guideTree, guideTree[index].second, oss) << "):1.0";
}

return oss;
}
28 changes: 23 additions & 5 deletions core/NewickTree.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,31 @@ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Adam Gudys
#define _NEWICK_TREE_H
#include <string>
#include <vector>
#include <ostream>

class CSequence;

void parseNewickTree(
const std::vector<CSequence>& sequences,
const std::string& description,
std::vector<std::pair<int,int>>& guideTree,
bool verbose);
class NewickTree {

protected:
bool verbose;

public:
NewickTree(bool verbose) : verbose(verbose) {}

void parse(
const std::vector<CSequence>& sequences,
const std::string& description,
std::vector<std::pair<int, int>>& guideTree);

void store(
const std::vector<CSequence>& sequences,
const std::vector<std::pair<int, int>>& guideTree,
std::string& description);

private:
std::ostream& storeBranch(const std::vector<CSequence>& sequences, const std::vector<std::pair<int, int>>& guideTree, int index, std::ostream& oss);
};


#endif
4 changes: 2 additions & 2 deletions core/defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Adam Gudys
#include <cstdlib>
#include <cstdint>

#define FAMSA_VER "1.2.4"
#define FAMSA_DATE "2018-07-16"
#define FAMSA_VER "1.2.5"
#define FAMSA_DATE "2018-10-01"
#define FAMSA_AUTHORS "S. Deorowicz, A. Debudaj-Grabysz, A. Gudys"

// Uncomment for huge alignments (e.g., no of sequences > 10^6 and final alignemnt length > 10^5), when
Expand Down
36 changes: 32 additions & 4 deletions core/msa.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ bool CFAMSA::ComputeGuideTree()
gt_stats.push_back(make_pair(1, 0));
}

// Construct guide tree using UPGMA algorithm
// Construct guide tree using selected algorithm
if (params.guide_tree == GT_method::single_linkage)
SingleLinkage();
else if (params.guide_tree == GT_method::UPGMA)
Expand All @@ -241,6 +241,11 @@ bool CFAMSA::ComputeGuideTree()
if (!ImportGuideTreeFromNewick()) // file can be unexisting, so need to terminate the code here
return false;

// store guide tree in Newick format
if (params.guide_tree_out_file.length() > 0) {
ExportGuideTreeToNewick();
}

// Convert sequences into gapped sequences
gapped_sequences.reserve(sequences.size());

Expand Down Expand Up @@ -549,7 +554,7 @@ bool CFAMSA::ImportGuideTreeFromNewick()
{
// Load newick description
ifstream newickFile;
newickFile.open(params.guide_treee_file_name);
newickFile.open(params.guide_tree_in_file);
if (!newickFile.good()) {
return false;
}
Expand All @@ -562,11 +567,34 @@ bool CFAMSA::ImportGuideTreeFromNewick()
description.erase(newend, description.end());

// Load guide tree
parseNewickTree(sequences, description, guide_tree, params.verbose_mode);
NewickTree nw(params.verbose_mode);
nw.parse(sequences, description, guide_tree);

return true;
}

bool CFAMSA::ExportGuideTreeToNewick()
{
// store guide tree
string description;
NewickTree nw(params.verbose_mode);
nw.store(sequences, guide_tree, description);

// Open file
ofstream newickFile;
newickFile.open(params.guide_tree_out_file);
if (!newickFile.good()) {
return false;
}

newickFile << description;

return true;
}




// *******************************************************************
void CFAMSA::RefineRandom(CProfile* profile_to_refine, vector<size_t> &dest_prof_id)
{
Expand Down Expand Up @@ -921,7 +949,7 @@ bool CFAMSA::ComputeMSA()
break;
case GT_method::imported:
cerr << "imported\n";
cerr << " guide tree file: " << params.guide_treee_file_name << "\n";
cerr << " guide tree file: " << params.guide_tree_in_file << "\n";
break;
case GT_method::single_linkage:
cerr << "single linkage\n";
Expand Down
1 change: 1 addition & 0 deletions core/msa.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ class CFAMSA
void GuideTreeChained();
#endif
bool ImportGuideTreeFromNewick();
bool ExportGuideTreeToNewick();

#ifdef DEVELOPER_MODE
bool LoadRefSequences();
Expand Down
4 changes: 3 additions & 1 deletion core/params.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ struct CParams

GT_method guide_tree;
int guide_tree_seed;
string guide_treee_file_name;
string guide_tree_in_file;
string guide_tree_out_file;

bool test_ref_sequences;
string ref_file_name;
Expand Down Expand Up @@ -82,6 +83,7 @@ struct CParams
very_verbose_mode = _very_verbose_mode;

guide_tree = GT_method::single_linkage;
guide_tree_out_file = "";

sackin_index = 0;
ref_seq_subtree_size = 0;
Expand Down
30 changes: 18 additions & 12 deletions famsa_cpu/famsa_cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ typedef struct {

GT_method guide_tree;
int guide_tree_seed;
string guide_treee_file_name;
string guide_tree_in_file;
string guide_tree_out_file;
double indel_exp;

bool test_ref_sequenes;
Expand Down Expand Up @@ -97,17 +98,17 @@ void show_usage()
#endif

#ifdef DEVELOPER_MODE
cout << " -gt <sl, upgma, chained> - guide tree method (single linkage, UPGMA, chained)\n";
cout << " -gt <sl, upgma, chained> - guide tree method (single linkage, UPGMA, chained)\n"
<< " (default: sl)\n";
cout << " -gt_chained <value> - seed for random number generator in chained method\n"
<< " (defualt: " << execution_params.guide_tree_seed << ")\n";
#else
cout << " -gt <sl, upgma> - guide tree method (single linkage, UPGMA, chained)\n";
#endif
cout << " -gt <sl, upgma, chained> - choice of guide tree method (single linkage, UPGMA, chained)\n";
cout << " (default: sl)\n";
#ifdef DEVELOPER_MODE
cout << " -gt_chained <value> - seed for random number generator in chained method\n";
cout << " (defualt: " << execution_params.guide_tree_seed << ")\n";
cout << " -gt <sl, upgma> - guide tree method (single linkage, UPGMA)\n"
<< " (default: sl)\n";
#endif

cout << " -gt_import <file_name> - import guide tree in Newick format\n";
cout << " -gt_export <file_name> - export guide tree to Newick format\n";

#ifdef DEVELOPER_MODE
cout << " -ref <file_name> - load referential sequences (for benchmarks) and calculate the minimal subtree size containing them\n";
Expand Down Expand Up @@ -141,7 +142,8 @@ void init_params()

execution_params.guide_tree = GT_method::single_linkage;
execution_params.guide_tree_seed = 0;
execution_params.guide_treee_file_name = "guide_tree.txt";
execution_params.guide_tree_in_file = "guide_tree.txt";
execution_params.guide_tree_out_file = "";

execution_params.test_ref_sequenes = false;
execution_params.ref_file_name = "";
Expand Down Expand Up @@ -210,7 +212,10 @@ bool parse_params(int argc, char **argv)
}
else if (cur_par == "-gt_import") {
execution_params.guide_tree = GT_method::imported;
execution_params.guide_treee_file_name = argv[argno++];
execution_params.guide_tree_in_file = argv[argno++];
}
else if (cur_par == "-gt_export") {
execution_params.guide_tree_out_file = argv[argno++];
}
#ifdef DEVELOPER_MODE
else if (cur_par == "-gt_chained")
Expand Down Expand Up @@ -276,7 +281,8 @@ void set_famsa_params(CParams &famsa_params)
famsa_params.indel_exp = execution_params.indel_exp;

famsa_params.guide_tree = execution_params.guide_tree;
famsa_params.guide_treee_file_name = execution_params.guide_treee_file_name;
famsa_params.guide_tree_in_file = execution_params.guide_tree_in_file;
famsa_params.guide_tree_out_file = execution_params.guide_tree_out_file;
famsa_params.guide_tree_seed = execution_params.guide_tree_seed;

famsa_params.test_ref_sequences = execution_params.test_ref_sequenes;
Expand Down
25 changes: 17 additions & 8 deletions famsa_gpu/famsa_gpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ typedef struct {

GT_method guide_tree;
int guide_tree_seed;
string guide_treee_file_name;
string guide_tree_in_file;
string guide_tree_out_file;
double indel_exp;

bool test_ref_sequenes;
Expand Down Expand Up @@ -98,13 +99,17 @@ void show_usage()
#endif

#ifdef DEVELOPER_MODE
cout << " -gt <sl, upgma, chained> - guide tree method (single linkage, UPGMA, chained)\n";
cout << " -gt <sl, upgma, chained> - guide tree method (single linkage, UPGMA, chained)\n"
<< " (default: sl)\n";
cout << " -gt_chained <value> - seed for random number generator in chained method\n"
<< " (defualt: " << execution_params.guide_tree_seed << ")\n";
#else
cout << " -gt <sl, upgma> - guide tree method (single linkage, UPGMA, chained)\n";
cout << " -gt <sl, upgma> - guide tree method (single linkage, UPGMA)\n"
<< " (default: sl)\n";
#endif
cout << " -gt_chained <value> - seed for random number generator in chained method\n";
cout << " (defualt: " << execution_params.guide_tree_seed << ")\n";

cout << " -gt_import <file_name> - import guide tree in Newick format\n";
cout << " -gt_export <file_name> - export guide tree to Newick format\n";

#ifdef DEVELOPER_MODE
cout << " -ref <file_name> - load referential sequences (for benchmarks) and calculate the minimal subtree size containing them\n";
Expand Down Expand Up @@ -138,7 +143,7 @@ void init_params()

execution_params.guide_tree = GT_method::single_linkage;
execution_params.guide_tree_seed = 0;
execution_params.guide_treee_file_name = "guide_tree.txt";
execution_params.guide_tree_in_file = "guide_tree.txt";

execution_params.test_ref_sequenes = false;
execution_params.ref_file_name = "";
Expand Down Expand Up @@ -215,7 +220,10 @@ bool parse_params(int argc, char **argv)
}
else if (cur_par == "-gt_import") {
execution_params.guide_tree = GT_method::imported;
execution_params.guide_treee_file_name = argv[argno++];
execution_params.guide_tree_in_file = argv[argno++];
}
else if (cur_par == "-gt_export") {
execution_params.guide_tree_out_file = argv[argno++];
}
#ifdef DEVELOPER_MODE
else if (cur_par == "-gt_chained")
Expand Down Expand Up @@ -281,7 +289,8 @@ void set_famsa_params(CParams &famsa_params)
famsa_params.indel_exp = execution_params.indel_exp;

famsa_params.guide_tree = execution_params.guide_tree;
famsa_params.guide_treee_file_name = execution_params.guide_treee_file_name;
famsa_params.guide_tree_in_file = execution_params.guide_tree_in_file;
famsa_params.guide_tree_out_file = execution_params.guide_tree_out_file;
famsa_params.guide_tree_seed = execution_params.guide_tree_seed;

famsa_params.test_ref_sequences = execution_params.test_ref_sequenes;
Expand Down

0 comments on commit 8488429

Please sign in to comment.