Skip to content

Commit

Permalink
fix: solving issues with rebasing on dev
Browse files (browse the repository at this point in the history)
  • Loading branch information
leoisl committed Dec 13, 2023
1 parent 30f7640 commit 183c397
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 79 deletions.
3 changes: 1 addition & 2 deletions include/localPRG.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,7 @@ class LocalPRG {
std::vector<LocalNodePtr>&, const uint32_t, const bool, const uint32_t,
const uint32_t& max_num_kmers_to_average, const uint32_t& sample_id,
float min_absolute_gene_coverage, float min_relative_gene_coverage,
float max_relative_gene_coverage, float min_gene_coverage_proportion) const;
float max_relative_gene_coverage, bool no_gene_coverage_filtering) const;
float max_relative_gene_coverage, float min_gene_coverage_proportion, bool no_gene_coverage_filtering) const;
std::vector<LocalNodePtr> get_valid_vcf_reference(const std::string&) const;

void add_variants_to_vcf(VCF&, pangenome::NodePtr, const std::string&,
Expand Down
19 changes: 10 additions & 9 deletions src/compare_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,13 +150,6 @@ void setup_compare_subcommand(CLI::App& app)
->type_name("FLOAT")
->group("Filtering");

compare_subcmd
->add_flag(
"--no-gene-coverage-filtering", opt->no_gene_coverage_filtering,
"Do not filter genes based on their coverage, effectively ignoring the three "
"previous params. This is useful if you are not using read datasets.")
->group("Filtering");

compare_subcmd
->add_option(
"--min-gene-coverage-proportion", opt->min_gene_coverage_proportion,
Expand All @@ -169,6 +162,14 @@ void setup_compare_subcommand(CLI::App& app)
->type_name("FLOAT")
->group("Filtering");

compare_subcmd
->add_flag(
"--no-gene-coverage-filtering", opt->no_gene_coverage_filtering,
"Do not filter genes based on their coverage, effectively ignoring params "
"--min-abs-gene-coverage, --min-rel-gene-coverage, --max-rel-gene-coverage and --min-gene-coverage-proportion. "
"This is useful if you are not using read datasets.")
->group("Filtering");

description = "Add extra step to carefully genotype sites.";
auto* gt_opt = compare_subcmd->add_flag("--genotype", opt->genotype, description)
->group("Consensus/Variant Calling");
Expand Down Expand Up @@ -354,8 +355,8 @@ int pandora_compare(CompareOptions& opt)
local_prg->add_consensus_path_to_fastaq(consensus_fq, c->second, kmp, lmp,
index.get_window_size(), opt.binomial, covg, opt.max_num_kmers_to_avg, 0,
opt.min_absolute_gene_coverage, opt.min_relative_gene_coverage,
opt.max_relative_gene_coverage, opt.min_gene_coverage_proportion);
opt.max_relative_gene_coverage, opt.no_gene_coverage_filtering);
opt.max_relative_gene_coverage, opt.min_gene_coverage_proportion,
opt.no_gene_coverage_filtering);

if (kmp.empty()) {
c = pangraph_sample->remove_node(c->second);
Expand Down
19 changes: 10 additions & 9 deletions src/denovo_discovery/discover_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,13 +158,6 @@ void setup_discover_subcommand(CLI::App& app)
->type_name("FLOAT")
->group("Filtering");

discover_subcmd
->add_flag(
"--no-gene-coverage-filtering", opt->no_gene_coverage_filtering,
"Do not filter genes based on their coverage, effectively ignoring the three "
"previous params. This is useful if you are not using read datasets.")
->group("Filtering");

discover_subcmd
->add_option(
"--min-gene-coverage-proportion", opt->min_gene_coverage_proportion,
Expand All @@ -177,6 +170,14 @@ void setup_discover_subcommand(CLI::App& app)
->type_name("FLOAT")
->group("Filtering");

discover_subcmd
->add_flag(
"--no-gene-coverage-filtering", opt->no_gene_coverage_filtering,
"Do not filter genes based on their coverage, effectively ignoring params "
"--min-abs-gene-coverage, --min-rel-gene-coverage, --max-rel-gene-coverage and --min-gene-coverage-proportion. "
"This is useful if you are not using read datasets.")
->group("Filtering");

description
= "Minimum size of a cluster of hits between a read and a loci to consider "
"the loci present";
Expand Down Expand Up @@ -312,8 +313,8 @@ void pandora_discover_core(const SampleData& sample, Index &index, DiscoverOptio
pangraph_node, kmp, lmp, index.get_window_size(), opt.binomial, covg,
opt.max_num_kmers_to_avg, 0,
opt.min_absolute_gene_coverage, opt.min_relative_gene_coverage,
opt.max_relative_gene_coverage, opt.min_gene_coverage_proportion);
opt.max_relative_gene_coverage, opt.no_gene_coverage_filtering);
opt.max_relative_gene_coverage, opt.min_gene_coverage_proportion,
opt.no_gene_coverage_filtering);

if (kmp.empty()) {
// mark the node as to remove
Expand Down
97 changes: 47 additions & 50 deletions src/localPRG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1699,8 +1699,8 @@ void LocalPRG::add_consensus_path_to_fastaq(Fastaq& output_fq, pangenome::NodePt
const bool bin, const uint32_t global_covg,
const uint32_t& max_num_kmers_to_average, const uint32_t& sample_id,
float min_absolute_gene_coverage, float min_relative_gene_coverage,
float max_relative_gene_coverage, float min_gene_coverage_proportion) const
float max_relative_gene_coverage, bool no_gene_coverage_filtering) const
float max_relative_gene_coverage, float min_gene_coverage_proportion,
bool no_gene_coverage_filtering) const
{
if (pnode->covg == 0) {
BOOST_LOG_TRIVIAL(warning) << "Node " << pnode->get_name() << " has no reads";
Expand All @@ -1725,62 +1725,59 @@ void LocalPRG::add_consensus_path_to_fastaq(Fastaq& output_fq, pangenome::NodePt
BOOST_LOG_TRIVIAL(debug) << "Found global coverage " << global_covg
<< " and path mean " << mean_covg;

// apply the coverage proportion filter
uint32_t amount_of_bases_with_coverage = std::count_if(covgs.begin(), covgs.end(),
[](uint32_t covg){return covg > 0;});
float coverage_proportion = ((float)amount_of_bases_with_coverage) / covgs.size();
const bool coverage_proportion_is_too_low = coverage_proportion < min_gene_coverage_proportion;
if (coverage_proportion_is_too_low) {
BOOST_LOG_TRIVIAL(warning)
<< "Filtering out gene " << name << " due to "
<< "coverage proportion (" << coverage_proportion << ") "
<< "being too low, less than the --min-gene-coverage-proportion parameter ("
<< min_gene_coverage_proportion << ")";
kmp.clear();
return;
}

// apply the min/max coverage filters
if (mean_covg < min_absolute_gene_coverage) {
BOOST_LOG_TRIVIAL(warning)
<< "Filtering out gene " << name << " due to "
if (!no_gene_coverage_filtering) {
if (mean_covg < min_absolute_gene_coverage) {
// apply the coverage proportion filter
uint32_t amount_of_bases_with_coverage = std::count_if(covgs.begin(), covgs.end(),
[](uint32_t covg){return covg > 0;});
float coverage_proportion = ((float)amount_of_bases_with_coverage) / covgs.size();
const bool coverage_proportion_is_too_low = coverage_proportion < min_gene_coverage_proportion;
if (coverage_proportion_is_too_low) {
BOOST_LOG_TRIVIAL(warning)
<< "Filtering out gene " << name << " due to "
<< "mean coverage (" << mean_covg << ") "
<< "being too low, less than the --min-abs-gene-coverage parameter (" << min_absolute_gene_coverage << ")";
kmp.clear();
return;
}
<< "coverage proportion (" << coverage_proportion << ") "
<< "being too low, less than the --min-gene-coverage-proportion parameter ("
<< min_gene_coverage_proportion << ")";
kmp.clear();
return;
}

if (mean_covg < min_relative_gene_coverage*global_covg) {
BOOST_LOG_TRIVIAL(warning)
<< "Filtering out gene " << name << " due to "
<< "mean coverage (" << mean_covg << ") "
<< "being too low, less than the "
<< "--min-rel-gene-coverage * global coverage ("
<< min_relative_gene_coverage << " * " << global_covg << " = "
<< min_relative_gene_coverage * global_covg << "). "
<< "Is global coverage very different from the expected (too low/high)? "
<< "Try setting a better genome length (see --genome-size param).";
kmp.clear();
return;
}
// apply the min/max coverage filters
if (mean_covg < min_absolute_gene_coverage) {
BOOST_LOG_TRIVIAL(warning)
<< "Filtering out gene " << name << " due to "
<< "mean coverage (" << mean_covg << ") "
<< "being too low, less than the --min-abs-gene-coverage parameter (" << min_absolute_gene_coverage << ")";
kmp.clear();
return;
}

if (mean_covg > max_relative_gene_coverage*global_covg) {
BOOST_LOG_TRIVIAL(warning)
<< "Filtering out gene " << name << " due to "
<< "mean coverage (" << mean_covg << ") "
<< "being too high, larger than the "
<< "--max-rel-gene-coverage * global coverage ("
<< max_relative_gene_coverage << " * " << global_covg << " = "
<< max_relative_gene_coverage * global_covg << "). "
<< "Is global coverage very different from the expected (too low/high)? "
<< "Try setting a better genome length (see --genome-size param).";
if (mean_covg < min_relative_gene_coverage*global_covg) {
BOOST_LOG_TRIVIAL(warning)
<< "Filtering out gene " << name << " due to "
<< "mean coverage (" << mean_covg << ") "
<< "being too low, less than the "
<< "--min-rel-gene-coverage * global coverage ("
<< min_relative_gene_coverage << " * " << global_covg << " = "
<< min_relative_gene_coverage * global_covg << "). "
<< "Is global coverage very different from the expected (too low/high)? "
<< "Try setting a better genome length (see --genome-size param).";
kmp.clear();
return;
}

if (mean_covg > max_relative_gene_coverage*global_covg) {
BOOST_LOG_TRIVIAL(warning)
<< "Filtering out gene " << name << " due to "
<< "mean coverage (" << mean_covg << ") "
<< "being too high, larger than the "
<< "--max-rel-gene-coverage * global coverage ("
<< max_relative_gene_coverage << " * " << global_covg << " = "
<< max_relative_gene_coverage * global_covg << "). "
<< "Is global coverage very different from the expected (too low/high)? "
<< "Try setting a better genome length (see --genome-size param).";
kmp.clear();
return;
}
}

std::string fq_name = pnode->get_name();
Expand Down
19 changes: 10 additions & 9 deletions src/map_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -164,13 +164,6 @@ void setup_map_subcommand(CLI::App& app)
->type_name("FLOAT")
->group("Filtering");

map_subcmd
->add_flag(
"--no-gene-coverage-filtering", opt->no_gene_coverage_filtering,
"Do not filter genes based on their coverage, effectively ignoring the three "
"previous params. This is useful if you are not using read datasets.")
->group("Filtering");

map_subcmd
->add_option(
"--min-gene-coverage-proportion", opt->min_gene_coverage_proportion,
Expand All @@ -183,6 +176,14 @@ void setup_map_subcommand(CLI::App& app)
->type_name("FLOAT")
->group("Filtering");

map_subcmd
->add_flag(
"--no-gene-coverage-filtering", opt->no_gene_coverage_filtering,
"Do not filter genes based on their coverage, effectively ignoring params "
"--min-abs-gene-coverage, --min-rel-gene-coverage, --max-rel-gene-coverage and --min-gene-coverage-proportion. "
"This is useful if you are not using read datasets.")
->group("Filtering");

description = "Add extra step to carefully genotype sites.";
auto* gt_opt = map_subcmd->add_flag("--genotype", opt->genotype, description)
->group("Consensus/Variant Calling");
Expand Down Expand Up @@ -396,8 +397,8 @@ int pandora_map(MapOptions& opt)
prg->add_consensus_path_to_fastaq(consensus_fq, pangraph_node, kmp, lmp,
index.get_window_size(), opt.binomial, covg, opt.max_num_kmers_to_avg, 0,
opt.min_absolute_gene_coverage, opt.min_relative_gene_coverage,
opt.max_relative_gene_coverage, opt.no_gene_coverage_filtering);
opt.max_relative_gene_coverage, opt.min_gene_coverage_proportion);
opt.max_relative_gene_coverage, opt.min_gene_coverage_proportion,
opt.no_gene_coverage_filtering);

if (kmp.empty()) {
#pragma omp critical(nodes_to_remove)
Expand Down

0 comments on commit 183c397

Please sign in to comment.