Skip to content

Commit

Permalink
Use policy enum in get_called_alleles rather than bool switch
Browse files Browse the repository at this point in the history
Also fixes a bug in NormalContamination and MedianSomaticMappingQuality resulting from trimming reference alleles.
  • Loading branch information
Daniel Cooke committed Mar 14, 2019
1 parent f014fd4 commit a1262e2
Show file tree
Hide file tree
Showing 14 changed files with 44 additions and 27 deletions.
2 changes: 1 addition & 1 deletion src/core/csr/measures/base_mismatch_count.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ Measure::ResultType BaseMismatchCount::do_evaluate(const VcfRecord& call, const
result.reserve(samples.size());
for (const auto& sample : samples) {
std::vector<Allele> alleles; bool has_ref;
std::tie(alleles, has_ref) = get_called_alleles(call, sample, true);
std::tie(alleles, has_ref) = get_called_alleles(call, sample);
int sample_result {0};
if (!alleles.empty()) {
const auto sample_allele_support = compute_allele_support(alleles, assignments, sample);
Expand Down
6 changes: 3 additions & 3 deletions src/core/csr/measures/denovo_contamination.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,9 @@ void sort_unique(Container& values)

auto get_denovo_alleles(const VcfRecord& denovo, const Trio& trio)
{
auto parent_alleles = concat(get_called_alleles(denovo, trio.mother(), true).first,
get_called_alleles(denovo, trio.father(), true).first);
auto child_alleles = get_called_alleles(denovo, trio.child(), true).first;
auto parent_alleles = concat(get_called_alleles(denovo, trio.mother()).first,
get_called_alleles(denovo, trio.father()).first);
auto child_alleles = get_called_alleles(denovo, trio.child()).first;
sort_unique(parent_alleles); sort_unique(child_alleles);
std::vector<Allele> result {};
result.reserve(child_alleles.size());
Expand Down
2 changes: 1 addition & 1 deletion src/core/csr/measures/mapping_quality_divergence.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ Measure::ResultType MappingQualityDivergence::do_evaluate(const VcfRecord& call,
boost::optional<int> sample_result {};
if (call.is_heterozygous(sample)) {
std::vector<Allele> alleles; bool has_ref;
std::tie(alleles, has_ref) = get_called_alleles(call, sample, true);
std::tie(alleles, has_ref) = get_called_alleles(call, sample);
if (!alleles.empty()) {
const auto sample_allele_support = compute_allele_support(alleles, assignments, sample);
const auto mapping_qualities = extract_mapping_qualities(sample_allele_support);
Expand Down
2 changes: 1 addition & 1 deletion src/core/csr/measures/median_base_quality.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ Measure::ResultType MedianBaseQuality::do_evaluate(const VcfRecord& call, const
boost::optional<int> sample_result {};
if (is_evaluable(call, sample)) {
std::vector<Allele> alleles; bool has_ref;
std::tie(alleles, has_ref) = get_called_alleles(call, sample, true);
std::tie(alleles, has_ref) = get_called_alleles(call, sample);
if (has_ref) alleles.erase(std::cbegin(alleles));
if (!alleles.empty()) {
const auto sample_allele_support = compute_allele_support(alleles, assignments, sample);
Expand Down
9 changes: 7 additions & 2 deletions src/core/csr/measures/median_somatic_mapping_quality.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ std::unique_ptr<Measure> MedianSomaticMappingQuality::do_clone() const

namespace {

auto extract_called_alleles(const VcfRecord& call, const VcfRecord::SampleName& sample)
{
return get_called_alleles(call, sample, ReferencePadPolicy::trim_alt_alleles).first;
}

template <typename Container>
void sort_unique(Container& values)
{
Expand All @@ -47,10 +52,10 @@ auto get_somatic_alleles(const VcfRecord& somatic, const std::vector<SampleName>
{
std::vector<Allele> somatic_sample_alleles {}, normal_sample_alleles {};
for (const auto& sample : somatic_samples) {
utils::append(get_called_alleles(somatic, sample, true).first, somatic_sample_alleles);
utils::append(extract_called_alleles(somatic, sample), somatic_sample_alleles);
}
for (const auto& sample : normal_samples) {
utils::append(get_called_alleles(somatic, sample, true).first, normal_sample_alleles);
utils::append(extract_called_alleles(somatic, sample), normal_sample_alleles);
}
sort_unique(somatic_sample_alleles); sort_unique(normal_sample_alleles);
std::vector<Allele> result {};
Expand Down
2 changes: 1 addition & 1 deletion src/core/csr/measures/mismatch_count.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ Measure::ResultType MismatchCount::do_evaluate(const VcfRecord& call, const Face
result.reserve(samples.size());
for (const auto& sample : samples) {
std::vector<Allele> alleles; bool has_ref;
std::tie(alleles, has_ref) = get_called_alleles(call, sample, true);
std::tie(alleles, has_ref) = get_called_alleles(call, sample);
int sample_result {0};
if (!alleles.empty()) {
const auto sample_allele_support = compute_allele_support(alleles, assignments, sample);
Expand Down
9 changes: 7 additions & 2 deletions src/core/csr/measures/normal_contamination.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ std::unique_ptr<Measure> NormalContamination::do_clone() const

namespace {

auto extract_called_alleles(const VcfRecord& call, const VcfRecord::SampleName& sample)
{
return get_called_alleles(call, sample, ReferencePadPolicy::trim_alt_alleles).first;
}

template <typename Container>
void sort_unique(Container& values)
{
Expand All @@ -45,10 +50,10 @@ auto get_somatic_alleles(const VcfRecord& somatic, const std::vector<SampleName>
{
std::vector<Allele> somatic_sample_alleles {}, normal_sample_alleles {};
for (const auto& sample : somatic_samples) {
utils::append(get_called_alleles(somatic, sample, true).first, somatic_sample_alleles);
utils::append(extract_called_alleles(somatic, sample), somatic_sample_alleles);
}
for (const auto& sample : normal_samples) {
utils::append(get_called_alleles(somatic, sample, true).first, normal_sample_alleles);
utils::append(extract_called_alleles(somatic, sample), normal_sample_alleles);
}
sort_unique(somatic_sample_alleles); sort_unique(normal_sample_alleles);
std::vector<Allele> result {};
Expand Down
2 changes: 1 addition & 1 deletion src/core/csr/measures/read_end_bias.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ Measure::ResultType ReadEndBias::do_evaluate(const VcfRecord& call, const FacetM
const EndDefinition end_def {end_fraction_};
for (const auto& sample : samples) {
std::vector<Allele> alleles; bool has_ref;
std::tie(alleles, has_ref) = get_called_alleles(call, sample, true);
std::tie(alleles, has_ref) = get_called_alleles(call, sample);
if (!alleles.empty()) {
const auto allele_support = compute_allele_support(alleles, assignments, sample);
result.push_back(calculate_max_tail_bias(allele_support, end_def));
Expand Down
2 changes: 1 addition & 1 deletion src/core/csr/measures/read_side_bias.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ Measure::ResultType ReadSideBias::do_evaluate(const VcfRecord& call, const Facet
result.reserve(samples.size());
for (const auto& sample : samples) {
std::vector<Allele> alleles; bool has_ref;
std::tie(alleles, has_ref) = get_called_alleles(call, sample, true);
std::tie(alleles, has_ref) = get_called_alleles(call, sample);
if (!alleles.empty()) {
const auto allele_support = compute_allele_support(alleles, assignments, sample);
auto position_bias = calculate_position_bias(allele_support);
Expand Down
2 changes: 1 addition & 1 deletion src/core/csr/measures/read_tail_bias.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ Measure::ResultType ReadTailBias::do_evaluate(const VcfRecord& call, const Facet
const TailDefinition tail_def {tail_fraction_};
for (const auto& sample : samples) {
std::vector<Allele> alleles; bool has_ref;
std::tie(alleles, has_ref) = get_called_alleles(call, sample, true);
std::tie(alleles, has_ref) = get_called_alleles(call, sample);
if (!alleles.empty()) {
const auto allele_support = compute_allele_support(alleles, assignments, sample);
result.push_back(calculate_max_tail_bias(allele_support, tail_def));
Expand Down
2 changes: 1 addition & 1 deletion src/core/csr/measures/strand_bias.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ Measure::ResultType StrandBias::do_evaluate(const VcfRecord& call, const FacetMa
boost::optional<double> sample_result {};
if (is_evaluable(call, sample)) {
std::vector<Allele> alleles; bool has_ref;
std::tie(alleles, has_ref) = get_called_alleles(call, sample, true);
std::tie(alleles, has_ref) = get_called_alleles(call, sample);
assert(!alleles.empty());
const auto sample_allele_support = compute_allele_support(alleles, assignments, sample);
const auto direction_counts = get_direction_counts(sample_allele_support, mapped_region(call));
Expand Down
2 changes: 1 addition & 1 deletion src/core/csr/measures/supplementary_fraction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ Measure::ResultType SupplementaryFraction::do_evaluate(const VcfRecord& call, co
result.reserve(samples.size());
for (const auto& sample : samples) {
std::vector<Allele> alleles; bool has_ref;
std::tie(alleles, has_ref) = get_called_alleles(call, sample, true);
std::tie(alleles, has_ref) = get_called_alleles(call, sample);
if (has_ref) alleles.erase(std::cbegin(alleles));
boost::optional<double> sample_result {};
if (!alleles.empty()) {
Expand Down
13 changes: 8 additions & 5 deletions src/utils/genotype_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ auto extract_genotype(const VcfRecord& call, const SampleName& sample)
} // namespace

std::pair<std::vector<Allele>, bool>
get_called_alleles(const VcfRecord& call, const VcfRecord::SampleName& sample, const bool trim_padding)
get_called_alleles(const VcfRecord& call, const VcfRecord::SampleName& sample, const ReferencePadPolicy ref_pad_policy)
{
auto genotype = get_genotype(call, sample);
remove_missing_alleles(genotype);
Expand All @@ -182,7 +182,7 @@ get_called_alleles(const VcfRecord& call, const VcfRecord::SampleName& sample, c
std::vector<Allele> result {};
result.reserve(genotype.size());
bool has_ref {false};
if (trim_padding) {
if (ref_pad_policy != ReferencePadPolicy::leave) {
auto first_itr = std::begin(genotype);
const auto ref_itr = std::find(first_itr, std::end(genotype), call.ref());
if (ref_itr != std::end(genotype)) {
Expand Down Expand Up @@ -214,9 +214,12 @@ get_called_alleles(const VcfRecord& call, const VcfRecord::SampleName& sample, c
}
if (has_ref) {
auto& ref = genotype.front();
ref.erase(std::cbegin(ref), std::next(std::cbegin(ref), *max_ref_pad));
auto allele_region = expand_lhs(call_region, -*max_ref_pad);
result.emplace_back(std::move(allele_region), std::move(ref));
if (ref_pad_policy == ReferencePadPolicy::trim_all_alleles) {
ref.erase(std::cbegin(ref), std::next(std::cbegin(ref), *max_ref_pad));
result.emplace_back(expand_lhs(call_region, -*max_ref_pad), std::move(ref));
} else {
result.emplace_back(call_region, std::move(ref));
}
std::rotate(std::rbegin(result), std::next(std::rbegin(result)), std::rend(result));
}
if (!unknwown_pad_allele_indices.empty()) {
Expand Down
16 changes: 10 additions & 6 deletions src/utils/genotype_reader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,17 @@ class GenomicRegion;

using GenotypeMap = std::unordered_map<SampleName, MappableFlatSet<Genotype<Haplotype>>>;

std::pair<std::vector<Allele>, bool>
get_called_alleles(const VcfRecord& call, const VcfRecord::SampleName& sample, const bool trim_padding = true);
enum class ReferencePadPolicy { leave, trim_alt_alleles, trim_all_alleles };

GenotypeMap extract_genotypes(const std::vector<VcfRecord>& calls,
const std::vector<VcfRecord::SampleName>& samples,
const ReferenceGenome& reference,
boost::optional<GenomicRegion> call_region = boost::none);
std::pair<std::vector<Allele>, bool>
get_called_alleles(const VcfRecord& call, const VcfRecord::SampleName& sample,
ReferencePadPolicy ref_pad_policy = ReferencePadPolicy::trim_all_alleles);

GenotypeMap
extract_genotypes(const std::vector<VcfRecord>& calls,
const std::vector<VcfRecord::SampleName>& samples,
const ReferenceGenome& reference,
boost::optional<GenomicRegion> call_region = boost::none);

}

Expand Down

0 comments on commit a1262e2

Please sign in to comment.