Skip to content

Commit

Permalink
Remove disallow_inferior_barcodes as an option, it is now always on for dual-ended kits
Browse files Browse the repository at this point in the history
  • Loading branch information
MarkBicknellONT committed Nov 15, 2024
1 parent b8c9f98 commit cc32c93
Show file tree
Hide file tree
Showing 7 changed files with 17 additions and 40 deletions.
7 changes: 0 additions & 7 deletions dorado/cli/basecaller.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -225,11 +225,6 @@ void set_dorado_basecaller_args(utils::arg_parse::ArgParser& parser, int& verbos
.help("Require both ends of a read to be barcoded for a double ended barcode.")
.default_value(false)
.implicit_value(true);
parser.visible.add_argument("--disallow-inferior-barcodes")
.help("Declassify reads if a better barcode match over the threshold exists at "
"either end of the read for double ended kits.")
.default_value(false)
.implicit_value(true);
parser.visible.add_argument("--barcode-arrangement")
.help("Path to file with custom barcode arrangement. Requires --kit-name.");
parser.visible.add_argument("--barcode-sequences")
Expand Down Expand Up @@ -751,8 +746,6 @@ int basecaller(int argc, char* argv[]) {
barcoding_info = std::make_shared<demux::BarcodingInfo>();
barcoding_info->kit_name = parser.visible.get<std::string>("--kit-name");
barcoding_info->barcode_both_ends = parser.visible.get<bool>("--barcode-both-ends");
barcoding_info->disallow_inferior_barcodes =
parser.visible.get<bool>("--disallow-inferior-barcodes");
barcoding_info->trim = !no_trim_barcodes;

std::optional<std::string> custom_seqs =
Expand Down
6 changes: 0 additions & 6 deletions dorado/cli/demux.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ std::shared_ptr<const dorado::demux::BarcodingInfo> get_barcoding_info(
return nullptr;
}
result->barcode_both_ends = parser.visible.get<bool>("--barcode-both-ends");
result->disallow_inferior_barcodes = parser.visible.get<bool>("--disallow-inferior-barcodes");
result->trim = !parser.visible.get<bool>("--no-trim");
if (sample_sheet) {
result->allowed_barcodes = sample_sheet->get_barcode_values();
Expand Down Expand Up @@ -155,11 +154,6 @@ int demuxer(int argc, char* argv[]) {
.help("Require both ends of a read to be barcoded for a double ended barcode.")
.default_value(false)
.implicit_value(true);
parser.visible.add_argument("--disallow-inferior-barcodes")
.help("Declassify reads if a better barcode match over the threshold exists at either "
"end of the read for double ended kits.")
.default_value(false)
.implicit_value(true);
parser.visible.add_argument("--no-trim")
.help("Skip barcode trimming. If this option is not chosen, trimming is enabled. "
"Note that you should use this option if your input data is mapped and you "
Expand Down
8 changes: 3 additions & 5 deletions dorado/demux/BarcodeClassifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -188,10 +188,9 @@ BarcodeClassifier::~BarcodeClassifier() = default;

BarcodeScoreResult BarcodeClassifier::barcode(const std::string& seq,
bool barcode_both_ends,
bool disallow_inferior_barcodes,
const BarcodeFilterSet& allowed_barcodes) const {
auto best_barcode = find_best_barcode(seq, m_barcode_candidates, barcode_both_ends,
disallow_inferior_barcodes, allowed_barcodes);
auto best_barcode =
find_best_barcode(seq, m_barcode_candidates, barcode_both_ends, allowed_barcodes);
return best_barcode;
}

Expand Down Expand Up @@ -822,7 +821,6 @@ BarcodeScoreResult BarcodeClassifier::find_best_barcode(
const std::string& read_seq,
const std::vector<BarcodeCandidateKit>& candidates,
bool barcode_both_ends,
bool disallow_inferior_barcodes,
const BarcodeFilterSet& allowed_barcodes) const {
if (read_seq.length() == 0) {
return UNCLASSIFIED;
Expand Down Expand Up @@ -961,7 +959,7 @@ BarcodeScoreResult BarcodeClassifier::find_best_barcode(
}
}

if (kit.double_ends && disallow_inferior_barcodes) {
if (kit.double_ends) {
// For more stringent classification, ensure that neither end of a read has a higher scoring
// barcode, if any of the barcodes at that end are better than the threshold.
auto best_top_result = std::min_element(
Expand Down
2 changes: 0 additions & 2 deletions dorado/demux/BarcodeClassifier.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ class BarcodeClassifier {

BarcodeScoreResult barcode(const std::string& seq,
bool barcode_both_ends,
bool disallow_inferior_barcodes,
const BarcodeFilterSet& allowed_barcodes) const;

private:
Expand Down Expand Up @@ -57,7 +56,6 @@ class BarcodeClassifier {
BarcodeScoreResult find_best_barcode(const std::string& read_seq,
const std::vector<BarcodeCandidateKit>& adapter,
bool barcode_both_ends,
bool disallow_inferior_barcodes,
const BarcodeFilterSet& allowed_barcodes) const;
};

Expand Down
1 change: 0 additions & 1 deletion dorado/demux/barcoding_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ namespace dorado::demux {
struct BarcodingInfo {
std::string kit_name;
bool barcode_both_ends{false};
bool disallow_inferior_barcodes{false};
bool trim{false};
BarcodeFilterSet allowed_barcodes;
};
Expand Down
2 changes: 0 additions & 2 deletions dorado/read_pipeline/BarcodeClassifierNode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,6 @@ void BarcodeClassifierNode::barcode(BamMessage& read, const demux::BarcodingInfo
}

auto bc_res = barcoder->barcode(seq, barcoding_info->barcode_both_ends,
barcoding_info->disallow_inferior_barcodes,
barcoding_info->allowed_barcodes);
auto bc = generate_barcode_string(bc_res);
read.barcoding_result = std::make_shared<BarcodeScoreResult>(std::move(bc_res));
Expand Down Expand Up @@ -122,7 +121,6 @@ void BarcodeClassifierNode::barcode(SimplexRead& read) {

// get the sequence to map from the record
auto bc_res = barcoder->barcode(read.read_common.seq, barcoding_info->barcode_both_ends,
barcoding_info->disallow_inferior_barcodes,
barcoding_info->allowed_barcodes);
read.read_common.barcode = generate_barcode_string(bc_res);
spdlog::trace("Barcode for {} is {}", read.read_common.read_id, read.read_common.barcode);
Expand Down
31 changes: 14 additions & 17 deletions tests/BarcodeClassifierTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,14 @@ namespace {
std::shared_ptr<const demux::BarcodingInfo> create_barcoding_info(
const std::string& kit_name,
bool barcode_both_ends,
bool disallow_inferior_barcodes,
bool trim_barcode,
BarcodeFilterSet allowed_barcodes) {
if (kit_name.empty()) {
return {};
}

auto result = demux::BarcodingInfo{kit_name, barcode_both_ends, disallow_inferior_barcodes,
trim_barcode, std::move(allowed_barcodes)};
auto result = demux::BarcodingInfo{kit_name, barcode_both_ends, trim_barcode,
std::move(allowed_barcodes)};
return std::make_shared<demux::BarcodingInfo>(std::move(result));
}

Expand Down Expand Up @@ -82,7 +81,7 @@ TEST_CASE("BarcodeClassifier: test single ended barcode", TEST_GROUP) {
while (reader.read()) {
auto seqlen = reader.record->core.l_qseq;
std::string seq = utils::extract_sequence(reader.record.get());
auto res = classifier.barcode(seq, false, false, std::nullopt);
auto res = classifier.barcode(seq, false, std::nullopt);
if (res.barcode_name == "unclassified") {
CHECK(bc == res.barcode_name);
} else {
Expand All @@ -109,7 +108,7 @@ TEST_CASE("BarcodeClassifier: test double ended barcode", TEST_GROUP) {
while (reader.read()) {
auto seqlen = reader.record->core.l_qseq;
std::string seq = utils::extract_sequence(reader.record.get());
auto res = classifier.barcode(seq, false, false, std::nullopt);
auto res = classifier.barcode(seq, false, std::nullopt);
if (res.barcode_name == "unclassified") {
CHECK(bc == res.barcode_name);
} else {
Expand Down Expand Up @@ -138,7 +137,7 @@ TEST_CASE("BarcodeClassifier: test double ended barcode with different variants"
while (reader.read()) {
auto seqlen = reader.record->core.l_qseq;
std::string seq = utils::extract_sequence(reader.record.get());
auto res = classifier.barcode(seq, false, false, std::nullopt);
auto res = classifier.barcode(seq, false, std::nullopt);
if (res.barcode_name == "unclassified") {
CHECK(bc == res.barcode_name);
} else {
Expand All @@ -165,8 +164,8 @@ TEST_CASE("BarcodeClassifier: check barcodes on both ends - failing case", TEST_
HtsReader reader(bc_file.string(), std::nullopt);
while (reader.read()) {
std::string seq = utils::extract_sequence(reader.record.get());
auto single_end_res = classifier.barcode(seq, false, false, std::nullopt);
auto double_end_res = classifier.barcode(seq, true, false, std::nullopt);
auto single_end_res = classifier.barcode(seq, false, std::nullopt);
auto double_end_res = classifier.barcode(seq, true, std::nullopt);
CHECK(double_end_res.barcode_name == "unclassified");
CHECK(single_end_res.barcode_name == "BC01");
}
Expand All @@ -182,8 +181,8 @@ TEST_CASE("BarcodeClassifier: check barcodes on both ends - passing case", TEST_
HtsReader reader(bc_file.string(), std::nullopt);
while (reader.read()) {
std::string seq = utils::extract_sequence(reader.record.get());
auto single_end_res = classifier.barcode(seq, false, false, std::nullopt);
auto double_end_res = classifier.barcode(seq, true, false, std::nullopt);
auto single_end_res = classifier.barcode(seq, false, std::nullopt);
auto double_end_res = classifier.barcode(seq, true, std::nullopt);
CHECK(double_end_res.barcode_name == single_end_res.barcode_name);
CHECK(single_end_res.barcode_name == "BC01");
}
Expand All @@ -199,7 +198,7 @@ TEST_CASE("BarcodeClassifier: check presence of midstrand barcode double ended k
HtsReader reader(bc_file.string(), std::nullopt);
while (reader.read()) {
std::string seq = utils::extract_sequence(reader.record.get());
auto res = classifier.barcode(seq, false, false, std::nullopt);
auto res = classifier.barcode(seq, false, std::nullopt);
CHECK(res.barcode_name == "unclassified");
CHECK(res.found_midstrand);
}
Expand All @@ -215,7 +214,7 @@ TEST_CASE("BarcodeClassifier: check presence of midstrand barcode single ended k
HtsReader reader(bc_file.string(), std::nullopt);
while (reader.read()) {
std::string seq = utils::extract_sequence(reader.record.get());
auto res = classifier.barcode(seq, false, false, std::nullopt);
auto res = classifier.barcode(seq, false, std::nullopt);
CHECK(res.barcode_name == "unclassified");
CHECK(res.found_midstrand);
}
Expand Down Expand Up @@ -255,8 +254,7 @@ TEST_CASE(
read->read_common.model_stride = stride;

auto client_info = std::make_shared<dorado::DefaultClientInfo>();
auto barcoding_info =
create_barcoding_info(kit, barcode_both_ends, false, !no_trim, std::nullopt);
auto barcoding_info = create_barcoding_info(kit, barcode_both_ends, !no_trim, std::nullopt);
client_info->contexts().register_context<const demux::BarcodingInfo>(std::move(barcoding_info));
read->read_common.client_info = client_info;

Expand Down Expand Up @@ -404,8 +402,7 @@ TEST_CASE("BarcodeClassifierNode: test for proper trimming and alignment data st
reader.read();

auto client_info = std::make_shared<dorado::DefaultClientInfo>();
auto barcoding_info =
create_barcoding_info(kit, barcode_both_ends, false, !no_trim, std::nullopt);
auto barcoding_info = create_barcoding_info(kit, barcode_both_ends, !no_trim, std::nullopt);
client_info->contexts().register_context<const demux::BarcodingInfo>(std::move(barcoding_info));

BamPtr read1(bam_dup1(reader.record.get()));
Expand Down Expand Up @@ -512,7 +509,7 @@ TEST_CASE("BarcodeClassifier: test custom kit with double ended barcode", TEST_G
while (reader.read()) {
auto seqlen = reader.record->core.l_qseq;
std::string seq = utils::extract_sequence(reader.record.get());
auto res = classifier.barcode(seq, false, false, std::nullopt);
auto res = classifier.barcode(seq, false, std::nullopt);
if (res.barcode_name == "unclassified") {
CHECK(bc == res.barcode_name);
} else {
Expand Down

0 comments on commit cc32c93

Please sign in to comment.