Skip to content

Commit

Permalink
Merge branch 'jdaw/fix-resume-feature' into 'master'
Browse files Browse the repository at this point in the history
[resume] Fix bug with resume file header parsing

Closes DOR-255

See merge request machine-learning/dorado!476
  • Loading branch information
vellamike committed Jul 12, 2023
2 parents f9289e2 + 2aba658 commit 9d55b44
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 4 deletions.
17 changes: 14 additions & 3 deletions dorado/cli/basecaller.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ void setup(std::vector<std::string> args,
bool skip_model_compatibility_check,
const std::string& dump_stats_file,
const std::string& dump_stats_filter,
const std::string& resume_from_file) {
const std::string& resume_from_file,
argparse::ArgumentParser& resume_parser) {
torch::set_num_threads(1);

// create modbase runners first so basecall runners can pick batch sizes based on available memory
Expand Down Expand Up @@ -191,7 +192,12 @@ void setup(std::vector<std::string> args,
hts_set_log_level(initial_hts_log_level);

auto tokens = utils::extract_token_from_cli(pg_keys["CL"]);
auto resume_model_name = utils::extract_model_from_model_path(tokens[2]);
// The first token is the dorado binary name. Remove it, because the
// sub-parser only knows about the `basecaller` command.
tokens.erase(tokens.begin());
resume_parser.parse_args(tokens);
auto resume_model_name =
utils::extract_model_from_model_path(resume_parser.get<std::string>("model"));
if (model_name != resume_model_name) {
throw std::runtime_error(
"Resume only works if the same model is used. Resume model was " +
Expand Down Expand Up @@ -336,6 +342,11 @@ int basecaller(int argc, char* argv[]) {

argparse::ArgumentParser internal_parser;

// Create a copy of the parser for use when the resume feature is enabled. The
// copy is needed to parse the model used for the file being resumed from. Note
// that this copy must be made __before__ the original parser is used.
argparse::ArgumentParser resume_parser = parser;

try {
auto remaining_args = parser.parse_known_args(argc, argv);
internal_parser = utils::parse_internal_options(remaining_args);
Expand Down Expand Up @@ -408,7 +419,7 @@ int basecaller(int argc, char* argv[]) {
internal_parser.get<bool>("--skip-model-compatibility-check"),
internal_parser.get<std::string>("--dump_stats_file"),
internal_parser.get<std::string>("--dump_stats_filter"),
parser.get<std::string>("--resume-from"));
parser.get<std::string>("--resume-from"), resume_parser);
} catch (const std::exception& e) {
spdlog::error("{}", e.what());
return 1;
Expand Down
2 changes: 1 addition & 1 deletion tests/test_simple_basecaller_execution.sh
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ samtools view -h $output_dir/calls.bam > $output_dir/calls.sam
if command -v truncate > /dev/null
then
echo dorado basecaller resume feature
$dorado_bin basecaller ${model} $data_dir/multi_read_pod5 -b ${batch} > $output_dir/tmp.bam
$dorado_bin basecaller -b ${batch} ${model} $data_dir/multi_read_pod5 > $output_dir/tmp.bam
truncate -s 20K $output_dir/tmp.bam
$dorado_bin basecaller ${model} $data_dir/multi_read_pod5 -b ${batch} --resume-from $output_dir/tmp.bam > $output_dir/calls.bam
samtools quickcheck -u $output_dir/calls.bam
Expand Down

0 comments on commit 9d55b44

Please sign in to comment.