diff --git a/CMakeLists.txt b/CMakeLists.txt index a07930a1a..6e4c22ce2 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -357,6 +357,8 @@ if(NOT DORADO_DISABLE_DORADO) add_executable(dorado dorado/main.cpp dorado/cli/aligner.cpp + dorado/cli/basecall_output_args.cpp + dorado/cli/basecall_output_args.h dorado/cli/demux.cpp dorado/cli/duplex.cpp dorado/cli/trim.cpp diff --git a/dorado/cli/basecall_output_args.cpp b/dorado/cli/basecall_output_args.cpp new file mode 100644 index 000000000..62decb0e2 --- /dev/null +++ b/dorado/cli/basecall_output_args.cpp @@ -0,0 +1,165 @@ +#include "basecall_output_args.h" + +#include "utils/tty_utils.h" + +#include +#include +#include +#include +#include + +using OutputMode = dorado::utils::HtsFile::OutputMode; +namespace fs = std::filesystem; + +namespace dorado::cli { + +namespace { + +constexpr std::string_view OUTPUT_DIR_ARG{"--output-dir"}; +constexpr std::string_view EMIT_FASTQ_ARG{"--emit-fastq"}; +constexpr std::string_view EMIT_SAM_ARG{"--emit-sam"}; + +constexpr std::string_view OUTPUT_FILE_PREFIX{"calls_"}; +constexpr std::string_view FASTQ_EXT{".fastq"}; +constexpr std::string_view SAM_EXT{".sam"}; +constexpr std::string_view BAM_EXT{".bam"}; + +bool try_create_output_folder(const std::string& output_folder) { + std::error_code creation_error; + // N.B. No error code if folder already exists. + fs::create_directories(fs::path{output_folder}, creation_error); + if (creation_error) { + spdlog::error("Unable to create output folder {}. ErrorCode({}) {}", output_folder, + creation_error.value(), creation_error.message()); + return false; + } + return true; +} + +std::tm get_gmtime(const std::time_t* time) { + // gmtime is not threadsafe, so lock. + static std::mutex gmtime_mutex; + std::lock_guard lock(gmtime_mutex); + std::tm* time_buffer = gmtime(time); + return *time_buffer; +} + +class HtsFileCreator { + const bool m_emit_fastq; + const bool m_emit_sam; + const bool m_reference_requested; + std::optional m_output_dir; + + std::string m_output_file{}; + OutputMode m_output_mode{OutputMode::BAM}; + + bool is_output_to_file() const { return m_output_file != "-"; } + + bool is_sam_or_bam_output() const { + return m_output_mode == OutputMode::BAM || m_output_mode == OutputMode::SAM; + } + + std::string_view get_output_file_extension() { + if (m_emit_fastq) { + return FASTQ_EXT; + } + if (m_emit_sam) { + return SAM_EXT; + } + return BAM_EXT; + } + + std::string get_output_filename() { + time_t time_now = time(nullptr); + std::tm gm_time_now = get_gmtime(&time_now); + char timestamp_buffer[32]; + strftime(timestamp_buffer, 32, "%F_T%H-%M-%S", &gm_time_now); + + std::ostringstream oss{}; + oss << OUTPUT_FILE_PREFIX << timestamp_buffer << get_output_file_extension(); + return oss.str(); + } + + bool try_set_output_file() { + if (!m_output_dir) { + m_output_file = "-"; + return true; + } + if (!try_create_output_folder(*m_output_dir)) { + return false; + } + + m_output_file = (fs::path(*m_output_dir) / get_output_filename()).string(); + return true; + } + + bool try_set_output_mode() { + if (m_emit_fastq) { + if (m_emit_sam) { + spdlog::error("Only one of --emit-{fastq, sam} can be set (or none)."); + return false; + } + if (m_reference_requested) { + spdlog::error( + "--emit-fastq cannot be used with --reference as FASTQ cannot store " + "alignment results."); + return false; + } + spdlog::info( + " - Note: FASTQ output is not recommended as not all data can be preserved."); + m_output_mode = OutputMode::FASTQ; + } else if (m_emit_sam || (m_output_file == "-" && utils::is_fd_tty(stdout))) { + m_output_mode = OutputMode::SAM; + } else if (m_output_file == "-" && utils::is_fd_pipe(stdout)) { + m_output_mode = OutputMode::UBAM; + } else { + m_output_mode = OutputMode::BAM; + } + + return true; + } + +public: + HtsFileCreator(const utils::arg_parse::ArgParser& parser) + : m_emit_fastq(parser.visible.get(EMIT_FASTQ_ARG)), + m_emit_sam(parser.visible.get(EMIT_SAM_ARG)), + m_reference_requested(!parser.visible.get("--reference").empty()), + m_output_dir(parser.visible.present(OUTPUT_DIR_ARG)) {} + + std::unique_ptr create() { + if (!try_set_output_file()) { + return nullptr; + } + if (!try_set_output_mode()) { + return nullptr; + } + + // If writing to a SAM/BAM file and there's a reference then we will sort. + auto sort_bam = is_output_to_file() && is_sam_or_bam_output() && m_reference_requested; + + return std::make_unique(m_output_file, m_output_mode, 0, sort_bam); + } +}; + +} // namespace + +std::unique_ptr extract_hts_file(const utils::arg_parse::ArgParser& parser) { + HtsFileCreator hts_file_creator(parser); + return hts_file_creator.create(); +} + +void add_basecaller_output_arguments(utils::arg_parse::ArgParser& parser) { + parser.visible.add_argument(EMIT_FASTQ_ARG) + .help("Output in fastq format.") + .default_value(false) + .implicit_value(true); + parser.visible.add_argument(EMIT_SAM_ARG) + .help("Output in SAM format.") + .default_value(false) + .implicit_value(true); + parser.visible.add_argument("-o", OUTPUT_DIR_ARG) + .help("Optional output folder, if specified output will be written to a calls file " + "(calls_.sam|.bam|.fastq) in the given folder."); +} + +} // namespace dorado::cli \ No newline at end of file diff --git a/dorado/cli/basecall_output_args.h b/dorado/cli/basecall_output_args.h new file mode 100644 index 000000000..5d58c7c68 --- /dev/null +++ b/dorado/cli/basecall_output_args.h @@ -0,0 +1,14 @@ +#pragma once + +#include "utils/arg_parse_ext.h" +#include "utils/hts_file.h" + +#include + +namespace dorado::cli { + +void add_basecaller_output_arguments(utils::arg_parse::ArgParser& parser); + +std::unique_ptr extract_hts_file(const utils::arg_parse::ArgParser& parser); + +} // namespace dorado::cli \ No newline at end of file diff --git a/dorado/cli/basecaller.cpp b/dorado/cli/basecaller.cpp index 4e0356e9d..b0c0d06e6 100644 --- a/dorado/cli/basecaller.cpp +++ b/dorado/cli/basecaller.cpp @@ -2,6 +2,7 @@ #include "api/pipeline_creation.h" #include "api/runner_creation.h" #include "basecall/CRFModelConfig.h" +#include "basecall_output_args.h" #include "cli/cli_utils.h" #include "data_loader/DataLoader.h" #include "demux/adapter_info.h" @@ -59,6 +60,12 @@ #include #include +using dorado::utils::default_parameters; +using OutputMode = dorado::utils::HtsFile::OutputMode; +using namespace std::chrono_literals; +using namespace dorado::models; +namespace fs = std::filesystem; + namespace dorado { namespace { @@ -113,12 +120,6 @@ void set_basecaller_params(const argparse::ArgumentParser& arg, } // namespace -using dorado::utils::default_parameters; -using OutputMode = dorado::utils::HtsFile::OutputMode; -using namespace std::chrono_literals; -using namespace dorado::models; -namespace fs = std::filesystem; - void set_dorado_basecaller_args(utils::arg_parse::ArgParser& parser, int& verbosity) { parser.visible.add_argument("model").help( "model selection {fast,hac,sup}@v{version} for automatic model selection including " @@ -200,15 +201,6 @@ void set_dorado_basecaller_args(utils::arg_parse::ArgParser& parser, int& verbos .help("the minimum predicted methylation probability for a modified base to be emitted " "in an all-context model, [0, 1]"); - parser.visible.add_argument("--emit-fastq") - .help("Output in fastq format.") - .default_value(false) - .implicit_value(true); - parser.visible.add_argument("--emit-sam") - .help("Output in SAM format.") - .default_value(false) - .implicit_value(true); - parser.visible.add_argument("--emit-moves").default_value(false).implicit_value(true); parser.visible.add_argument("--reference") @@ -260,6 +252,7 @@ void set_dorado_basecaller_args(utils::arg_parse::ArgParser& parser, int& verbos .help("Configuration file for PolyA estimation to change default behaviours") .default_value(std::string("")); + cli::add_basecaller_output_arguments(parser); cli::add_internal_arguments(parser); } @@ -274,7 +267,7 @@ void setup(const std::vector& args, size_t remora_batch_size, size_t num_remora_threads, float methylation_threshold_pct, - OutputMode output_mode, + std::unique_ptr hts_file, bool emit_moves, size_t max_reads, size_t min_qscore, @@ -416,14 +409,14 @@ void setup(const std::vector& args, utils::add_rg_headers(hdr.get(), read_groups); } - utils::HtsFile hts_file("-", output_mode, thread_allocations.writer_threads, false); + hts_file->set_num_threads(thread_allocations.writer_threads); PipelineDescriptor pipeline_desc; std::string gpu_names{}; #if DORADO_CUDA_BUILD gpu_names = utils::get_cuda_gpu_names(device); #endif - auto hts_writer = pipeline_desc.add_node({}, hts_file, gpu_names); + auto hts_writer = pipeline_desc.add_node({}, *hts_file, gpu_names); auto aligner = PipelineDescriptor::InvalidNodeHandle; auto current_sink_node = hts_writer; if (enable_aligner) { @@ -494,7 +487,7 @@ void setup(const std::vector& args, const auto& aligner_ref = dynamic_cast(pipeline->get_node_ref(aligner)); utils::add_sq_hdr(hdr.get(), aligner_ref.get_sequence_records_for_header()); } - hts_file.set_header(hdr.get()); + hts_file->set_header(hdr.get()); std::unordered_set reads_already_processed; if (!resume_from_file.empty()) { @@ -542,7 +535,7 @@ void setup(const std::vector& args, } // If we're doing alignment, post-processing takes longer due to bam file sorting. - float post_processing_percentage = (hts_file.finalise_is_noop() || ref.empty()) ? 0.0f : 0.5f; + float post_processing_percentage = (hts_file->finalise_is_noop() || ref.empty()) ? 0.0f : 0.5f; ProgressTracker tracker(int(num_reads), false, post_processing_percentage); tracker.set_description("Basecalling"); @@ -576,7 +569,7 @@ void setup(const std::vector& args, // Report progress during output file finalisation. tracker.set_description("Sorting output files"); - hts_file.finalise([&](size_t progress) { + hts_file->finalise([&](size_t progress) { tracker.update_post_processing_progress(static_cast(progress)); }); @@ -635,41 +628,24 @@ int basecaller(int argc, char* argv[]) { return EXIT_FAILURE; } - auto output_mode = OutputMode::BAM; - - auto emit_fastq = parser.visible.get("--emit-fastq"); - auto emit_sam = parser.visible.get("--emit-sam"); - - if (emit_fastq && emit_sam) { - spdlog::error("Only one of --emit-{fastq, sam} can be set (or none)."); - return EXIT_FAILURE; - } - if (parser.visible.get("--reference").empty() && !parser.visible.get("--bed-file").empty()) { spdlog::error("--bed-file cannot be used without --reference."); return EXIT_FAILURE; } - if (emit_fastq) { + auto hts_file = cli::extract_hts_file(parser); + if (!hts_file) { + return EXIT_FAILURE; + } + + if (hts_file->get_output_mode() == OutputMode::FASTQ) { if (model_selection.has_mods_variant() || !mod_bases.empty() || !mod_bases_models.empty()) { spdlog::error( "--emit-fastq cannot be used with modbase models as FASTQ cannot store modbase " "results."); return EXIT_FAILURE; } - if (!parser.visible.get("--reference").empty()) { - spdlog::error( - "--emit-fastq cannot be used with --reference as FASTQ cannot store alignment " - "results."); - return EXIT_FAILURE; - } - spdlog::info(" - Note: FASTQ output is not recommended as not all data can be preserved."); - output_mode = OutputMode::FASTQ; - } else if (emit_sam || utils::is_fd_tty(stdout)) { - output_mode = OutputMode::SAM; - } else if (utils::is_fd_pipe(stdout)) { - output_mode = OutputMode::UBAM; } bool no_trim_barcodes = false, no_trim_primers = false, no_trim_adapters = false; @@ -781,7 +757,7 @@ int basecaller(int argc, char* argv[]) { parser.visible.get("--reference"), parser.visible.get("--bed-file"), default_parameters.num_runners, default_parameters.remora_batchsize, default_parameters.remora_threads, - methylation_threshold, output_mode, parser.visible.get("--emit-moves"), + methylation_threshold, std::move(hts_file), parser.visible.get("--emit-moves"), parser.visible.get("--max-reads"), parser.visible.get("--min-qscore"), parser.visible.get("--read-ids"), recursive, *minimap_options, parser.hidden.get("--skip-model-compatibility-check"), diff --git a/dorado/cli/cli_utils.h b/dorado/cli/cli_utils.h index d745d6fca..4362e1888 100644 --- a/dorado/cli/cli_utils.h +++ b/dorado/cli/cli_utils.h @@ -6,9 +6,6 @@ #include "utils/arg_parse_ext.h" #include "utils/bam_utils.h" -#include -#include - #ifdef _WIN32 // Unreachable code warnings are emitted from argparse, even though they should be disabled by the // MSVC /external:W0 setting. This is a limitation of /external: for some C47XX backend warnings. See: @@ -34,6 +31,7 @@ #include #include #include +#include #include #include #include diff --git a/dorado/cli/duplex.cpp b/dorado/cli/duplex.cpp index 4523e75da..1fa2d38f9 100644 --- a/dorado/cli/duplex.cpp +++ b/dorado/cli/duplex.cpp @@ -2,6 +2,7 @@ #include "api/pipeline_creation.h" #include "api/runner_creation.h" #include "basecall/CRFModelConfig.h" +#include "cli/basecall_output_args.h" #include "cli/cli_utils.h" #include "data_loader/DataLoader.h" #include "data_loader/ModelFinder.h" @@ -255,11 +256,6 @@ int duplex(int argc, char* argv[]) { .default_value(std::string("")) .help("Space-delimited csv containing read ID pairs. If not provided, pairing will be " "performed automatically"); - parser.visible.add_argument("--emit-fastq").default_value(false).implicit_value(true); - parser.visible.add_argument("--emit-sam") - .help("Output in SAM format.") - .default_value(false) - .implicit_value(true); parser.visible.add_argument("-t", "--threads").default_value(0).scan<'i', int>(); parser.visible.add_argument("-x", "--device") @@ -333,6 +329,7 @@ int duplex(int argc, char* argv[]) { .help("the minimum predicted methylation probability for a modified base to be emitted " "in an all-context model, [0, 1]"); + cli::add_basecaller_output_arguments(parser); cli::add_internal_arguments(parser); alignment::mm2::add_options_string_arg(parser); @@ -382,35 +379,12 @@ int duplex(int argc, char* argv[]) { auto output_mode = OutputMode::BAM; - auto emit_fastq = parser.visible.get("--emit-fastq"); - auto emit_sam = parser.visible.get("--emit-sam"); - - if (emit_fastq && emit_sam) { - throw std::runtime_error("Only one of --emit-{fastq, sam} can be set (or none)."); - } - if (parser.visible.get("--reference").empty() && !parser.visible.get("--bed-file").empty()) { spdlog::error("--bed-file cannot be used without --reference."); return EXIT_FAILURE; } - if (emit_fastq) { - if (!parser.visible.get("--reference").empty()) { - spdlog::error( - "--emit-fastq cannot be used with --reference as FASTQ cannot store " - "alignment results."); - return EXIT_FAILURE; - } - spdlog::info( - " - Note: FASTQ output is not recommended as not all data can be preserved."); - output_mode = OutputMode::FASTQ; - } else if (emit_sam || utils::is_fd_tty(stdout)) { - output_mode = OutputMode::SAM; - } else if (utils::is_fd_pipe(stdout)) { - output_mode = OutputMode::UBAM; - } - const std::string dump_stats_file = parser.hidden.get("--dump_stats_file"); const std::string dump_stats_filter = parser.hidden.get("--dump_stats_filter"); const size_t max_stats_records = static_cast(dump_stats_file.empty() ? 0 : 100000); @@ -432,8 +406,12 @@ int duplex(int argc, char* argv[]) { SamHdrPtr hdr(sam_hdr_init()); cli::add_pg_hdr(hdr.get(), "duplex", args, device); + auto hts_file = cli::extract_hts_file(parser); + if (!hts_file) { + return EXIT_FAILURE; + } constexpr int WRITER_THREADS = 4; - utils::HtsFile hts_file("-", output_mode, WRITER_THREADS, false); + hts_file->set_num_threads(WRITER_THREADS); PipelineDescriptor pipeline_desc; auto hts_writer = PipelineDescriptor::InvalidNodeHandle; @@ -444,7 +422,7 @@ int duplex(int argc, char* argv[]) { gpu_names = utils::get_cuda_gpu_names(device); #endif if (ref.empty()) { - hts_writer = pipeline_desc.add_node({}, hts_file, gpu_names); + hts_writer = pipeline_desc.add_node({}, *hts_file, gpu_names); converted_reads_sink = hts_writer; } else { std::string err_msg{}; @@ -464,7 +442,7 @@ int duplex(int argc, char* argv[]) { aligner = pipeline_desc.add_node({}, index_file_access, bed_file_access, ref, bed, *minimap_options, std::thread::hardware_concurrency()); - hts_writer = pipeline_desc.add_node({}, hts_file, gpu_names); + hts_writer = pipeline_desc.add_node({}, *hts_file, gpu_names); pipeline_desc.add_node_sink(aligner, hts_writer); converted_reads_sink = aligner; } @@ -478,7 +456,7 @@ int duplex(int argc, char* argv[]) { read_ids_to_filter, 5); std::unique_ptr pipeline; - ProgressTracker tracker(int(num_reads), duplex, hts_file.finalise_is_noop() ? 0.f : 0.5f); + ProgressTracker tracker(int(num_reads), duplex, hts_file->finalise_is_noop() ? 0.f : 0.5f); tracker.set_description("Running duplex"); std::vector stats_callables; stats_callables.push_back( @@ -526,7 +504,7 @@ int duplex(int argc, char* argv[]) { } // Write header as no read group info is needed. - hts_file.set_header(hdr.get()); + hts_file->set_header(hdr.get()); stats_sampler = std::make_unique( kStatsPeriod, stats_reporters, stats_callables, max_stats_records); @@ -683,7 +661,7 @@ int duplex(int argc, char* argv[]) { dynamic_cast(pipeline->get_node_ref(aligner)); utils::add_sq_hdr(hdr.get(), aligner_ref.get_sequence_records_for_header()); } - hts_file.set_header(hdr.get()); + hts_file->set_header(hdr.get()); DataLoader loader(*pipeline, "cpu", num_devices, 0, std::move(read_list), {}); loader.add_read_initialiser(client_info_init_func); @@ -708,7 +686,7 @@ int duplex(int argc, char* argv[]) { // Report progress during output file finalisation. tracker.set_description("Sorting output files"); - hts_file.finalise([&](size_t progress) { + hts_file->finalise([&](size_t progress) { tracker.update_post_processing_progress(static_cast(progress)); }); diff --git a/dorado/utils/hts_file.cpp b/dorado/utils/hts_file.cpp index ec90aa1b7..a88d2a2b5 100644 --- a/dorado/utils/hts_file.cpp +++ b/dorado/utils/hts_file.cpp @@ -14,7 +14,9 @@ namespace { -constexpr size_t MINIMUM_BUFFER_SIZE = 100000ul; // The smallest allowed buffer size is 100 KB. +constexpr size_t MINIMUM_BUFFER_SIZE{100000}; // The smallest allowed buffer size is 100 KB. +constexpr size_t DEFAULT_BUFFER_SIZE{ + 20000000}; // Arbitrary 20 MB. Can be overridden by application code. } // namespace @@ -47,7 +49,7 @@ struct HtsFile::ProgressUpdater { } }; -HtsFile::HtsFile(const std::string& filename, OutputMode mode, size_t threads, bool sort_bam) +HtsFile::HtsFile(const std::string& filename, OutputMode mode, int threads, bool sort_bam) : m_filename(filename), m_threads(int(threads)), m_finalise_is_noop(true), @@ -68,6 +70,7 @@ HtsFile::HtsFile(const std::string& filename, OutputMode mode, size_t threads, b break; case OutputMode::BAM: if (m_filename != "-" && m_sort_bam) { + set_buffer_size(DEFAULT_BUFFER_SIZE); // We're doing sorted BAM output. We need to indicate this for the // finalise method. m_finalise_is_noop = false; @@ -87,20 +90,40 @@ HtsFile::HtsFile(const std::string& filename, OutputMode mode, size_t threads, b std::to_string(static_cast(m_mode))); } - if (m_finalise_is_noop) { - if (!m_file) { - throw std::runtime_error("Could not open file: " + m_filename); - } + if (m_threads > 0) { + initialise_threads(); + } +} - if (m_file->format.compression == bgzf) { - auto res = bgzf_mt(m_file->fp.bgzf, m_threads, 128); - if (res < 0) { - throw std::runtime_error("Could not enable multi threading for BAM generation."); - } +void HtsFile::initialise_threads() { + if (!m_finalise_is_noop) { + return; + } + if (!m_file) { + throw std::runtime_error("Could not open file: " + m_filename); + } + + if (m_file->format.compression == bgzf) { + auto res = bgzf_mt(m_file->fp.bgzf, m_threads, 128); + if (res < 0) { + throw std::runtime_error("Could not enable multi threading for BAM generation."); } } } +void HtsFile::set_num_threads(std::size_t threads) { + if (m_threads > 0) { + throw std::runtime_error("HtsFile num threads cannot be changed if already initialised"); + } + + if (threads < 1) { + throw std::runtime_error("HtsFile num threads must be greater than 0"); + } + + m_threads = static_cast(threads); + initialise_threads(); +} + HtsFile::~HtsFile() { if (!m_finalised) { spdlog::error("finalise() not called on a HtsFile."); diff --git a/dorado/utils/hts_file.h b/dorado/utils/hts_file.h index 09f9016c8..837cdcdd1 100644 --- a/dorado/utils/hts_file.h +++ b/dorado/utils/hts_file.h @@ -21,11 +21,14 @@ class HtsFile { using ProgressCallback = std::function; - HtsFile(const std::string& filename, OutputMode mode, size_t threads, bool sort_bam); + HtsFile(const std::string& filename, OutputMode mode, int threads, bool sort_bam); ~HtsFile(); HtsFile(const HtsFile&) = delete; HtsFile& operator=(const HtsFile&) = delete; + // Support for setting threads after construction + void set_num_threads(std::size_t threads); + void set_buffer_size(size_t buff_size); int set_header(const sam_hdr_t* header); int write(const bam1_t* record); @@ -58,6 +61,7 @@ class HtsFile { int write_to_file(const bam1_t* record); void cache_record(const bam1_t* record); bool merge_temp_files(ProgressUpdater& update_progress) const; + void initialise_threads(); }; } // namespace dorado::utils diff --git a/tests/HtsFileTest.cpp b/tests/HtsFileTest.cpp index b3cc1ce67..28b3141b8 100644 --- a/tests/HtsFileTest.cpp +++ b/tests/HtsFileTest.cpp @@ -1,16 +1,18 @@ #include "TestUtils.h" +#include "utils/PostCondition.h" #include "utils/hts_file.h" #include #include #include +#include #include #include #include #define TEST_GROUP "[hts_file]" -static constexpr size_t NUM_THREADS = 4; +static constexpr int NUM_THREADS = 4; namespace fs = std::filesystem; using namespace dorado; @@ -18,20 +20,20 @@ using utils::HtsFile; namespace { struct Tester { - fs::path file_in_path, file_out_path; + dorado::tests::TempDir output_test_dir; + fs::path file_in_path; + fs::path file_out_path; std::vector records; HtsFilePtr file_in; SamHdrPtr header_in, header_out; std::vector indices; - dorado::tests::TempDir output_test_dir; - Tester() : output_test_dir(tests::make_temp_dir("hts_writer_output")) {} + Tester() + : output_test_dir(tests::make_temp_dir("hts_writer_output")), + file_in_path(fs::path(get_data_dir("hts_file")) / "test_data.bam"), + file_out_path(output_test_dir.m_path / "test_output.bam") {} void read_input_records() { - fs::path hts_file_test_dir = fs::path(get_data_dir("hts_file")); - file_in_path = hts_file_test_dir / "test_data.bam"; - file_out_path = output_test_dir.m_path / "test_output.bam"; - // Read the test data into a vector of BAM records. file_in.reset(hts_open(file_in_path.string().c_str(), "r")); header_in.reset(sam_hdr_read(file_in.get())); @@ -125,3 +127,34 @@ TEST_CASE("HtsFileTest: Write to multiple sorted files, and merge", TEST_GROUP) tester.check_output(true); } + +TEST_CASE("HtsFileTest: construct with zero threads for sorted BAM does not throw", TEST_GROUP) { + Tester tester; + std::unique_ptr cut{}; + auto finalize_file = utils::PostCondition([&cut] { cut->finalise([](size_t) {}); }); + + REQUIRE_NOTHROW(cut = std::make_unique(tester.file_out_path.string(), + HtsFile::OutputMode::BAM, 0, true)); +} + +TEST_CASE("HtsFileTest: construct with zero threads for unsorted BAM does not throw", TEST_GROUP) { + Tester tester; + std::unique_ptr cut{}; + auto finalize_file = utils::PostCondition([&cut] { cut->finalise([](size_t) {}); }); + + REQUIRE_NOTHROW(cut = std::make_unique(tester.file_out_path.string(), + HtsFile::OutputMode::BAM, 0, false)); +} + +TEST_CASE( + "HtsFileTest: set_num_threads with 2 after constructed with zero threads for unsorted BAM " + "does not throw", + TEST_GROUP) { + Tester tester; + std::unique_ptr cut{}; + auto finalize_file = utils::PostCondition([&cut] { cut->finalise([](size_t) {}); }); + cut = std::make_unique(tester.file_out_path.string(), HtsFile::OutputMode::BAM, 0, + false); + + cut->set_num_threads(2); +}