diff --git a/build.py b/build.py index 1836662..d5dd724 100644 --- a/build.py +++ b/build.py @@ -58,7 +58,7 @@ cdef = [ "typedef struct { ...; } bam_fset;" - "bam_fset* create_bam_fset(char* fname);" + "bam_fset* create_bam_fset(char* fname, char* fasta_path);" "void destroy_bam_fset(bam_fset* fset);" ] for header in ('clair3_pileup.h', 'clair3_full_alignment.h'): diff --git a/preprocess/CreateTensorPileupFromCffi.py b/preprocess/CreateTensorPileupFromCffi.py index e609c23..fde6260 100644 --- a/preprocess/CreateTensorPileupFromCffi.py +++ b/preprocess/CreateTensorPileupFromCffi.py @@ -47,7 +47,7 @@ def pileup_counts_clair3( """ lib = libclair3.lib featlenclair3 = lib.featlenclair3 - bam = BAMHandler(bam) + bam = BAMHandler(bam, fasta) def _process_region(reg): # ctg start is 1-based, medaka.common.Region object is 0-based @@ -55,7 +55,7 @@ def _process_region(reg): if isinstance(bam, BAMHandler): bam_handle = bam else: - bam_handle = BAMHandler(bam) + bam_handle = BAMHandler(bam, fasta) with bam_handle.borrow() as fh: counts = lib.calculate_clair3_pileup( region_str.encode(), fh, fasta.encode(), min_depth, min_snp_af, min_indel_af, min_mq, max_indel_length, call_snp_only, max_depth, gvcf) @@ -88,7 +88,7 @@ def _process_region(reg): class BAMHandler(object): """Opening of BAM file handles and indices.""" - def __init__(self, bam, size=16): + def __init__(self, bam, fasta, size=16): """Initialise a pool of HTSlib filehandles.""" # note: the default size here is set to match the default # `bam_workers` of prediction.DataLoader and `workers` @@ -100,7 +100,7 @@ def __init__(self, bam, size=16): lib, ffi = libclair3.lib, libclair3.ffi for _ in range(size): fset = ffi.gc( - lib.create_bam_fset(self.bam.encode()), + lib.create_bam_fset(self.bam.encode(), fasta.encode()), self._destroy_fset) self._pool.put(fset) diff --git a/src/clair3_full_alignment.c b/src/clair3_full_alignment.c index 7a59b01..962bee1 100644 --- a/src/clair3_full_alignment.c +++ b/src/clair3_full_alignment.c @@ -404,6 +404,7 @@ size_t min_mq, size_t min_bq, size_t matrix_depth, size_t max_indel_length) bam_hdr_t *header; hts_file = sam_open(bam_path, "r"); + hts_set_opt(hts_file, CRAM_OPT_REFERENCE, fasta_path); idx = sam_index_load(hts_file, bam_path); header = sam_hdr_read(hts_file); const int tid = bam_name2id(header, chr); diff --git a/src/medaka_bamiter.c b/src/medaka_bamiter.c index c01e874..d6d4576 100644 --- a/src/medaka_bamiter.c +++ b/src/medaka_bamiter.c @@ -49,9 +49,10 @@ int read_bam(void *data, bam1_t *b) { // Initialise BAM file, index and header structures -bam_fset* create_bam_fset(const char* fname) { +bam_fset* create_bam_fset(const char* fname, const char* fasta_path) { bam_fset* fset = xalloc(1, sizeof(bam_fset), "bam fileset"); fset->fp = hts_open(fname, "rb"); + hts_set_opt(fset->fp, CRAM_OPT_REFERENCE, fasta_path); fset->idx = sam_index_load(fset->fp, fname); fset->hdr = sam_hdr_read(fset->fp); if (fset->hdr == 0 || fset->idx == 0 || fset->fp == 0) { diff --git a/src/medaka_bamiter.h b/src/medaka_bamiter.h index 100c632..3ea1100 100644 --- a/src/medaka_bamiter.h +++ b/src/medaka_bamiter.h @@ -25,7 +25,7 @@ typedef struct { // Initialise BAM file, index and header structures -bam_fset* create_bam_fset(const char* fname); +bam_fset* create_bam_fset(const char* fname, const char* fasta_path); // Destory BAM file, index and header structures void destroy_bam_fset(bam_fset* fset);