From 358a39850f483cc29d8b43264cb3c2bb024c86e7 Mon Sep 17 00:00:00 2001 From: Rob Patro Date: Fri, 15 Nov 2024 08:51:10 -0500 Subject: [PATCH 1/3] Allow passing read name to mappy (#1260) * Allow passing read name to mappy This adds the (optional) ability to pass the read name to the mappy `map` method. Without the read name, the call to `map` can sometimes give different output than the command-line version of `minimap2` because of the way minimap2 uses the hash of the read name to break ties in ordering hits. This can affect whether, and which, supplementary alignments are generated, and even whether, and which, non-primary alignments are generated. * Pass name directly to mm_map_aux Get rid of the additional function, and always accept the name parameter in the mm_map_aux function (can be NULL if not available). --------- Co-authored-by: Rob Patro --- python/cmappy.h | 6 +++--- python/cmappy.pxd | 2 +- python/mappy.pyx | 15 ++++++++++++--- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/python/cmappy.h b/python/cmappy.h index 6bc5635d..8428351b 100644 --- a/python/cmappy.h +++ b/python/cmappy.h @@ -71,13 +71,13 @@ static inline void mm_reset_timer(void) } extern unsigned char seq_comp_table[256]; -static inline mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt) +static inline mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char* seqname, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt) { mm_reg1_t *r; Py_BEGIN_ALLOW_THREADS if (seq2 == 0) { - r = mm_map(mi, strlen(seq1), seq1, n_regs, b, opt, NULL); + r = mm_map(mi, strlen(seq1), seq1, n_regs, b, opt, seqname); } else { int _n_regs[2]; mm_reg1_t *regs[2]; @@ -94,7 +94,7 @@ static inline mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char *seq1, const seq[1][i] = seq_comp_table[t]; } if (len[1]&1) seq[1][len[1]>>1] = seq_comp_table[(uint8_t)seq[1][len[1]>>1]]; - mm_map_frag(mi, 2, len, (const 
char**)seq, _n_regs, regs, b, opt, NULL); + mm_map_frag(mi, 2, len, (const char**)seq, _n_regs, regs, b, opt, seqname); for (i = 0; i < _n_regs[1]; ++i) regs[1][i].rev = !regs[1][i].rev; *n_regs = _n_regs[0] + _n_regs[1]; diff --git a/python/cmappy.pxd b/python/cmappy.pxd index 05bb7a9b..38432746 100644 --- a/python/cmappy.pxd +++ b/python/cmappy.pxd @@ -129,7 +129,7 @@ cdef extern from "cmappy.h": void mm_reg2hitpy(const mm_idx_t *mi, mm_reg1_t *r, mm_hitpy_t *h) void mm_free_reg1(mm_reg1_t *r) - mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt) + mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char* seqname, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt) char *mappy_fetch_seq(const mm_idx_t *mi, const char *name, int st, int en, int *l) mm_idx_t *mappy_idx_seq(int w, int k, int is_hpc, int bucket_bits, const char *seq, int l) diff --git a/python/mappy.pyx b/python/mappy.pyx index 0a6b93cd..4a5adf89 100644 --- a/python/mappy.pyx +++ b/python/mappy.pyx @@ -163,7 +163,7 @@ cdef class Aligner: def __bool__(self): return (self._idx != NULL) - def map(self, seq, seq2=None, buf=None, cs=False, MD=False, max_frag_len=None, extra_flags=None): + def map(self, seq, seq2=None, name=None, buf=None, cs=False, MD=False, max_frag_len=None, extra_flags=None): cdef cmappy.mm_reg1_t *regs cdef cmappy.mm_hitpy_t h cdef ThreadBuffer b @@ -185,11 +185,20 @@ cdef class Aligner: km = cmappy.mm_tbuf_get_km(b._b) _seq = seq if isinstance(seq, bytes) else seq.encode() + if name is not None: + _name = name if isinstance(name, bytes) else name.encode() + if seq2 is None: - regs = cmappy.mm_map_aux(self._idx, _seq, NULL, &n_regs, b._b, &map_opt) + if name is None: + regs = cmappy.mm_map_aux(self._idx, NULL, _seq, NULL, &n_regs, b._b, &map_opt) + else: + regs = cmappy.mm_map_aux(self._idx, _name, _seq, NULL, &n_regs, b._b, &map_opt) else: _seq2 = seq2 if isinstance(seq2, 
bytes) else seq2.encode() - regs = cmappy.mm_map_aux(self._idx, _seq, _seq2, &n_regs, b._b, &map_opt) + if name is None: + regs = cmappy.mm_map_aux(self._idx, NULL, _seq, _seq2, &n_regs, b._b, &map_opt) + else: + regs = cmappy.mm_map_aux(self._idx, _name, _seq, _seq2, &n_regs, b._b, &map_opt) try: i = 0 From c6db201b387030678f425f7d982a781c28d77fe7 Mon Sep 17 00:00:00 2001 From: James Webber Date: Fri, 15 Nov 2024 08:54:25 -0500 Subject: [PATCH 2/3] Update README.md (#1245) fixed documentation for paftools --- misc/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/misc/README.md b/misc/README.md index 991f02c2..f9d52f5c 100644 --- a/misc/README.md +++ b/misc/README.md @@ -16,7 +16,8 @@ minimap2 -c test/MT-human.fa test/MT-orang.fa \ | paftools.js liftover -l10000 - <(echo -e "MT_orang\t2000\t5000") # liftOver # no test data for the following examples paftools.js junceval -e anno.gtf splice.sam > out.txt # compare splice junctions to annotations -paftools.js splice2bed anno.gtf > anno.bed # convert GTF/GFF3 to BED12 +paftools.js splice2bed splice.sam > splice.bed # convert PAF/SAM to BED12 +paftools.js gff2bed anno.gtf > anno.bed # convert GTF/GFF3 to BED12 ``` ## Table of Contents From 7d8bbb74a870ac98b7c09b590848c11699f6f0a4 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Fri, 15 Nov 2024 21:56:36 +0800 Subject: [PATCH 3/3] Add `sc_ambi` and `max_chain_skip` parameters to `mappy.pyx` (#1240) * add sc_ambi option to cython interface * add max_gaplen param * set max_chain_skip to arg instead of forcing 255 --- python/mappy.pyx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/mappy.pyx b/python/mappy.pyx index 4a5adf89..474f833e 100644 --- a/python/mappy.pyx +++ b/python/mappy.pyx @@ -113,7 +113,7 @@ cdef class Aligner: cdef cmappy.mm_idxopt_t idx_opt cdef cmappy.mm_mapopt_t map_opt - def __cinit__(self, fn_idx_in=None, preset=None, k=None, w=None, 
min_cnt=None, min_chain_score=None, min_dp_score=None, bw=None, bw_long=None, best_n=None, n_threads=3, fn_idx_out=None, max_frag_len=None, extra_flags=None, seq=None, scoring=None): + def __cinit__(self, fn_idx_in=None, preset=None, k=None, w=None, min_cnt=None, min_chain_score=None, min_dp_score=None, bw=None, bw_long=None, best_n=None, n_threads=3, fn_idx_out=None, max_frag_len=None, extra_flags=None, seq=None, scoring=None, sc_ambi=None, max_chain_skip=None): self._idx = NULL cmappy.mm_set_opt(NULL, &self.idx_opt, &self.map_opt) # set the default options if preset is not None: @@ -138,6 +138,8 @@ cdef class Aligner: self.map_opt.q2, self.map_opt.e2 = scoring[4], scoring[5] if len(scoring) >= 7: self.map_opt.sc_ambi = scoring[6] + if sc_ambi is not None: self.map_opt.sc_ambi = sc_ambi + if max_chain_skip is not None: self.map_opt.max_chain_skip = max_chain_skip cdef cmappy.mm_idx_reader_t *r;