From 7ff07a2fb294ed1bf664030659cc4e1f96fbc08c Mon Sep 17 00:00:00 2001 From: Jeremy Arbesfeld <50678786+jarbesfeld@users.noreply.github.com> Date: Thu, 17 Oct 2024 09:09:59 -0400 Subject: [PATCH] Update alignment code (#68) --- src/dcd_mapping/align.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/dcd_mapping/align.py b/src/dcd_mapping/align.py index fc2d7fb..9925089 100644 --- a/src/dcd_mapping/align.py +++ b/src/dcd_mapping/align.py @@ -235,7 +235,8 @@ def _get_best_hsp( """Retrieve preferred HSP from BLAT Hit object. We select the hsp object with the lowest distance from the start of the - corresponding gene and the highest BLAT score + corresponding gene and the highest BLAT score. We omit the first sorting step + when a gene symbol is not associated with a score set. :param hit: hit object from BLAT result :param urn: scoreset identifier for use in error messages @@ -245,7 +246,11 @@ def _get_best_hsp( :raise AlignmentError: if hit object appears to be empty (should be impossible) """ best_hsp = None - hsp_list = sorted(hit, key=lambda hsp: abs(hsp.hit_start - gene_location.start)) + hsp_list = ( + sorted(hit, key=lambda hsp: abs(hsp.hit_start - gene_location.start)) + if gene_location + else hit + ) hsp_list = sorted( hsp_list, key=lambda hsp: (hsp.query_end - hsp.query_start) / output.seq_len,