Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#325 Adding arriba support #326

Merged
merged 5 commits into from
May 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ jobs:
steps:
- uses: actions/checkout@v2
- name: install machine dependencies
run: sudo apt-get install -y libcurl4-openssl-dev
run: |
sudo apt-get update
sudo apt-get install -y libcurl4-openssl-dev
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
Expand Down
5 changes: 5 additions & 0 deletions docs/background/citations.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@ Saunders,C.T. et al. (2012) Strelka: accurate somatic small-variant
calling from sequenced tumor--normal sample pairs. Bioinformatics,
28, 1811--1817.

## Uhrig-2021

Uhrig,S. et al. (2021) Accurate and efficient detection of gene
fusions from RNA sequencing data. Genome Res., 31, 448--460.

## Yates-2016

Yates,A. et al. (2016) Ensembl 2016. Nucleic Acids Res., 44,
Expand Down
5 changes: 5 additions & 0 deletions docs/glossary.md
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,11 @@ install instructions.
Community based standard of recommendations for variant notation.
See [http://varnomen.hgvs.org/](http://varnomen.hgvs.org/)

## Arriba

Arriba is an SV caller. Source for Arriba can be found
[here](https://github.com/suhrig/arriba) [Uhrig-2021](../background/citations#uhrig-2021)

## BreakDancer

BreakDancer is an SV caller. Source for BreakDancer can be found
Expand Down
1 change: 1 addition & 0 deletions docs/inputs/support.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ It should be noted however that the tool tracked will only be listed as

| Name | Version(s) | MAVIS input | Publication |
| ------------------------------------------ | ---------------- | --------------------------------------------- | ----------------------------------------------------------- |
| [Arriba](../../glossary/#arriba) | `2.2.1` | `fusions.tsv` | [Uhrig-2021](../../background/citations#uhrig-2021) |
| [BreakDancer](../../glossary/#breakdancer) | `1.4.5` | `Tools main output file(s)` | [Chen-2009](../../background/citations#chen-2009) |
| [BreakSeq](../../glossary/#breakseq) | `2.2` | `work/breakseq.vcf.gz` | [Abyzov-2015](../../background/citations#abyzov-2015) |
| [Chimerascan](../../glossary/#chimerascan) | `0.4.5` | `*.bedpe` | [Iyer-2011](../../background/citations#Iyer-2011) |
Expand Down
5 changes: 5 additions & 0 deletions mavis/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from .cnvnator import convert_row as _parse_cnvnator
from .vcf import convert_file as read_vcf
from .breakdancer import convert_file as _convert_breakdancer_file
from .arriba import convert_row as _parse_arriba
from .starfusion import convert_row as _parse_starfusion
from .chimerascan import convert_row as _parse_chimerascan

Expand Down Expand Up @@ -142,6 +143,10 @@ def _convert_tool_row(row, file_type, stranded, assume_no_untemplated=True):
{k: v for k, v in row.items() if k not in {'Type', 'Chr1', 'Chr2', 'Pos1', 'Pos2'}}
)

elif file_type == SUPPORTED_TOOL.ARRIBA:

std_row.update(_parse_arriba(row))

else:
raise NotImplementedError('unsupported file type', file_type)

Expand Down
39 changes: 39 additions & 0 deletions mavis/tools/arriba.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from ..constants import COLUMNS, ORIENT, STRAND

from .constants import TRACKING_COLUMN, SUPPORTED_TOOL


def get_orient(string):
    """
    Map an Arriba direction value onto the matching orientation constant.

    Args:
        string: the direction value to translate

    Returns:
        ORIENT.LEFT for "downstream", ORIENT.RIGHT for "upstream" and ORIENT.NS
        for any other value
    """
    return (
        ORIENT.LEFT
        if string == "downstream"
        else ORIENT.RIGHT
        if string == "upstream"
        else ORIENT.NS
    )


def convert_row(row):
    """
    Transform one row of Arriba output into the common format for expansion by mapping
    the Arriba column names to column names which MAVIS can read.

    Args:
        row (dict): a single Arriba record keyed by column name. The columns read are
            ``breakpoint1``/``breakpoint2`` (``<chromosome>:<position>``),
            ``strand1(gene/fusion)``/``strand2(gene/fusion)``, ``type`` and
            ``direction1``/``direction2``.

    Returns:
        dict: the chromosome, strand, orientation, event type and start/end position
        of both breakpoints, keyed by the MAVIS column names. Positions are returned
        as the text parsed from the breakpoint column (start and end are identical).

    Raises:
        AssertionError: if a breakpoint, strand or type value cannot be split into
            the expected parts (this includes ``None`` values)
        KeyError: if one of the required columns is missing from the row
    """
    try:
        # all of the text splitting is done up-front so that every malformed value is
        # reported through the same error below
        chr1, b1_start = row["breakpoint1"].split(":")
        chr2, b2_start = row["breakpoint2"].split(":")
        # the strand columns hold "<gene strand>/<fusion strand>", the second is kept
        strand1 = row["strand1(gene/fusion)"].split("/")[1]
        strand2 = row["strand2(gene/fusion)"].split("/")[1]
        # the type column may carry extra "/"-separated qualifiers after the event type
        event_type = row["type"].split("/")[0]
    except (ValueError, TypeError, AttributeError, IndexError) as err:
        # AttributeError: the value was None (or another non-string)
        # IndexError: a strand value did not contain the "/" separator
        raise AssertionError(
            "Could not parse the breakpoint from the Arriba row: {}, {}".format(
                row.get("breakpoint1"), row.get("breakpoint2")
            )
        ) from err

    std_row = {}
    std_row[COLUMNS.break1_chromosome] = chr1
    std_row[COLUMNS.break2_chromosome] = chr2
    std_row[COLUMNS.break1_strand] = strand1
    std_row[COLUMNS.break2_strand] = strand2
    std_row[COLUMNS.event_type] = event_type
    std_row[COLUMNS.break1_orientation] = get_orient(row["direction1"])
    std_row[COLUMNS.break2_orientation] = get_orient(row["direction2"])
    std_row[COLUMNS.break1_position_start] = std_row[COLUMNS.break1_position_end] = b1_start
    std_row[COLUMNS.break2_position_start] = std_row[COLUMNS.break2_position_end] = b2_start
    return std_row
1 change: 1 addition & 0 deletions mavis/tools/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
MAVIS='mavis',
DEFUSE='defuse',
BREAKDANCER='breakdancer',
ARRIBA='arriba',
VCF='vcf',
BREAKSEQ='breakseq',
CNVNATOR='cnvnator',
Expand Down
107 changes: 107 additions & 0 deletions tests/unit/test_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,113 @@ def test_convert_duplication(self):
self.assertEqual('1', bpp.break2.chr)



class TestArriba(unittest.TestCase):
    """
    Conversion of Arriba rows into breakpoint pairs through ``_convert_tool_row``.

    Every test method has a unique name: methods that share a name silently replace
    one another in the class body, so only the last definition would ever be run.

    NOTE(review): test_convert_standard_event expects the chromosome name to be passed
    through unchanged ('13') while the translocation, inversion and deletion tests
    expect a 'chr' prefix that is not present in their input rows -- confirm which
    behaviour is intended and align the expectations.
    """

    def test_convert_standard_event(self):
        row = {
            'breakpoint1': '13:114529969',
            'breakpoint2': '13:114751269',
            'type': 'inversion',
            'strand1(gene/fusion)': '+/+',
            'strand2(gene/fusion)': '-/-',
            'direction1': 'downstream',
            'direction2': 'downstream',
        }
        bpp_list = _convert_tool_row(row, SUPPORTED_TOOL.ARRIBA, True)

        self.assertEqual(1, len(bpp_list))
        bpp = bpp_list[0]
        self.assertEqual('13', bpp.break1.chr)
        self.assertEqual('13', bpp.break2.chr)
        self.assertEqual(114529969, bpp.break1.start)
        self.assertEqual(114751269, bpp.break2.start)
        self.assertEqual(SVTYPE.INV, bpp.event_type)
        self.assertEqual('L', bpp.break1.orient)
        self.assertEqual('L', bpp.break2.orient)
        self.assertEqual(True, bpp.opposing_strands)

    def test_convert_translocation(self):
        row = {
            'breakpoint1': '17:69313092',
            'breakpoint2': '20:58272875',
            'type': 'translocation',
            'strand1(gene/fusion)': '-/-',
            'strand2(gene/fusion)': '-/-',
            'direction1': 'upstream',
            'direction2': 'downstream',
        }
        bpp_list = _convert_tool_row(row, SUPPORTED_TOOL.ARRIBA, True)

        self.assertEqual(1, len(bpp_list))
        bpp = bpp_list[0]
        self.assertEqual('chr17', bpp.break1.chr)
        self.assertEqual('chr20', bpp.break2.chr)
        self.assertEqual(69313092, bpp.break1.start)
        self.assertEqual(58272875, bpp.break2.start)
        self.assertEqual(SVTYPE.TRANS, bpp.event_type)
        self.assertEqual('R', bpp.break1.orient)
        self.assertEqual('L', bpp.break2.orient)
        self.assertEqual(False, bpp.opposing_strands)

    # previously also named test_convert_translocation, which hid the test above
    def test_convert_inversion(self):
        row = {
            'breakpoint1': '20:57265705',
            'breakpoint2': '20:47786405',
            'type': 'inversion/5\'-5\'',
            'strand1(gene/fusion)': '-/-',
            'strand2(gene/fusion)': '-/+',
            'direction1': 'upstream',
            'direction2': 'upstream',
        }
        bpp_list = _convert_tool_row(row, SUPPORTED_TOOL.ARRIBA, True)

        self.assertEqual(1, len(bpp_list))
        bpp = bpp_list[0]
        self.assertEqual('chr20', bpp.break1.chr)
        self.assertEqual('chr20', bpp.break2.chr)
        self.assertEqual(57265705, bpp.break1.start)
        self.assertEqual(47786405, bpp.break2.start)
        self.assertEqual(SVTYPE.INV, bpp.event_type)
        self.assertEqual('R', bpp.break1.orient)
        self.assertEqual('R', bpp.break2.orient)
        self.assertEqual(True, bpp.opposing_strands)

    # previously also named test_convert_translocation, which hid both tests above
    def test_convert_deletion(self):
        row = {
            'breakpoint1': '14:102877322',
            'breakpoint2': '14:102994672',
            'type': 'deletion/read-through/5\'-5\'',
            'strand1(gene/fusion)': '+/+',
            'strand2(gene/fusion)': '-/+',
            'direction1': 'downstream',
            'direction2': 'upstream',
        }
        bpp_list = _convert_tool_row(row, SUPPORTED_TOOL.ARRIBA, True)

        self.assertEqual(1, len(bpp_list))
        bpp = bpp_list[0]
        self.assertEqual('chr14', bpp.break1.chr)
        self.assertEqual('chr14', bpp.break2.chr)
        self.assertEqual(102877322, bpp.break1.start)
        self.assertEqual(102994672, bpp.break2.start)
        self.assertEqual(SVTYPE.DEL, bpp.event_type)
        self.assertEqual('L', bpp.break1.orient)
        self.assertEqual('R', bpp.break2.orient)
        self.assertEqual(False, bpp.opposing_strands)

    def test_malformed(self):
        # an empty and a missing breakpoint must be rejected rather than converted
        row = {
            'breakpoint1': '',
            'breakpoint2': None,
            'type': 'translocation',
            'strand1(gene/fusion)': '-/-',
            'strand2(gene/fusion)': '-/-',
            'direction1': 'upstream',
            'direction2': 'downstream',
        }
        with self.assertRaises(AssertionError):
            _convert_tool_row(row, SUPPORTED_TOOL.ARRIBA, False)

class TestStarFusion(unittest.TestCase):
def test_convert_standard_event(self):
row = {
Expand Down