Skip to content

Commit

Permalink
Merge pull request #326 from bcgsc/feature/add-arriba-support
Browse files Browse the repository at this point in the history
#325 Adding arriba support
  • Loading branch information
calchoo authored May 16, 2022
2 parents 6689de6 + dc67258 commit cfc68d4
Show file tree
Hide file tree
Showing 8 changed files with 166 additions and 1 deletion.
4 changes: 3 additions & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ jobs:
steps:
- uses: actions/checkout@v2
- name: install machine dependencies
run: sudo apt-get install -y libcurl4-openssl-dev
run: |
sudo apt-get update
sudo apt-get install -y libcurl4-openssl-dev
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
Expand Down
5 changes: 5 additions & 0 deletions docs/background/citations.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@ Saunders,C.T. et al. (2012) Strelka: accurate somatic small-variant
calling from sequenced tumor--normal sample pairs. Bioinformatics,
28, 1811--1817.

## Uhrig-2021

Uhrig,S. et al. (2021) Accurate and efficient detection of gene
fusions from RNA sequencing data. Genome Res., 31, 448--460.

## Yates-2016

Yates,A. et al. (2016) Ensembl 2016. Nucleic Acids Res., 44,
Expand Down
5 changes: 5 additions & 0 deletions docs/glossary.md
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,11 @@ install instructions.
Community-based standard of recommendations for variant notation.
See [http://varnomen.hgvs.org/](http://varnomen.hgvs.org/)

## Arriba

Arriba is an SV caller. Source for Arriba can be found
[here](https://github.com/suhrig/arriba) [Uhrig-2021](../background/citations#uhrig-2021)

## BreakDancer

BreakDancer is an SV caller. Source for BreakDancer can be found
Expand Down
1 change: 1 addition & 0 deletions docs/inputs/support.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ It should be noted however that the tool tracked will only be listed as

| Name | Version(s) | MAVIS input | Publication |
| ------------------------------------------ | ---------------- | --------------------------------------------- | ----------------------------------------------------------- |
| [Arriba](../../glossary/#arriba) | `2.2.1` | `fusions.tsv` | [Uhrig-2021](../../background/citations#uhrig-2021) |
| [BreakDancer](../../glossary/#breakdancer) | `1.4.5` | `Tools main output file(s)` | [Chen-2009](../../background/citations#chen-2009) |
| [BreakSeq](../../glossary/#breakseq) | `2.2` | `work/breakseq.vcf.gz` | [Abyzov-2015](../../background/citations#abyzov-2015) |
| [Chimerascan](../../glossary/#chimerascan) | `0.4.5` | `*.bedpe` | [Iyer-2011](../../background/citations#iyer-2011) |
Expand Down
5 changes: 5 additions & 0 deletions mavis/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from .cnvnator import convert_row as _parse_cnvnator
from .vcf import convert_file as read_vcf
from .breakdancer import convert_file as _convert_breakdancer_file
from .arriba import convert_row as _parse_arriba
from .starfusion import convert_row as _parse_starfusion
from .chimerascan import convert_row as _parse_chimerascan

Expand Down Expand Up @@ -142,6 +143,10 @@ def _convert_tool_row(row, file_type, stranded, assume_no_untemplated=True):
{k: v for k, v in row.items() if k not in {'Type', 'Chr1', 'Chr2', 'Pos1', 'Pos2'}}
)

elif file_type == SUPPORTED_TOOL.ARRIBA:

std_row.update(_parse_arriba(row))

else:
raise NotImplementedError('unsupported file type', file_type)

Expand Down
39 changes: 39 additions & 0 deletions mavis/tools/arriba.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from ..constants import COLUMNS, ORIENT, STRAND

from .constants import TRACKING_COLUMN, SUPPORTED_TOOL


def get_orient(string):
    """
    Translate an Arriba direction label into a MAVIS orientation constant.

    Args:
        string: the direction value taken from an Arriba row

    Returns:
        ``ORIENT.LEFT`` for ``"downstream"``, ``ORIENT.RIGHT`` for ``"upstream"`` and
        ``ORIENT.NS`` for any other value
    """
    return (
        ORIENT.LEFT
        if string == "downstream"
        else ORIENT.RIGHT
        if string == "upstream"
        else ORIENT.NS
    )


def convert_row(row):
    """
    Transform one Arriba output row into the common format for expansion.

    Maps the Arriba input column names onto the column names which MAVIS can read.

    Args:
        row (dict): a single Arriba record keyed by column name. The columns read are
            ``breakpoint1``, ``breakpoint2``, ``strand1(gene/fusion)``,
            ``strand2(gene/fusion)``, ``type``, ``direction1`` and ``direction2``

    Returns:
        dict: the parsed values keyed by MAVIS column name. Positions are returned exactly
            as they appear in the breakpoint string (no integer conversion is done here)

    Raises:
        AssertionError: if a breakpoint is not of the form ``<chromosome>:<position>``, a
            strand value has no ``/`` separator, or a value cannot be split (e.g. ``None``)
        KeyError: if one of the expected columns is missing from the row
    """
    std_row = {}

    try:
        std_row[COLUMNS.break1_chromosome], b1_start = row["breakpoint1"].split(":")
        std_row[COLUMNS.break2_chromosome], b2_start = row["breakpoint2"].split(":")

        # strand columns are "<gene strand>/<fusion strand>"; the fusion strand is kept
        std_row[COLUMNS.break1_strand] = row["strand1(gene/fusion)"].split("/")[1]
        std_row[COLUMNS.break2_strand] = row["strand2(gene/fusion)"].split("/")[1]
        # the type may carry "/"-separated qualifiers; only the leading event name is kept
        std_row[COLUMNS.event_type] = row["type"].split("/")[0]
        std_row[COLUMNS.break1_orientation] = get_orient(row["direction1"])
        std_row[COLUMNS.break2_orientation] = get_orient(row["direction2"])

        # each breakpoint is a single position so start and end are the same
        std_row[COLUMNS.break1_position_start] = std_row[COLUMNS.break1_position_end] = b1_start
        std_row[COLUMNS.break2_position_start] = std_row[COLUMNS.break2_position_end] = b2_start
    except (ValueError, TypeError, AttributeError, IndexError) as err:
        # AttributeError: a value was None and has no .split
        # IndexError: a strand value had no "/" separator
        # .get is used so that building the message cannot itself raise a KeyError
        raise AssertionError(
            "Could not parse the breakpoint from the Arriba row: {}, {}".format(
                row.get("breakpoint1"), row.get("breakpoint2")
            )
        ) from err
    return std_row
1 change: 1 addition & 0 deletions mavis/tools/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
MAVIS='mavis',
DEFUSE='defuse',
BREAKDANCER='breakdancer',
ARRIBA='arriba',
VCF='vcf',
BREAKSEQ='breakseq',
CNVNATOR='cnvnator',
Expand Down
107 changes: 107 additions & 0 deletions tests/unit/test_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,113 @@ def test_convert_duplication(self):
self.assertEqual('1', bpp.break2.chr)



class TestArriba(unittest.TestCase):
    """
    Conversion of Arriba rows into breakpoint pairs via ``_convert_tool_row``.

    Every test method has a unique name. Methods sharing a name inside a class body
    overwrite one another, so only the last definition would be collected and run.

    NOTE(review): the expected chromosome names mirror the un-prefixed input, as in
    test_convert_standard_event; confirm that _convert_tool_row does not add a 'chr' prefix.
    """

    def test_convert_standard_event(self):
        row = {
            'breakpoint1': '13:114529969',
            'breakpoint2': '13:114751269',
            'type': 'inversion',
            'strand1(gene/fusion)': '+/+',
            'strand2(gene/fusion)': '-/-',
            'direction1': 'downstream',
            'direction2': 'downstream',
        }
        bpp_list = _convert_tool_row(row, SUPPORTED_TOOL.ARRIBA, True)

        self.assertEqual(1, len(bpp_list))
        bpp = bpp_list[0]
        self.assertEqual('13', bpp.break1.chr)
        self.assertEqual('13', bpp.break2.chr)
        self.assertEqual(114529969, bpp.break1.start)
        self.assertEqual(114751269, bpp.break2.start)
        self.assertEqual(SVTYPE.INV, bpp.event_type)
        self.assertEqual('L', bpp.break1.orient)
        self.assertEqual('L', bpp.break2.orient)
        self.assertEqual(True, bpp.opposing_strands)

    def test_convert_translocation(self):
        row = {
            'breakpoint1': '17:69313092',
            'breakpoint2': '20:58272875',
            'type': 'translocation',
            'strand1(gene/fusion)': '-/-',
            'strand2(gene/fusion)': '-/-',
            'direction1': 'upstream',
            'direction2': 'downstream',
        }
        bpp_list = _convert_tool_row(row, SUPPORTED_TOOL.ARRIBA, True)

        self.assertEqual(1, len(bpp_list))
        bpp = bpp_list[0]
        self.assertEqual('17', bpp.break1.chr)
        self.assertEqual('20', bpp.break2.chr)
        self.assertEqual(69313092, bpp.break1.start)
        self.assertEqual(58272875, bpp.break2.start)
        self.assertEqual(SVTYPE.TRANS, bpp.event_type)
        self.assertEqual('R', bpp.break1.orient)
        self.assertEqual('L', bpp.break2.orient)
        self.assertEqual(False, bpp.opposing_strands)

    def test_convert_inversion_with_qualifier(self):
        # the type carries a "/"-separated qualifier after the event name
        # NOTE(review): breakpoint1 lies after breakpoint2 on the same chromosome here;
        # confirm the pair keeps the input order before relying on the position asserts
        row = {
            'breakpoint1': '20:57265705',
            'breakpoint2': '20:47786405',
            'type': 'inversion/5\'-5\'',
            'strand1(gene/fusion)': '-/-',
            'strand2(gene/fusion)': '-/+',
            'direction1': 'upstream',
            'direction2': 'upstream',
        }
        bpp_list = _convert_tool_row(row, SUPPORTED_TOOL.ARRIBA, True)

        self.assertEqual(1, len(bpp_list))
        bpp = bpp_list[0]
        self.assertEqual('20', bpp.break1.chr)
        self.assertEqual('20', bpp.break2.chr)
        self.assertEqual(57265705, bpp.break1.start)
        self.assertEqual(47786405, bpp.break2.start)
        self.assertEqual(SVTYPE.INV, bpp.event_type)
        self.assertEqual('R', bpp.break1.orient)
        self.assertEqual('R', bpp.break2.orient)
        self.assertEqual(True, bpp.opposing_strands)

    def test_convert_deletion_with_qualifiers(self):
        # the type carries several "/"-separated qualifiers after the event name
        row = {
            'breakpoint1': '14:102877322',
            'breakpoint2': '14:102994672',
            'type': 'deletion/read-through/5\'-5\'',
            'strand1(gene/fusion)': '+/+',
            'strand2(gene/fusion)': '-/+',
            'direction1': 'downstream',
            'direction2': 'upstream',
        }
        bpp_list = _convert_tool_row(row, SUPPORTED_TOOL.ARRIBA, True)

        self.assertEqual(1, len(bpp_list))
        bpp = bpp_list[0]
        self.assertEqual('14', bpp.break1.chr)
        self.assertEqual('14', bpp.break2.chr)
        self.assertEqual(102877322, bpp.break1.start)
        self.assertEqual(102994672, bpp.break2.start)
        self.assertEqual(SVTYPE.DEL, bpp.event_type)
        self.assertEqual('L', bpp.break1.orient)
        self.assertEqual('R', bpp.break2.orient)
        self.assertEqual(False, bpp.opposing_strands)

    def test_malformed(self):
        # neither breakpoint can be split into a chromosome and a position
        row = {
            'breakpoint1': '',
            'breakpoint2': None,
            'type': 'translocation',
            'strand1(gene/fusion)': '-/-',
            'strand2(gene/fusion)': '-/-',
            'direction1': 'upstream',
            'direction2': 'downstream',
        }
        with self.assertRaises(AssertionError):
            _convert_tool_row(row, SUPPORTED_TOOL.ARRIBA, False)

class TestStarFusion(unittest.TestCase):
def test_convert_standard_event(self):
row = {
Expand Down

0 comments on commit cfc68d4

Please sign in to comment.