diff --git a/notebooks/getting_started/1_Quick_Start.ipynb b/notebooks/getting_started/1_Quick_Start.ipynb index 98fe1e23..e4c22d55 100644 --- a/notebooks/getting_started/1_Quick_Start.ipynb +++ b/notebooks/getting_started/1_Quick_Start.ipynb @@ -2,8 +2,15 @@ "cells": [ { "cell_type": "markdown", + "id": "7f11c55b5ce145ee", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "source": [ - "# 1 Quick Start\n", + "# 1. Quick Start\n", "This notebook provides instructions on how to get your vrs-python environment up and running with as few\n", "steps as possible, and to provide some rudimentary examples to prove it is working properly.\n", "\n", @@ -18,17 +25,16 @@ " UTA_DB_URL=\"postgresql://anonymous:anonymous@uta.biocommons.org:5432/uta/uta_20210129b\"\n", "\n", "**NOTE** The external sources for the SeqRepo and UTA repositories are **ONLY** to be used as part of this notebook series and are not meant for use in production code. Please refer to the links above and follow the directions provided on how to setup local instances." 
- ], - "metadata": { - "collapsed": false - }, - "id": "7f11c55b5ce145ee" + ] }, { "cell_type": "markdown", "id": "91f92a3e35bd48a1", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "#### Step 1 - Setup Data Proxy Access\n", @@ -40,10 +46,13 @@ "execution_count": 1, "id": "37130d69b9dbd9d1", "metadata": { - "collapsed": false, "ExecuteTime": { "end_time": "2024-04-18T20:21:17.340604Z", "start_time": "2024-04-18T20:21:17.207979Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false } }, "outputs": [], @@ -57,7 +66,10 @@ "cell_type": "markdown", "id": "a1d5bbc3a77ff03f", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "Assert that the UTA URL is defined in the environment" @@ -68,10 +80,13 @@ "execution_count": 2, "id": "b11653c9aae4ecba", "metadata": { - "collapsed": false, "ExecuteTime": { "end_time": "2024-04-18T20:21:17.343195Z", "start_time": "2024-04-18T20:21:17.341690Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false } }, "outputs": [], @@ -84,7 +99,10 @@ "cell_type": "markdown", "id": "98ab29e5ac01cc3a", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "#### Step 2 - Setup an Allele Translator\n", @@ -96,10 +114,13 @@ "execution_count": 3, "id": "42bd6d6f09916724", "metadata": { - "collapsed": false, "ExecuteTime": { "end_time": "2024-04-18T20:21:17.393606Z", "start_time": "2024-04-18T20:21:17.343980Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false } }, "outputs": [], @@ -112,7 +133,10 @@ "cell_type": "markdown", "id": "88d9aba51e44ae0", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "#### Step 3 - Translate variation representations to VRS\n", @@ -125,16 +149,31 @@ "execution_count": 4, "id": "af7a8f1509acf4ed", "metadata": { - 
"collapsed": false, "ExecuteTime": { "end_time": "2024-04-18T20:21:17.525029Z", "start_time": "2024-04-18T20:21:17.394248Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false } }, "outputs": [ { "data": { - "text/plain": "{'id': 'ga4gh:VA.LK_4rOVxyEwrEpaOVd-BDFV0ocbO5vgV',\n 'type': 'Allele',\n 'digest': 'LK_4rOVxyEwrEpaOVd-BDFV0ocbO5vgV',\n 'location': {'id': 'ga4gh:SL.nA5-KovovkH-5p3LF1657nkkeWFwrInI',\n 'type': 'SequenceLocation',\n 'digest': 'nA5-KovovkH-5p3LF1657nkkeWFwrInI',\n 'sequenceReference': {'type': 'SequenceReference',\n 'refgetAccession': 'SQ.aUiQCzCPZ2d0csHbMSbh2NzInhonSXwI'},\n 'start': 80656509,\n 'end': 80656510},\n 'state': {'type': 'LiteralSequenceExpression', 'sequence': 'TT'}}" + "text/plain": [ + "{'id': 'ga4gh:VA.LK_4rOVxyEwrEpaOVd-BDFV0ocbO5vgV',\n", + " 'type': 'Allele',\n", + " 'digest': 'LK_4rOVxyEwrEpaOVd-BDFV0ocbO5vgV',\n", + " 'location': {'id': 'ga4gh:SL.nA5-KovovkH-5p3LF1657nkkeWFwrInI',\n", + " 'type': 'SequenceLocation',\n", + " 'digest': 'nA5-KovovkH-5p3LF1657nkkeWFwrInI',\n", + " 'sequenceReference': {'type': 'SequenceReference',\n", + " 'refgetAccession': 'SQ.aUiQCzCPZ2d0csHbMSbh2NzInhonSXwI'},\n", + " 'start': 80656509,\n", + " 'end': 80656510},\n", + " 'state': {'type': 'LiteralSequenceExpression', 'sequence': 'TT'}}" + ] }, "execution_count": 4, "metadata": {}, @@ -150,7 +189,10 @@ "cell_type": "markdown", "id": "6fa7cb2ac050547f", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "The output above is the JSON structure of an *Allele* in VRS form. You should be able to recognize the *Allele*, *SequenceLocation*, *SequenceReference* and *LiteralSequenceLocation* classes. 
\n", @@ -163,16 +205,31 @@ "execution_count": 5, "id": "42430c680fe262c1", "metadata": { - "collapsed": false, "ExecuteTime": { "end_time": "2024-04-18T20:21:18.708118Z", "start_time": "2024-04-18T20:21:17.526585Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false } }, "outputs": [ { "data": { - "text/plain": "{'id': 'ga4gh:VA.LK_4rOVxyEwrEpaOVd-BDFV0ocbO5vgV',\n 'type': 'Allele',\n 'digest': 'LK_4rOVxyEwrEpaOVd-BDFV0ocbO5vgV',\n 'location': {'id': 'ga4gh:SL.nA5-KovovkH-5p3LF1657nkkeWFwrInI',\n 'type': 'SequenceLocation',\n 'digest': 'nA5-KovovkH-5p3LF1657nkkeWFwrInI',\n 'sequenceReference': {'type': 'SequenceReference',\n 'refgetAccession': 'SQ.aUiQCzCPZ2d0csHbMSbh2NzInhonSXwI'},\n 'start': 80656509,\n 'end': 80656510},\n 'state': {'type': 'LiteralSequenceExpression', 'sequence': 'TT'}}" + "text/plain": [ + "{'id': 'ga4gh:VA.LK_4rOVxyEwrEpaOVd-BDFV0ocbO5vgV',\n", + " 'type': 'Allele',\n", + " 'digest': 'LK_4rOVxyEwrEpaOVd-BDFV0ocbO5vgV',\n", + " 'location': {'id': 'ga4gh:SL.nA5-KovovkH-5p3LF1657nkkeWFwrInI',\n", + " 'type': 'SequenceLocation',\n", + " 'digest': 'nA5-KovovkH-5p3LF1657nkkeWFwrInI',\n", + " 'sequenceReference': {'type': 'SequenceReference',\n", + " 'refgetAccession': 'SQ.aUiQCzCPZ2d0csHbMSbh2NzInhonSXwI'},\n", + " 'start': 80656509,\n", + " 'end': 80656510},\n", + " 'state': {'type': 'LiteralSequenceExpression', 'sequence': 'TT'}}" + ] }, "execution_count": 5, "metadata": {}, @@ -186,29 +243,35 @@ }, { "cell_type": "markdown", - "source": [ - "The VRS variant representations should be the same." - ], + "id": "2c6d2303b4bda87c", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, - "id": "2c6d2303b4bda87c" + "source": [ + "The VRS variant representations should be the same." 
+ ] }, { "cell_type": "code", - "outputs": [], - "source": [ - "assert(vrs_from_hgvs == vrs_from_spdi)" - ], + "execution_count": 6, + "id": "3c91be2bdec6b4be", "metadata": { - "collapsed": false, "ExecuteTime": { "end_time": "2024-04-18T20:21:18.713821Z", "start_time": "2024-04-18T20:21:18.710176Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false } }, - "id": "3c91be2bdec6b4be", - "execution_count": 6 + "outputs": [], + "source": [ + "assert(vrs_from_hgvs == vrs_from_spdi)" + ] } ], "metadata": { @@ -227,7 +290,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.12.1" } }, "nbformat": 4, diff --git a/notebooks/getting_started/2_Exploring_the_SeqRepo_DataProxy.ipynb b/notebooks/getting_started/2_Exploring_the_SeqRepo_DataProxy.ipynb index 93dbf1d3..f5e81a09 100644 --- a/notebooks/getting_started/2_Exploring_the_SeqRepo_DataProxy.ipynb +++ b/notebooks/getting_started/2_Exploring_the_SeqRepo_DataProxy.ipynb @@ -2,70 +2,99 @@ "cells": [ { "cell_type": "markdown", - "source": [ - "# 2 Exploring the SeqRepo DataProxy\n", - "The SeqRepo DataProxy has sequence related functionality that may be of use." - ], + "id": "4107043be5d9af0d", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, - "id": "4107043be5d9af0d" + "source": [ + "# 2. Exploring the SeqRepo DataProxy\n", + "The SeqRepo DataProxy has sequence related functionality that may be of use." + ] }, { "cell_type": "markdown", + "id": "2cd1dae76c042895", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "source": [ "#### Step 1 - Setup Data Proxy Access\n", "The *DataProxy* provides access to sequence references." 
- ], - "metadata": { - "collapsed": false - }, - "id": "2cd1dae76c042895" + ] }, { "cell_type": "code", - "outputs": [], - "source": [ - "from ga4gh.vrs.dataproxy import create_dataproxy\n", - "seqrepo_rest_service_url = \"seqrepo+https://services.genomicmedlab.org/seqrepo\"\n", - "seqrepo_dataproxy = create_dataproxy(uri=seqrepo_rest_service_url)" - ], + "execution_count": 1, + "id": "f5057501e0ff48aa", "metadata": { - "collapsed": false, "ExecuteTime": { "end_time": "2024-04-18T20:22:19.380351Z", "start_time": "2024-04-18T20:22:19.248562Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false } }, - "id": "f5057501e0ff48aa", - "execution_count": 1 + "outputs": [], + "source": [ + "from ga4gh.vrs.dataproxy import create_dataproxy\n", + "seqrepo_rest_service_url = \"seqrepo+https://services.genomicmedlab.org/seqrepo\"\n", + "seqrepo_dataproxy = create_dataproxy(uri=seqrepo_rest_service_url)" + ] }, { "cell_type": "markdown", - "source": [ - "#### Step 2 - Information on refseq accessions" - ], + "id": "80b8074e25870aee", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, - "id": "80b8074e25870aee" + "source": [ + "#### Step 2 - Information on refseq accessions" + ] }, { "cell_type": "markdown", - "source": [ - "It is often necessary when building *SequenceLocation* objects, to obtain the refget accession from a public accession identifier. The *DataProxy* method *derive_refget_accession* can do this for you." - ], + "id": "efdf07f650059a11", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, - "id": "efdf07f650059a11" + "source": [ + "It is often necessary when building *SequenceLocation* objects, to obtain the refget accession from a public accession identifier. The *DataProxy* method *derive_refget_accession* can do this for you." 
+ ] }, { "cell_type": "code", + "execution_count": 2, + "id": "77fa2f312e39d4a3", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-18T20:22:19.515713Z", + "start_time": "2024-04-18T20:22:19.381606Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [ { "data": { - "text/plain": "'SQ.Pw3Ch0x3XWD6ljsnIfmk_NERcZCI9sNM'" + "text/plain": [ + "'SQ.Pw3Ch0x3XWD6ljsnIfmk_NERcZCI9sNM'" + ] }, "execution_count": 2, "metadata": {}, @@ -74,33 +103,51 @@ ], "source": [ "seqrepo_dataproxy.derive_refget_accession('refseq:NM_002439.5')" - ], + ] + }, + { + "cell_type": "markdown", + "id": "203aada74390820e", "metadata": { "collapsed": false, - "ExecuteTime": { - "end_time": "2024-04-18T20:22:19.515713Z", - "start_time": "2024-04-18T20:22:19.381606Z" + "jupyter": { + "outputs_hidden": false } }, - "id": "77fa2f312e39d4a3", - "execution_count": 2 - }, - { - "cell_type": "markdown", "source": [ "The *DataProxy* *get_metadata* method provides metadata information on the accession including: the date the accession was added, aliases for the accession and reference length." 
- ], - "metadata": { - "collapsed": false - }, - "id": "203aada74390820e" + ] }, { "cell_type": "code", + "execution_count": 3, + "id": "bdb9122059add31c", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-18T20:22:19.694430Z", + "start_time": "2024-04-18T20:22:19.516491Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [ { "data": { - "text/plain": "{'added': '2016-08-24T05:03:11Z',\n 'aliases': ['MD5:215137b1973c1a5afcf86be7d999574a',\n 'NCBI:NM_000551.3',\n 'refseq:NM_000551.3',\n 'SEGUID:T12L0p2X5E8DbnL0+SwI4Wc1S6g',\n 'SHA1:4f5d8bd29d97e44f036e72f4f92c08e167354ba8',\n 'VMC:GS_v_QTc1p-MUYdgrRv4LMT6ByXIOsdw3C_',\n 'sha512t24u:v_QTc1p-MUYdgrRv4LMT6ByXIOsdw3C_',\n 'ga4gh:SQ.v_QTc1p-MUYdgrRv4LMT6ByXIOsdw3C_'],\n 'alphabet': 'ACGT',\n 'length': 4560}" + "text/plain": [ + "{'added': '2016-08-24T05:03:11Z',\n", + " 'aliases': ['MD5:215137b1973c1a5afcf86be7d999574a',\n", + " 'NCBI:NM_000551.3',\n", + " 'refseq:NM_000551.3',\n", + " 'SEGUID:T12L0p2X5E8DbnL0+SwI4Wc1S6g',\n", + " 'SHA1:4f5d8bd29d97e44f036e72f4f92c08e167354ba8',\n", + " 'VMC:GS_v_QTc1p-MUYdgrRv4LMT6ByXIOsdw3C_',\n", + " 'sha512t24u:v_QTc1p-MUYdgrRv4LMT6ByXIOsdw3C_',\n", + " 'ga4gh:SQ.v_QTc1p-MUYdgrRv4LMT6ByXIOsdw3C_'],\n", + " 'alphabet': 'ACGT',\n", + " 'length': 4560}" + ] }, "execution_count": 3, "metadata": {}, @@ -109,33 +156,41 @@ ], "source": [ "seqrepo_dataproxy.get_metadata(\"refseq:NM_000551.3\")" - ], + ] + }, + { + "cell_type": "markdown", + "id": "e73504bc095b1ff1", "metadata": { "collapsed": false, - "ExecuteTime": { - "end_time": "2024-04-18T20:22:19.694430Z", - "start_time": "2024-04-18T20:22:19.516491Z" + "jupyter": { + "outputs_hidden": false } }, - "id": "bdb9122059add31c", - "execution_count": 3 - }, - { - "cell_type": "markdown", "source": [ "*DataProxy* *get_sequence* returns actual sequence for given identifier, optionally limited to interbase intervals." 
- ], - "metadata": { - "collapsed": false - }, - "id": "e73504bc095b1ff1" + ] }, { "cell_type": "code", + "execution_count": 4, + "id": "7d5563eae9dd9e58", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-18T20:22:19.833640Z", + "start_time": "2024-04-18T20:22:19.695452Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [ { "data": { - "text/plain": "'CCTCGCCTCCGTTACAACGGCCTACGGTGCTGGAGGATCCTTCTGCGCACG'" + "text/plain": [ + "'CCTCGCCTCCGTTACAACGGCCTACGGTGCTGGAGGATCCTTCTGCGCACG'" + ] }, "execution_count": 4, "metadata": {}, @@ -145,33 +200,41 @@ "source": [ "identifier = \"ga4gh:SQ.v_QTc1p-MUYdgrRv4LMT6ByXIOsdw3C_\"\n", "seqrepo_dataproxy.get_sequence(identifier, start=0, end=51)" - ], + ] + }, + { + "cell_type": "markdown", + "id": "7c1e8515d46f0fac", "metadata": { "collapsed": false, - "ExecuteTime": { - "end_time": "2024-04-18T20:22:19.833640Z", - "start_time": "2024-04-18T20:22:19.695452Z" + "jupyter": { + "outputs_hidden": false } }, - "id": "7d5563eae9dd9e58", - "execution_count": 4 - }, - { - "cell_type": "markdown", "source": [ "*DataProxy* *translate_sequence_identifier* returns a list of equivalent identifiers in the given namespace." 
- ], - "metadata": { - "collapsed": false - }, - "id": "7c1e8515d46f0fac" + ] }, { "cell_type": "code", + "execution_count": 5, + "id": "9ad294ec1b92bd86", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-18T20:22:19.940602Z", + "start_time": "2024-04-18T20:22:19.836067Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [ { "data": { - "text/plain": "['ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl']" + "text/plain": [ + "['ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl']" + ] }, "execution_count": 5, "metadata": {}, @@ -180,23 +243,28 @@ ], "source": [ "seqrepo_dataproxy.translate_sequence_identifier(\"GRCh38:19\", \"ga4gh\")" - ], + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "a16ba639ccca0323", "metadata": { - "collapsed": false, "ExecuteTime": { - "end_time": "2024-04-18T20:22:19.940602Z", - "start_time": "2024-04-18T20:22:19.836067Z" + "end_time": "2024-04-18T20:22:20.069679Z", + "start_time": "2024-04-18T20:22:19.941670Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false } }, - "id": "9ad294ec1b92bd86", - "execution_count": 5 - }, - { - "cell_type": "code", "outputs": [ { "data": { - "text/plain": "['GRCh38:19', 'GRCh38:chr19']" + "text/plain": [ + "['GRCh38:19', 'GRCh38:chr19']" + ] }, "execution_count": 6, "metadata": {}, @@ -205,35 +273,26 @@ ], "source": [ "seqrepo_dataproxy.translate_sequence_identifier(\"ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl\", \"GRCh38\")" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-04-18T20:22:20.069679Z", - "start_time": "2024-04-18T20:22:19.941670Z" - } - }, - "id": "a16ba639ccca0323", - "execution_count": 6 + ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": 
"python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" + "pygments_lexer": "ipython3", + "version": "3.12.1" } }, "nbformat": 4, diff --git a/notebooks/getting_started/3_Basic_Models.ipynb b/notebooks/getting_started/3_Basic_Models.ipynb index 70871290..87931317 100644 --- a/notebooks/getting_started/3_Basic_Models.ipynb +++ b/notebooks/getting_started/3_Basic_Models.ipynb @@ -2,94 +2,127 @@ "cells": [ { "cell_type": "markdown", - "source": [ - "# 3 Basic Models\n", - "This notebook details how to compose VRS objects using component classes, not by use of a nomenclature string (HGVS/SPDI/Gnomad-VCF).\n" - ], + "id": "3b784af70aaada45", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, - "id": "3b784af70aaada45" + "source": [ + "# 3. Basic Models\n", + "This notebook details how to compose VRS objects using component classes, not by use of a nomenclature string (HGVS/SPDI/Gnomad-VCF).\n" + ] }, { "cell_type": "markdown", + "id": "6567dd63bcbe6969", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "source": [ "#### Step 1 - Setup Data Proxy Access\n", "The *DataProxy* provides access to sequence references." 
- ], - "metadata": { - "collapsed": false - }, - "id": "6567dd63bcbe6969" + ] }, { "cell_type": "code", - "outputs": [], - "source": [ - "from ga4gh.vrs.dataproxy import create_dataproxy\n", - "seqrepo_rest_service_url = \"seqrepo+https://services.genomicmedlab.org/seqrepo\"\n", - "seqrepo_dataproxy = create_dataproxy(uri=seqrepo_rest_service_url)" - ], + "execution_count": 1, + "id": "4347f39231fd663c", "metadata": { - "collapsed": false, "ExecuteTime": { "end_time": "2024-04-18T20:23:57.240826Z", "start_time": "2024-04-18T20:23:57.106544Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false } }, - "id": "4347f39231fd663c", - "execution_count": 1 + "outputs": [], + "source": [ + "from ga4gh.vrs.dataproxy import create_dataproxy\n", + "seqrepo_rest_service_url = \"seqrepo+https://services.genomicmedlab.org/seqrepo\"\n", + "seqrepo_dataproxy = create_dataproxy(uri=seqrepo_rest_service_url)" + ] }, { "cell_type": "markdown", + "id": "ae2e3ada0adaccdf", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "source": [ "#### Step 2 - Access the VRS models package\n", "The models package contains the various classes necessary for building VRS objects." 
- ], - "metadata": { - "collapsed": false - }, - "id": "ae2e3ada0adaccdf" + ] }, { "cell_type": "code", - "outputs": [], - "source": [ - "from ga4gh.vrs import models" - ], + "execution_count": 2, + "id": "da9f8a5f19d1ea35", "metadata": { - "collapsed": false, "ExecuteTime": { "end_time": "2024-04-18T20:23:57.243477Z", "start_time": "2024-04-18T20:23:57.241894Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false } }, - "id": "da9f8a5f19d1ea35", - "execution_count": 2 + "outputs": [], + "source": [ + "from ga4gh.vrs import models" + ] }, { "cell_type": "markdown", + "id": "63f99fabb02e236f", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "source": [ "#### Step 3 - Build the Allele\n", "In this example we are going to build a VRS object from the variant \"NC_000005.10:g.80656510delinsTT\". This variant can be viewed in [Clinvar](https://www.ncbi.nlm.nih.gov/clinvar/variation/2673535/)." - ], - "metadata": { - "collapsed": false - }, - "id": "63f99fabb02e236f" + ] }, { "cell_type": "markdown", - "source": [ - "Start by getting the VRS string representation of the sequence reference using the *DataProxy* object." - ], + "id": "c1a3db39d8d2d3ea", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, - "id": "c1a3db39d8d2d3ea" + "source": [ + "Start by getting the VRS string representation of the sequence reference using the *DataProxy* object." 
+ ] }, { "cell_type": "code", + "execution_count": 3, + "id": "320242aa48ef314d", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-18T20:23:57.304096Z", + "start_time": "2024-04-18T20:23:57.244087Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [ { "name": "stdout", @@ -102,33 +135,47 @@ "source": [ "refget_accession = seqrepo_dataproxy.derive_refget_accession('refseq:NM_002439.5')\n", "print(refget_accession)" - ], + ] + }, + { + "cell_type": "markdown", + "id": "6ac6feb2b9ffb0c", "metadata": { "collapsed": false, - "ExecuteTime": { - "end_time": "2024-04-18T20:23:57.304096Z", - "start_time": "2024-04-18T20:23:57.244087Z" + "jupyter": { + "outputs_hidden": false } }, - "id": "320242aa48ef314d", - "execution_count": 3 - }, - { - "cell_type": "markdown", "source": [ "Build a dictionary of type *SequenceReference* containing the refget_accession. Then continue in succession building dictionaries of type *SequenceLocation*, *LiteralSequenceExpression* and *Allele* referencing previously built structures where applicable." 
- ], - "metadata": { - "collapsed": false - }, - "id": "6ac6feb2b9ffb0c" + ] }, { "cell_type": "code", + "execution_count": 4, + "id": "445983b1043c504f", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-18T20:23:57.308698Z", + "start_time": "2024-04-18T20:23:57.304734Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [ { "data": { - "text/plain": "{'type': 'Allele',\n 'location': {'type': 'SequenceLocation',\n 'sequenceReference': {'type': 'SequenceReference',\n 'refgetAccession': 'SQ.Pw3Ch0x3XWD6ljsnIfmk_NERcZCI9sNM'},\n 'start': 80656509,\n 'end': 80656510},\n 'state': {'type': 'LiteralSequenceExpression', 'sequence': 'TT'}}" + "text/plain": [ + "{'type': 'Allele',\n", + " 'location': {'type': 'SequenceLocation',\n", + " 'sequenceReference': {'type': 'SequenceReference',\n", + " 'refgetAccession': 'SQ.Pw3Ch0x3XWD6ljsnIfmk_NERcZCI9sNM'},\n", + " 'start': 80656509,\n", + " 'end': 80656510},\n", + " 'state': {'type': 'LiteralSequenceExpression', 'sequence': 'TT'}}" + ] }, "execution_count": 4, "metadata": {}, @@ -160,29 +207,35 @@ "}\n", "allele = models.Allele(**allele_dict)\n", "allele.model_dump(exclude_none=True)" - ], + ] + }, + { + "cell_type": "markdown", + "id": "b69c827d943f1ef1", "metadata": { "collapsed": false, - "ExecuteTime": { - "end_time": "2024-04-18T20:23:57.308698Z", - "start_time": "2024-04-18T20:23:57.304734Z" + "jupyter": { + "outputs_hidden": false } }, - "id": "445983b1043c504f", - "execution_count": 4 - }, - { - "cell_type": "markdown", "source": [ "The *Allele* object is displayed above. Since it was built from component dictionaries, it is not yet complete as not all the identifiable objects have VRS identifiers. Note that not all objects in the Allele object are VRS identifiable." 
- ], - "metadata": { - "collapsed": false - }, - "id": "b69c827d943f1ef1" + ] }, { "cell_type": "code", + "execution_count": 5, + "id": "60b41e56a20d4e10", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-18T20:23:57.312561Z", + "start_time": "2024-04-18T20:23:57.310213Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [ { "name": "stdout", @@ -205,34 +258,52 @@ "literal_sequence_expression = models.LiteralSequenceExpression(**literal_sequence_expression_dict)\n", "is_identifiable(literal_sequence_expression)\n", "is_identifiable(allele)" - ], + ] + }, + { + "cell_type": "markdown", + "id": "91619f7666c0bcf2", "metadata": { "collapsed": false, - "ExecuteTime": { - "end_time": "2024-04-18T20:23:57.312561Z", - "start_time": "2024-04-18T20:23:57.310213Z" + "jupyter": { + "outputs_hidden": false } }, - "id": "60b41e56a20d4e10", - "execution_count": 5 - }, - { - "cell_type": "markdown", "source": [ "#### Step 4 - Compute the identifiers\n", "To make the *Allele* object a valid VRS object - that is that all identifiable objects have valid VRS identifiers - is to use the *ga4gh_identify* method on the identifiable objects (*SequenceLocation*, and *Allele*)." 
- ], - "metadata": { - "collapsed": false - }, - "id": "91619f7666c0bcf2" + ] }, { "cell_type": "code", + "execution_count": 6, + "id": "5ad675932601aa94", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-18T20:23:57.315549Z", + "start_time": "2024-04-18T20:23:57.313165Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [ { "data": { - "text/plain": "{'id': 'ga4gh:VA.5C67OBmCLuHPgDkCQj7EOMih58BS2Eor',\n 'type': 'Allele',\n 'digest': '5C67OBmCLuHPgDkCQj7EOMih58BS2Eor',\n 'location': {'id': 'ga4gh:SL.lGxOP1JRd4dysmrOVaskO5P_35DyCLnx',\n 'type': 'SequenceLocation',\n 'digest': 'lGxOP1JRd4dysmrOVaskO5P_35DyCLnx',\n 'sequenceReference': {'type': 'SequenceReference',\n 'refgetAccession': 'SQ.Pw3Ch0x3XWD6ljsnIfmk_NERcZCI9sNM'},\n 'start': 80656509,\n 'end': 80656510},\n 'state': {'type': 'LiteralSequenceExpression', 'sequence': 'TT'}}" + "text/plain": [ + "{'id': 'ga4gh:VA.5C67OBmCLuHPgDkCQj7EOMih58BS2Eor',\n", + " 'type': 'Allele',\n", + " 'digest': '5C67OBmCLuHPgDkCQj7EOMih58BS2Eor',\n", + " 'location': {'id': 'ga4gh:SL.lGxOP1JRd4dysmrOVaskO5P_35DyCLnx',\n", + " 'type': 'SequenceLocation',\n", + " 'digest': 'lGxOP1JRd4dysmrOVaskO5P_35DyCLnx',\n", + " 'sequenceReference': {'type': 'SequenceReference',\n", + " 'refgetAccession': 'SQ.Pw3Ch0x3XWD6ljsnIfmk_NERcZCI9sNM'},\n", + " 'start': 80656509,\n", + " 'end': 80656510},\n", + " 'state': {'type': 'LiteralSequenceExpression', 'sequence': 'TT'}}" + ] }, "execution_count": 6, "metadata": {}, @@ -244,45 +315,39 @@ "allele.location.id = ga4gh_identify(allele.location)\n", "allele.id = ga4gh_identify(allele)\n", "allele.model_dump(exclude_none=True)" - ], + ] + }, + { + "cell_type": "markdown", + "id": "37b5e28820b700fd", "metadata": { "collapsed": false, - "ExecuteTime": { - "end_time": "2024-04-18T20:23:57.315549Z", - "start_time": "2024-04-18T20:23:57.313165Z" + "jupyter": { + "outputs_hidden": false } }, - "id": "5ad675932601aa94", - "execution_count": 6 - }, - { - "cell_type": 
"markdown", "source": [ "The output of the *Allele* object represents a complete VRS allele with VRS identifiers and digests on all of the identifiable objects." - ], - "metadata": { - "collapsed": false - }, - "id": "37b5e28820b700fd" + ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" + "pygments_lexer": "ipython3", + "version": "3.12.1" } }, "nbformat": 4, diff --git a/notebooks/getting_started/4_Exploring_the_AlleleTranslator.ipynb b/notebooks/getting_started/4_Exploring_the_AlleleTranslator.ipynb index c04f9fe3..d8542b1e 100644 --- a/notebooks/getting_started/4_Exploring_the_AlleleTranslator.ipynb +++ b/notebooks/getting_started/4_Exploring_the_AlleleTranslator.ipynb @@ -2,115 +2,163 @@ "cells": [ { "cell_type": "markdown", + "id": "f3a35f19da823af8", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "source": [ - "# 4 Exploring the AlleleTranslator\n", + "# 4. Exploring the AlleleTranslator\n", "There are four variant nomenclatures available in the vrs-python *AlleleTranslator*: SPDI, gnomad/VCF, Beacon and HGVS. In this notebook we will perform a simple Allele translation for each. We will use each of the four nomenclatures for a single variant in translating variants to VRS. his variant can be viewed in \n", "[ClinVar](https://www.ncbi.nlm.nih.gov/clinvar/variation/652570) and in [gnomAD](https://gnomad.broadinstitute.org/variant/5-80656489-C-T)." 
- ], - "metadata": { - "collapsed": false - }, - "id": "f3a35f19da823af8" + ] }, { "cell_type": "markdown", + "id": "5b3ec967f1e08834", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "source": [ "#### Step 1 - Setup Data Proxy Access\n", "The *DataProxy* provides access to sequence references." - ], - "metadata": { - "collapsed": false - }, - "id": "5b3ec967f1e08834" + ] }, { "cell_type": "code", - "outputs": [], - "source": [ - "from ga4gh.vrs.dataproxy import create_dataproxy\n", - "seqrepo_rest_service_url = \"seqrepo+https://services.genomicmedlab.org/seqrepo\"\n", - "seqrepo_dataproxy = create_dataproxy(uri=seqrepo_rest_service_url)" - ], + "execution_count": 1, + "id": "4dd605526ab7227e", "metadata": { - "collapsed": false, "ExecuteTime": { "end_time": "2024-04-19T17:13:46.243963Z", "start_time": "2024-04-19T17:13:46.110956Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false } }, - "id": "4dd605526ab7227e", - "execution_count": 1 + "outputs": [], + "source": [ + "from ga4gh.vrs.dataproxy import create_dataproxy\n", + "seqrepo_rest_service_url = \"seqrepo+https://services.genomicmedlab.org/seqrepo\"\n", + "seqrepo_dataproxy = create_dataproxy(uri=seqrepo_rest_service_url)" + ] }, { "cell_type": "markdown", - "source": [ - "Import the *AlleleTranslator* class." - ], + "id": "5f7ac7c602d40af7", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, - "id": "5f7ac7c602d40af7" + "source": [ + "Import the *AlleleTranslator* class." 
+ ] }, { "cell_type": "code", - "outputs": [], - "source": [ - "from ga4gh.vrs.extras.translator import AlleleTranslator" - ], + "execution_count": 2, + "id": "24b10ea2d6ae9b0b", "metadata": { - "collapsed": false, "ExecuteTime": { "end_time": "2024-04-19T17:13:46.295212Z", "start_time": "2024-04-19T17:13:46.245063Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false } }, - "id": "24b10ea2d6ae9b0b", - "execution_count": 2 + "outputs": [], + "source": [ + "from ga4gh.vrs.extras.translator import AlleleTranslator" + ] }, { "cell_type": "markdown", - "source": [ - "The UTA server is required in the environment since we are translating from/to HGVS." - ], + "id": "6381d3a17866d76d", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, - "id": "6381d3a17866d76d" + "source": [ + "The UTA server is required in the environment since we are translating from/to HGVS." + ] }, { "cell_type": "code", - "outputs": [], - "source": [ - "import os\n", - "os.environ[\"UTA_DB_URL\"] = \"postgresql://anonymous:anonymous@uta.biocommons.org:5432/uta/uta_20210129b\"" - ], + "execution_count": 3, + "id": "864e5a9c0ee98257", "metadata": { - "collapsed": false, "ExecuteTime": { "end_time": "2024-04-19T17:13:46.297528Z", "start_time": "2024-04-19T17:13:46.295903Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false } }, - "id": "864e5a9c0ee98257", - "execution_count": 3 + "outputs": [], + "source": [ + "import os\n", + "os.environ[\"UTA_DB_URL\"] = \"postgresql://anonymous:anonymous@uta.biocommons.org:5432/uta/uta_20210129b\"" + ] }, { "cell_type": "markdown", + "id": "87ddbc19e906119f", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "source": [ "#### From/To HGVS\n", "This example will translate an HGVS variant to VRS using the *AlleleTranslator* *translate_from* method." 
- ], - "metadata": { - "collapsed": false - }, - "id": "87ddbc19e906119f" + ] }, { "cell_type": "code", + "execution_count": 4, + "id": "925b01dd4764ed33", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-19T17:13:48.351951Z", + "start_time": "2024-04-19T17:13:46.298147Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [ { "data": { - "text/plain": "{'id': 'ga4gh:VA.ebezGL6HoAhtGJyVnB_mE5BH18ntKev4',\n 'type': 'Allele',\n 'digest': 'ebezGL6HoAhtGJyVnB_mE5BH18ntKev4',\n 'location': {'id': 'ga4gh:SL.JiLRuuyS5wefF_6-Vw7m3Yoqqb2YFkss',\n 'type': 'SequenceLocation',\n 'digest': 'JiLRuuyS5wefF_6-Vw7m3Yoqqb2YFkss',\n 'sequenceReference': {'type': 'SequenceReference',\n 'refgetAccession': 'SQ.aUiQCzCPZ2d0csHbMSbh2NzInhonSXwI'},\n 'start': 80656488,\n 'end': 80656489},\n 'state': {'type': 'LiteralSequenceExpression', 'sequence': 'T'}}" + "text/plain": [ + "{'id': 'ga4gh:VA.ebezGL6HoAhtGJyVnB_mE5BH18ntKev4',\n", + " 'type': 'Allele',\n", + " 'digest': 'ebezGL6HoAhtGJyVnB_mE5BH18ntKev4',\n", + " 'location': {'id': 'ga4gh:SL.JiLRuuyS5wefF_6-Vw7m3Yoqqb2YFkss',\n", + " 'type': 'SequenceLocation',\n", + " 'digest': 'JiLRuuyS5wefF_6-Vw7m3Yoqqb2YFkss',\n", + " 'sequenceReference': {'type': 'SequenceReference',\n", + " 'refgetAccession': 'SQ.aUiQCzCPZ2d0csHbMSbh2NzInhonSXwI'},\n", + " 'start': 80656488,\n", + " 'end': 80656489},\n", + " 'state': {'type': 'LiteralSequenceExpression', 'sequence': 'T'}}" + ] }, "execution_count": 4, "metadata": {}, @@ -121,33 +169,41 @@ "allele_translator = AlleleTranslator(data_proxy=seqrepo_dataproxy)\n", "allele = allele_translator.translate_from(\"NC_000005.10:g.80656489C>T\", \"hgvs\")\n", "allele.model_dump(exclude_none=True)" - ], + ] + }, + { + "cell_type": "markdown", + "id": "f3951db9d1a1b833", "metadata": { "collapsed": false, - "ExecuteTime": { - "end_time": "2024-04-19T17:13:48.351951Z", - "start_time": "2024-04-19T17:13:46.298147Z" + "jupyter": { + "outputs_hidden": false } }, - "id": 
"925b01dd4764ed33", - "execution_count": 4 - }, - { - "cell_type": "markdown", "source": [ "The output from above is the VRS representation of the *Allele*. Using the *AlleleTranslator* *translate_to* method we can get back to the HGVS representation." - ], - "metadata": { - "collapsed": false - }, - "id": "f3951db9d1a1b833" + ] }, { "cell_type": "code", + "execution_count": 5, + "id": "722919c1d8cdd83b", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-19T17:13:49.804350Z", + "start_time": "2024-04-19T17:13:48.354473Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [ { "data": { - "text/plain": "['NC_000005.10:g.80656489C>T']" + "text/plain": [ + "['NC_000005.10:g.80656489C>T']" + ] }, "execution_count": 5, "metadata": {}, @@ -156,36 +212,54 @@ ], "source": [ "allele_translator.translate_to(allele, \"hgvs\")" - ], + ] + }, + { + "cell_type": "markdown", + "id": "508d821d68360f36", "metadata": { "collapsed": false, - "ExecuteTime": { - "end_time": "2024-04-19T17:13:49.804350Z", - "start_time": "2024-04-19T17:13:48.354473Z" + "jupyter": { + "outputs_hidden": false } }, - "id": "722919c1d8cdd83b", - "execution_count": 5 - }, - { - "cell_type": "markdown", "source": [ "The AlleleTranslator class by default will use \"GRCh38\" as the default assembly when performing translation. But the actual assembly used for translation will be inferred from the reference sequence passed as part of the HGVS variant. A specific default assembly may be specified when creating an AlleleTranslator by passing in the keyword argument \"default_assembly_name\" with the assembly:\n", "> AlleleTranslator(data_proxy=seqrepo_dataproxy, default_assembly_name=\"GRCh37\")\n", "\n", "This example is using the GRCh37 representation of the variant." 
- ], - "metadata": { - "collapsed": false - }, - "id": "508d821d68360f36" + ] }, { "cell_type": "code", + "execution_count": 6, + "id": "ef3ba37deafba7ac", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-19T17:13:50.060957Z", + "start_time": "2024-04-19T17:13:49.806216Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [ { "data": { - "text/plain": "{'id': 'ga4gh:VA.hEyB1sGiQrdrPFIq4u4CF17uAuUs2Wvx',\n 'type': 'Allele',\n 'digest': 'hEyB1sGiQrdrPFIq4u4CF17uAuUs2Wvx',\n 'location': {'id': 'ga4gh:SL.Y-itBtqe9IwbxyL4EVZ4T_X9TUsdbJ22',\n 'type': 'SequenceLocation',\n 'digest': 'Y-itBtqe9IwbxyL4EVZ4T_X9TUsdbJ22',\n 'sequenceReference': {'type': 'SequenceReference',\n 'refgetAccession': 'SQ.vbjOdMfHJvTjK_nqvFvpaSKhZillW0SX'},\n 'start': 79952307,\n 'end': 79952308},\n 'state': {'type': 'LiteralSequenceExpression', 'sequence': 'T'}}" + "text/plain": [ + "{'id': 'ga4gh:VA.hEyB1sGiQrdrPFIq4u4CF17uAuUs2Wvx',\n", + " 'type': 'Allele',\n", + " 'digest': 'hEyB1sGiQrdrPFIq4u4CF17uAuUs2Wvx',\n", + " 'location': {'id': 'ga4gh:SL.Y-itBtqe9IwbxyL4EVZ4T_X9TUsdbJ22',\n", + " 'type': 'SequenceLocation',\n", + " 'digest': 'Y-itBtqe9IwbxyL4EVZ4T_X9TUsdbJ22',\n", + " 'sequenceReference': {'type': 'SequenceReference',\n", + " 'refgetAccession': 'SQ.vbjOdMfHJvTjK_nqvFvpaSKhZillW0SX'},\n", + " 'start': 79952307,\n", + " 'end': 79952308},\n", + " 'state': {'type': 'LiteralSequenceExpression', 'sequence': 'T'}}" + ] }, "execution_count": 6, "metadata": {}, @@ -195,23 +269,28 @@ "source": [ "allele = allele_translator.translate_from(\"NC_000005.9:g.79952308C>T\", \"hgvs\")\n", "allele.model_dump(exclude_none=True)" - ], + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "adae25fd8dbca27c", "metadata": { - "collapsed": false, "ExecuteTime": { - "end_time": "2024-04-19T17:13:50.060957Z", - "start_time": "2024-04-19T17:13:49.806216Z" + "end_time": "2024-04-19T17:13:51.648795Z", + "start_time": "2024-04-19T17:13:50.062068Z" + }, + 
"collapsed": false, + "jupyter": { + "outputs_hidden": false } }, - "id": "ef3ba37deafba7ac", - "execution_count": 6 - }, - { - "cell_type": "code", "outputs": [ { "data": { - "text/plain": "['NC_000005.9:g.79952308C>T']" + "text/plain": [ + "['NC_000005.9:g.79952308C>T']" + ] }, "execution_count": 7, "metadata": {}, @@ -220,34 +299,52 @@ ], "source": [ "allele_translator.translate_to(allele, \"hgvs\")" - ], + ] + }, + { + "cell_type": "markdown", + "id": "728fbf740565a801", "metadata": { "collapsed": false, - "ExecuteTime": { - "end_time": "2024-04-19T17:13:51.648795Z", - "start_time": "2024-04-19T17:13:50.062068Z" + "jupyter": { + "outputs_hidden": false } }, - "id": "adae25fd8dbca27c", - "execution_count": 7 - }, - { - "cell_type": "markdown", "source": [ "#### From/To SPDI\n", "Example of translation a SPDI representation of a variant to and from VRS." - ], - "metadata": { - "collapsed": false - }, - "id": "728fbf740565a801" + ] }, { "cell_type": "code", + "execution_count": 8, + "id": "43873e55f82d10a0", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-19T17:13:51.658433Z", + "start_time": "2024-04-19T17:13:51.652705Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [ { "data": { - "text/plain": "{'id': 'ga4gh:VA.ebezGL6HoAhtGJyVnB_mE5BH18ntKev4',\n 'type': 'Allele',\n 'digest': 'ebezGL6HoAhtGJyVnB_mE5BH18ntKev4',\n 'location': {'id': 'ga4gh:SL.JiLRuuyS5wefF_6-Vw7m3Yoqqb2YFkss',\n 'type': 'SequenceLocation',\n 'digest': 'JiLRuuyS5wefF_6-Vw7m3Yoqqb2YFkss',\n 'sequenceReference': {'type': 'SequenceReference',\n 'refgetAccession': 'SQ.aUiQCzCPZ2d0csHbMSbh2NzInhonSXwI'},\n 'start': 80656488,\n 'end': 80656489},\n 'state': {'type': 'LiteralSequenceExpression', 'sequence': 'T'}}" + "text/plain": [ + "{'id': 'ga4gh:VA.ebezGL6HoAhtGJyVnB_mE5BH18ntKev4',\n", + " 'type': 'Allele',\n", + " 'digest': 'ebezGL6HoAhtGJyVnB_mE5BH18ntKev4',\n", + " 'location': {'id': 'ga4gh:SL.JiLRuuyS5wefF_6-Vw7m3Yoqqb2YFkss',\n", + " 'type': 
'SequenceLocation',\n", + " 'digest': 'JiLRuuyS5wefF_6-Vw7m3Yoqqb2YFkss',\n", + " 'sequenceReference': {'type': 'SequenceReference',\n", + " 'refgetAccession': 'SQ.aUiQCzCPZ2d0csHbMSbh2NzInhonSXwI'},\n", + " 'start': 80656488,\n", + " 'end': 80656489},\n", + " 'state': {'type': 'LiteralSequenceExpression', 'sequence': 'T'}}" + ] }, "execution_count": 8, "metadata": {}, @@ -257,23 +354,28 @@ "source": [ "allele = allele_translator.translate_from(\"NC_000005.10:80656488:C:T\",\"spdi\")\n", "allele.model_dump(exclude_none=True)" - ], + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "dd695b3dfc14a83e", "metadata": { - "collapsed": false, "ExecuteTime": { - "end_time": "2024-04-19T17:13:51.658433Z", - "start_time": "2024-04-19T17:13:51.652705Z" + "end_time": "2024-04-19T17:13:51.662545Z", + "start_time": "2024-04-19T17:13:51.659707Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false } }, - "id": "43873e55f82d10a0", - "execution_count": 8 - }, - { - "cell_type": "code", "outputs": [ { "data": { - "text/plain": "['NC_000005.10:80656488:1:T']" + "text/plain": [ + "['NC_000005.10:80656488:1:T']" + ] }, "execution_count": 9, "metadata": {}, @@ -282,34 +384,52 @@ ], "source": [ "allele_translator.translate_to(allele, \"spdi\")" - ], + ] + }, + { + "cell_type": "markdown", + "id": "4e0911a7694a060", "metadata": { "collapsed": false, - "ExecuteTime": { - "end_time": "2024-04-19T17:13:51.662545Z", - "start_time": "2024-04-19T17:13:51.659707Z" + "jupyter": { + "outputs_hidden": false } }, - "id": "dd695b3dfc14a83e", - "execution_count": 9 - }, - { - "cell_type": "markdown", "source": [ "#### From Beacon (VCF-like)\n", "For variants represented in the Beacon nomenclature, the *AlleleTranslator* currently only supports *translate_from* to convert to VRS. *translate_to* is not yet supported." 
- ], - "metadata": { - "collapsed": false - }, - "id": "4e0911a7694a060" + ] }, { "cell_type": "code", + "execution_count": 10, + "id": "57f54e6c3854a48f", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-19T17:13:51.743020Z", + "start_time": "2024-04-19T17:13:51.663593Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [ { "data": { - "text/plain": "{'id': 'ga4gh:VA.ebezGL6HoAhtGJyVnB_mE5BH18ntKev4',\n 'type': 'Allele',\n 'digest': 'ebezGL6HoAhtGJyVnB_mE5BH18ntKev4',\n 'location': {'id': 'ga4gh:SL.JiLRuuyS5wefF_6-Vw7m3Yoqqb2YFkss',\n 'type': 'SequenceLocation',\n 'digest': 'JiLRuuyS5wefF_6-Vw7m3Yoqqb2YFkss',\n 'sequenceReference': {'type': 'SequenceReference',\n 'refgetAccession': 'SQ.aUiQCzCPZ2d0csHbMSbh2NzInhonSXwI'},\n 'start': 80656488,\n 'end': 80656489},\n 'state': {'type': 'LiteralSequenceExpression', 'sequence': 'T'}}" + "text/plain": [ + "{'id': 'ga4gh:VA.ebezGL6HoAhtGJyVnB_mE5BH18ntKev4',\n", + " 'type': 'Allele',\n", + " 'digest': 'ebezGL6HoAhtGJyVnB_mE5BH18ntKev4',\n", + " 'location': {'id': 'ga4gh:SL.JiLRuuyS5wefF_6-Vw7m3Yoqqb2YFkss',\n", + " 'type': 'SequenceLocation',\n", + " 'digest': 'JiLRuuyS5wefF_6-Vw7m3Yoqqb2YFkss',\n", + " 'sequenceReference': {'type': 'SequenceReference',\n", + " 'refgetAccession': 'SQ.aUiQCzCPZ2d0csHbMSbh2NzInhonSXwI'},\n", + " 'start': 80656488,\n", + " 'end': 80656489},\n", + " 'state': {'type': 'LiteralSequenceExpression', 'sequence': 'T'}}" + ] }, "execution_count": 10, "metadata": {}, @@ -319,34 +439,52 @@ "source": [ "allele = allele_translator.translate_from(\"5 : 80656489 C > T\", \"beacon\")\n", "allele.model_dump(exclude_none=True)" - ], + ] + }, + { + "cell_type": "markdown", + "id": "c4a6abfd56b8fa1e", "metadata": { "collapsed": false, - "ExecuteTime": { - "end_time": "2024-04-19T17:13:51.743020Z", - "start_time": "2024-04-19T17:13:51.663593Z" + "jupyter": { + "outputs_hidden": false } }, - "id": "57f54e6c3854a48f", - "execution_count": 10 - }, - { - "cell_type": 
"markdown", "source": [ "#### From gnomAD style VCF\n", "For variants represented in the gnomad nomenclature, the *AlleleTranslator* currently only supports *translate_from* to convert to VRS. *translate_to* is not yet supported." - ], - "metadata": { - "collapsed": false - }, - "id": "c4a6abfd56b8fa1e" + ] }, { "cell_type": "code", + "execution_count": 11, + "id": "7868c365e327d995", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-19T17:13:51.823442Z", + "start_time": "2024-04-19T17:13:51.743770Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [ { "data": { - "text/plain": "{'id': 'ga4gh:VA.ebezGL6HoAhtGJyVnB_mE5BH18ntKev4',\n 'type': 'Allele',\n 'digest': 'ebezGL6HoAhtGJyVnB_mE5BH18ntKev4',\n 'location': {'id': 'ga4gh:SL.JiLRuuyS5wefF_6-Vw7m3Yoqqb2YFkss',\n 'type': 'SequenceLocation',\n 'digest': 'JiLRuuyS5wefF_6-Vw7m3Yoqqb2YFkss',\n 'sequenceReference': {'type': 'SequenceReference',\n 'refgetAccession': 'SQ.aUiQCzCPZ2d0csHbMSbh2NzInhonSXwI'},\n 'start': 80656488,\n 'end': 80656489},\n 'state': {'type': 'LiteralSequenceExpression', 'sequence': 'T'}}" + "text/plain": [ + "{'id': 'ga4gh:VA.ebezGL6HoAhtGJyVnB_mE5BH18ntKev4',\n", + " 'type': 'Allele',\n", + " 'digest': 'ebezGL6HoAhtGJyVnB_mE5BH18ntKev4',\n", + " 'location': {'id': 'ga4gh:SL.JiLRuuyS5wefF_6-Vw7m3Yoqqb2YFkss',\n", + " 'type': 'SequenceLocation',\n", + " 'digest': 'JiLRuuyS5wefF_6-Vw7m3Yoqqb2YFkss',\n", + " 'sequenceReference': {'type': 'SequenceReference',\n", + " 'refgetAccession': 'SQ.aUiQCzCPZ2d0csHbMSbh2NzInhonSXwI'},\n", + " 'start': 80656488,\n", + " 'end': 80656489},\n", + " 'state': {'type': 'LiteralSequenceExpression', 'sequence': 'T'}}" + ] }, "execution_count": 11, "metadata": {}, @@ -356,35 +494,26 @@ "source": [ "allele = allele_translator.translate_from(\"5-80656489-C-T\", \"gnomad\")\n", "allele.model_dump(exclude_none=True)" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-04-19T17:13:51.823442Z", - 
"start_time": "2024-04-19T17:13:51.743770Z" - } - }, - "id": "7868c365e327d995", - "execution_count": 11 + ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" + "pygments_lexer": "ipython3", + "version": "3.12.1" } }, "nbformat": 4, diff --git a/notebooks/getting_started/5_Exploring_the_CnvTranslator.ipynb b/notebooks/getting_started/5_Exploring_the_CnvTranslator.ipynb index 78313d56..0af84722 100644 --- a/notebooks/getting_started/5_Exploring_the_CnvTranslator.ipynb +++ b/notebooks/getting_started/5_Exploring_the_CnvTranslator.ipynb @@ -2,10 +2,17 @@ "cells": [ { "cell_type": "markdown", + "id": "c940e9d78bc6e98a", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "source": [ - "# 5 Exploring the CnvTranslator\n", + "# 5. Exploring the CnvTranslator\n", "The vrs-python model supports two classes of copy number variation: \n", - "* CopyNumberChange - an assessment of loss or gain relative to a location within a gene or system, where loss or gain is represented by the following \"efo\" ontology codes:\n", + "* CopyNumberChange - an assessment of loss or gain relative to a location within a system, where loss or gain is represented by the following [EMBL-EBI Experimental Factor Ontology](https://www.ebi.ac.uk/efo/) (EFO) codes:\n", " * efo:0030064 - regional base ploidy\n", " * efo:0030067 - loss\n", " * efo:0030068 - low-level loss\n", @@ -16,120 +23,164 @@ " * efo:0020073 - high-level loss \n", "* CopyNumberCount - an absolute count of discrete copies of a location within a gene or system\n", "For the CnvTranslator, only HGVS nomenclature is used to describe the variation." 
- ], - "metadata": { - "collapsed": false - }, - "id": "c940e9d78bc6e98a" + ] }, { "cell_type": "markdown", + "id": "ac31eec4a405b218", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "source": [ "#### Step 1 - Setup Data Proxy Access\n", "The *DataProxy* provides access to sequence references." - ], - "metadata": { - "collapsed": false - }, - "id": "ac31eec4a405b218" + ] }, { "cell_type": "code", - "outputs": [], - "source": [ - "from ga4gh.vrs.dataproxy import create_dataproxy\n", - "seqrepo_rest_service_url = \"seqrepo+https://services.genomicmedlab.org/seqrepo\"\n", - "seqrepo_dataproxy = create_dataproxy(uri=seqrepo_rest_service_url)" - ], + "execution_count": 1, + "id": "b7b0c4864ad5f9dd", "metadata": { - "collapsed": false, "ExecuteTime": { "end_time": "2024-04-18T20:25:27.328387Z", "start_time": "2024-04-18T20:25:27.194307Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false } }, - "id": "b7b0c4864ad5f9dd", - "execution_count": 1 + "outputs": [], + "source": [ + "from ga4gh.vrs.dataproxy import create_dataproxy\n", + "seqrepo_rest_service_url = \"seqrepo+https://services.genomicmedlab.org/seqrepo\"\n", + "seqrepo_dataproxy = create_dataproxy(uri=seqrepo_rest_service_url)" + ] }, { "cell_type": "markdown", - "source": [ - "Import the *CnvTranslator* class." - ], + "id": "27a861e38d55ea44", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, - "id": "27a861e38d55ea44" + "source": [ + "Import the *CnvTranslator* class." 
+ ] }, { "cell_type": "code", - "outputs": [], - "source": [ - "from ga4gh.vrs.extras.translator import CnvTranslator" - ], + "execution_count": 2, + "id": "9fe4f25508590533", "metadata": { - "collapsed": false, "ExecuteTime": { "end_time": "2024-04-18T20:25:27.379097Z", "start_time": "2024-04-18T20:25:27.329523Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false } }, - "id": "9fe4f25508590533", - "execution_count": 2 + "outputs": [], + "source": [ + "from ga4gh.vrs.extras.translator import CnvTranslator" + ] }, { "cell_type": "markdown", - "source": [ - "The UTA server is required in the environment since we are translating from/to HGVS." - ], + "id": "e9312b31a06c98e0", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, - "id": "e9312b31a06c98e0" + "source": [ + "The UTA server is required in the environment since we are translating from/to HGVS." + ] }, { "cell_type": "code", - "outputs": [], - "source": [ - "import os\n", - "os.environ[\"UTA_DB_URL\"] = \"postgresql://anonymous:anonymous@uta.biocommons.org:5432/uta/uta_20210129b\"" - ], + "execution_count": 3, + "id": "71a288dff7b87f1", "metadata": { - "collapsed": false, "ExecuteTime": { "end_time": "2024-04-18T20:25:27.381511Z", "start_time": "2024-04-18T20:25:27.379793Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false } }, - "id": "71a288dff7b87f1", - "execution_count": 3 + "outputs": [], + "source": [ + "import os\n", + "os.environ[\"UTA_DB_URL\"] = \"postgresql://anonymous:anonymous@uta.biocommons.org:5432/uta/uta_20210129b\"" + ] }, { "cell_type": "markdown", - "source": [ - "#### Step 2 - CopyNumberChange examples" - ], + "id": "6c8160cff22db940", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, - "id": "6c8160cff22db940" + "source": [ + "#### Step 2 - CopyNumberChange examples" + ] }, { "cell_type": "markdown", - "source": [ - "This example depicts a 
*CopyNumberChange* representing a deletion, or copy number loss. The \"efo\" ontology code specifying the type of copy number change is passed as a keyword argument \"copy_change\" to *translate_from*. This variant can be viewed in [ClinVar](https://www.ncbi.nlm.nih.gov/clinvar/variation/984438)." - ], + "id": "6a2c1c355bbcc494", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, - "id": "6a2c1c355bbcc494" + "source": [ + "This example depicts a *CopyNumberChange* representing a deletion, or copy number loss. The Experimental Factor Ontology code specifying the type of copy number change is passed as a keyword argument \"copy_change\" to *translate_from*. This variant can be viewed in [ClinVar](https://www.ncbi.nlm.nih.gov/clinvar/variation/984438)." + ] }, { "cell_type": "code", + "execution_count": 4, + "id": "c8a6400cb8605b0d", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-18T20:25:28.974970Z", + "start_time": "2024-04-18T20:25:27.382082Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [ { "data": { - "text/plain": "{'id': 'ga4gh:CX.0M5VkV5v504_laQURFMEsqzZGcOF9YEw',\n 'type': 'CopyNumberChange',\n 'digest': '0M5VkV5v504_laQURFMEsqzZGcOF9YEw',\n 'location': {'id': 'ga4gh:SL.GSJAEJXFDz7Nq6VlJj5NTEku48MmteUU',\n 'type': 'SequenceLocation',\n 'digest': 'GSJAEJXFDz7Nq6VlJj5NTEku48MmteUU',\n 'sequenceReference': {'type': 'SequenceReference',\n 'refgetAccession': 'SQ.eK4D2MosgK_ivBkgi6FVPg5UXs1bYESm'},\n 'start': 45002866,\n 'end': 45015056},\n 'copyChange': 'efo:0030067'}" + "text/plain": [ + "{'id': 'ga4gh:CX.0M5VkV5v504_laQURFMEsqzZGcOF9YEw',\n", + " 'type': 'CopyNumberChange',\n", + " 'digest': '0M5VkV5v504_laQURFMEsqzZGcOF9YEw',\n", + " 'location': {'id': 'ga4gh:SL.GSJAEJXFDz7Nq6VlJj5NTEku48MmteUU',\n", + " 'type': 'SequenceLocation',\n", + " 'digest': 'GSJAEJXFDz7Nq6VlJj5NTEku48MmteUU',\n", + " 'sequenceReference': {'type': 'SequenceReference',\n", + " 
'refgetAccession': 'SQ.eK4D2MosgK_ivBkgi6FVPg5UXs1bYESm'},\n", + " 'start': 45002866,\n", + " 'end': 45015056},\n", + " 'copyChange': 'efo:0030067'}" + ] }, "execution_count": 4, "metadata": {}, @@ -140,33 +191,51 @@ "cnv_translator = CnvTranslator(data_proxy=seqrepo_dataproxy)\n", "allele = cnv_translator.translate_from(\"NC_000014.9:g.45002867_45015056del\", \"hgvs\", copy_change=\"efo:0030067\")\n", "allele.model_dump(exclude_none=True)" - ], + ] + }, + { + "cell_type": "markdown", + "id": "6fc8dfa1340e010d", "metadata": { "collapsed": false, - "ExecuteTime": { - "end_time": "2024-04-18T20:25:28.974970Z", - "start_time": "2024-04-18T20:25:27.382082Z" + "jupyter": { + "outputs_hidden": false } }, - "id": "c8a6400cb8605b0d", - "execution_count": 4 - }, - { - "cell_type": "markdown", "source": [ "This example depicts a CopyNumberChange* representing a duplication, or copy number gain. This variant can be viewed in [ClinVar](https://www.ncbi.nlm.nih.gov/clinvar/variation/549625)." - ], - "metadata": { - "collapsed": false - }, - "id": "6fc8dfa1340e010d" + ] }, { "cell_type": "code", + "execution_count": 5, + "id": "f4efc189d53d7000", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-18T20:25:29.076267Z", + "start_time": "2024-04-18T20:25:28.976791Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [ { "data": { - "text/plain": "{'id': 'ga4gh:CX.0BN4vrqPrLPAZYsQEAPnG4IS8AYeBGe1',\n 'type': 'CopyNumberChange',\n 'digest': '0BN4vrqPrLPAZYsQEAPnG4IS8AYeBGe1',\n 'location': {'id': 'ga4gh:SL.tydo6UFL8Y60L5Me3k8AJfljURO9vYn9',\n 'type': 'SequenceLocation',\n 'digest': 'tydo6UFL8Y60L5Me3k8AJfljURO9vYn9',\n 'sequenceReference': {'type': 'SequenceReference',\n 'refgetAccession': 'SQ.KEO-4XBcm1cxeo_DIQ8_ofqGUkp4iZhI'},\n 'start': 75502957,\n 'end': 76045032},\n 'copyChange': 'efo:0030070'}" + "text/plain": [ + "{'id': 'ga4gh:CX.0BN4vrqPrLPAZYsQEAPnG4IS8AYeBGe1',\n", + " 'type': 'CopyNumberChange',\n", + " 'digest': 
'0BN4vrqPrLPAZYsQEAPnG4IS8AYeBGe1',\n", + " 'location': {'id': 'ga4gh:SL.tydo6UFL8Y60L5Me3k8AJfljURO9vYn9',\n", + " 'type': 'SequenceLocation',\n", + " 'digest': 'tydo6UFL8Y60L5Me3k8AJfljURO9vYn9',\n", + " 'sequenceReference': {'type': 'SequenceReference',\n", + " 'refgetAccession': 'SQ.KEO-4XBcm1cxeo_DIQ8_ofqGUkp4iZhI'},\n", + " 'start': 75502957,\n", + " 'end': 76045032},\n", + " 'copyChange': 'efo:0030070'}" + ] }, "execution_count": 5, "metadata": {}, @@ -176,43 +245,64 @@ "source": [ "allele = cnv_translator.translate_from(\"NC_000009.12:g.75502958_76045032dup\", \"hgvs\", copy_change=\"efo:0030070\")\n", "allele.model_dump(exclude_none=True)" - ], + ] + }, + { + "cell_type": "markdown", + "id": "aaf041e4b83301cd", "metadata": { "collapsed": false, - "ExecuteTime": { - "end_time": "2024-04-18T20:25:29.076267Z", - "start_time": "2024-04-18T20:25:28.976791Z" + "jupyter": { + "outputs_hidden": false } }, - "id": "f4efc189d53d7000", - "execution_count": 5 - }, - { - "cell_type": "markdown", "source": [ "#### Step 3 - CopyNumberCount examples" - ], - "metadata": { - "collapsed": false - }, - "id": "aaf041e4b83301cd" + ] }, { "cell_type": "markdown", - "source": [ - "This example depicts a *CopyNumberCount* with a copy number gain. With copy number count variation, the \"copies\" keyword argument is passed to *translate_from* with the appropriate \"efo\" ontology code. This variant can be viewed in [ClinVar](https://www.ncbi.nlm.nih.gov/clinvar/variation/2579174/)." - ], + "id": "ff76cb8a2f1387a5", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, - "id": "ff76cb8a2f1387a5" + "source": [ + "This example depicts a *CopyNumberCount* with a copy number gain. With copy number count variation, the \"copies\" keyword argument is passed to *translate_from* with the absolute copy count (for example, 3) rather than an \"efo\" ontology code. This variant can be viewed in [ClinVar](https://www.ncbi.nlm.nih.gov/clinvar/variation/2579174/)."
+ ] }, { "cell_type": "code", + "execution_count": 6, + "id": "f057e93172e97a88", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-18T20:25:29.187023Z", + "start_time": "2024-04-18T20:25:29.077128Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [ { "data": { - "text/plain": "{'id': 'ga4gh:CN.O_QHImmfErh9jDFkJaypPPvUmnj7EM70',\n 'type': 'CopyNumberCount',\n 'digest': 'O_QHImmfErh9jDFkJaypPPvUmnj7EM70',\n 'location': {'id': 'ga4gh:SL.hBVWalem_rNclxjmUuT9CHbEGCdlqW9L',\n 'type': 'SequenceLocation',\n 'digest': 'hBVWalem_rNclxjmUuT9CHbEGCdlqW9L',\n 'sequenceReference': {'type': 'SequenceReference',\n 'refgetAccession': 'SQ.HxuclGHh0XCDuF8x6yQrpHUBL7ZntAHc'},\n 'start': 85623,\n 'end': 57073230},\n 'copies': 3}" + "text/plain": [ + "{'id': 'ga4gh:CN.O_QHImmfErh9jDFkJaypPPvUmnj7EM70',\n", + " 'type': 'CopyNumberCount',\n", + " 'digest': 'O_QHImmfErh9jDFkJaypPPvUmnj7EM70',\n", + " 'location': {'id': 'ga4gh:SL.hBVWalem_rNclxjmUuT9CHbEGCdlqW9L',\n", + " 'type': 'SequenceLocation',\n", + " 'digest': 'hBVWalem_rNclxjmUuT9CHbEGCdlqW9L',\n", + " 'sequenceReference': {'type': 'SequenceReference',\n", + " 'refgetAccession': 'SQ.HxuclGHh0XCDuF8x6yQrpHUBL7ZntAHc'},\n", + " 'start': 85623,\n", + " 'end': 57073230},\n", + " 'copies': 3}" + ] }, "execution_count": 6, "metadata": {}, @@ -222,33 +312,51 @@ "source": [ "allele = cnv_translator.translate_from(\"NC_000004.12:g.85624_57073230dup\", \"hgvs\", copies=\"3\")\n", "allele.model_dump(exclude_none=True)" - ], + ] + }, + { + "cell_type": "markdown", + "id": "41df40dd67cb1009", "metadata": { "collapsed": false, - "ExecuteTime": { - "end_time": "2024-04-18T20:25:29.187023Z", - "start_time": "2024-04-18T20:25:29.077128Z" + "jupyter": { + "outputs_hidden": false } }, - "id": "f057e93172e97a88", - "execution_count": 6 - }, - { - "cell_type": "markdown", "source": [ "This example depicts a *CopyNumberCount* with a copy number loss. 
This variant can be viewed in [ClinVar](https://www.ncbi.nlm.nih.gov/clinvar/variation/2579226/)." - ], - "metadata": { - "collapsed": false - }, - "id": "41df40dd67cb1009" + ] }, { "cell_type": "code", + "execution_count": 7, + "id": "412feaeba95751e7", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-18T20:25:29.276073Z", + "start_time": "2024-04-18T20:25:29.187923Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [ { "data": { - "text/plain": "{'id': 'ga4gh:CN.WDzlT9oUq4IcQrVRWGH0dZnARnFBotCS',\n 'type': 'CopyNumberCount',\n 'digest': 'WDzlT9oUq4IcQrVRWGH0dZnARnFBotCS',\n 'location': {'id': 'ga4gh:SL.H1Zh5xdBqamBjwVE9orWdY_uBkpEMH1V',\n 'type': 'SequenceLocation',\n 'digest': 'H1Zh5xdBqamBjwVE9orWdY_uBkpEMH1V',\n 'sequenceReference': {'type': 'SequenceReference',\n 'refgetAccession': 'SQ.5ZUqxCmDDgN4xTRbaSjN8LwgZironmB8'},\n 'start': 46111352,\n 'end': 46119948},\n 'copies': 1}" + "text/plain": [ + "{'id': 'ga4gh:CN.WDzlT9oUq4IcQrVRWGH0dZnARnFBotCS',\n", + " 'type': 'CopyNumberCount',\n", + " 'digest': 'WDzlT9oUq4IcQrVRWGH0dZnARnFBotCS',\n", + " 'location': {'id': 'ga4gh:SL.H1Zh5xdBqamBjwVE9orWdY_uBkpEMH1V',\n", + " 'type': 'SequenceLocation',\n", + " 'digest': 'H1Zh5xdBqamBjwVE9orWdY_uBkpEMH1V',\n", + " 'sequenceReference': {'type': 'SequenceReference',\n", + " 'refgetAccession': 'SQ.5ZUqxCmDDgN4xTRbaSjN8LwgZironmB8'},\n", + " 'start': 46111352,\n", + " 'end': 46119948},\n", + " 'copies': 1}" + ] }, "execution_count": 7, "metadata": {}, @@ -258,35 +366,26 @@ "source": [ "allele = cnv_translator.translate_from(\"NC_000021.9:g.46111353_46119948del\", \"hgvs\", copies=\"1\")\n", "allele.model_dump(exclude_none=True)" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-04-18T20:25:29.276073Z", - "start_time": "2024-04-18T20:25:29.187923Z" - } - }, - "id": "412feaeba95751e7", - "execution_count": 7 + ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": 
"Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" + "pygments_lexer": "ipython3", + "version": "3.12.1" } }, "nbformat": 4,