From 5a9115c1c934560cb810192d9595a122d4277361 Mon Sep 17 00:00:00 2001 From: "Alex H. Wagner, PhD" Date: Fri, 23 Feb 2024 08:30:31 -0500 Subject: [PATCH] Issue 351.2 (#353) * Restore line that recomputes repeat subunit length * Added additional HGVS test case * Only recompute subunit length for ambiguous insertions or deletion/insertions * Added test case with ambiguous deletion * addresses #351 --------- Co-authored-by: Eugene Clark --- notebooks/scratch.ipynb | 351 ++++++++++++++++++--- src/ga4gh/vrs/normalize.py | 86 +++-- tests/cassettes/test_normalize_allele.yaml | 246 ++++++--------- tests/test_vrs_normalize.py | 34 ++ 4 files changed, 493 insertions(+), 224 deletions(-) diff --git a/notebooks/scratch.ipynb b/notebooks/scratch.ipynb index 9814322b..ef61b3d3 100644 --- a/notebooks/scratch.ipynb +++ b/notebooks/scratch.ipynb @@ -7,8 +7,8 @@ "metadata": { "collapsed": true, "ExecuteTime": { - "end_time": "2024-02-10T15:38:14.078655Z", - "start_time": "2024-02-10T15:38:13.857632Z" + "end_time": "2024-02-23T05:53:19.147071Z", + "start_time": "2024-02-23T05:53:18.905272Z" } }, "outputs": [], @@ -44,8 +44,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-02-10T15:38:14.082117Z", - "start_time": "2024-02-10T15:38:14.080117Z" + "end_time": "2024-02-23T05:53:19.153851Z", + "start_time": "2024-02-23T05:53:19.148112Z" } }, "id": "68f5f7e40bf74d70", @@ -69,8 +69,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-02-10T15:38:14.087793Z", - "start_time": "2024-02-10T15:38:14.083968Z" + "end_time": "2024-02-23T05:53:19.154681Z", + "start_time": "2024-02-23T05:53:19.151578Z" } }, "id": "c11e134c85f2dec9", @@ -94,8 +94,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-02-10T15:38:14.091376Z", - "start_time": "2024-02-10T15:38:14.086540Z" + "end_time": "2024-02-23T05:53:19.157331Z", + "start_time": "2024-02-23T05:53:19.155072Z" } }, "id": "4bfa27852c9b7a76", @@ -120,8 +120,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-02-10T15:38:14.094883Z", - "start_time": "2024-02-10T15:38:14.090731Z" + "end_time": "2024-02-23T05:53:19.164798Z", + "start_time": "2024-02-23T05:53:19.157587Z" } }, "id": "5f08dedc3934e14b", @@ -146,8 +146,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-02-10T15:38:14.096044Z", - "start_time": "2024-02-10T15:38:14.094071Z" + "end_time": "2024-02-23T05:53:19.165593Z", + "start_time": "2024-02-23T05:53:19.160302Z" } }, "id": "8e67a4b8ae29e077", @@ -162,8 +162,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-02-10T15:38:14.099116Z", - "start_time": "2024-02-10T15:38:14.096777Z" + "end_time": "2024-02-23T05:53:19.166255Z", + "start_time": "2024-02-23T05:53:19.162929Z" } }, "id": "2a09ab2316876e02", @@ -187,8 +187,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-02-10T15:38:14.102084Z", - "start_time": "2024-02-10T15:38:14.099058Z" + "end_time": "2024-02-23T05:53:19.169340Z", + "start_time": "2024-02-23T05:53:19.165108Z" } }, "id": "b010f1faf5aa4c7c", @@ -213,8 +213,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-02-10T15:38:14.133735Z", - "start_time": "2024-02-10T15:38:14.102404Z" + "end_time": "2024-02-23T05:53:19.216541Z", + "start_time": "2024-02-23T05:53:19.168029Z" } }, "id": "b9e5d4e04237c06a", @@ -238,8 +238,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-02-10T15:38:14.134333Z", - "start_time": "2024-02-10T15:38:14.106609Z" + "end_time": "2024-02-23T05:53:19.217977Z", + "start_time": "2024-02-23T05:53:19.172495Z" } }, "id": "6510c36a000679d7", @@ -271,8 +271,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-02-10T15:38:14.134880Z", - "start_time": "2024-02-10T15:38:14.108443Z" + "end_time": "2024-02-23T05:53:19.218561Z", + "start_time": "2024-02-23T05:53:19.174981Z" } }, "id": "e173a0f8790b12e4", @@ -289,8 +289,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-02-10T15:38:14.135378Z", - "start_time": "2024-02-10T15:38:14.111664Z" + "end_time": "2024-02-23T05:53:19.219535Z", + "start_time": "2024-02-23T05:53:19.178147Z" } }, "id": "5aa91c0c9c69658d", @@ -306,8 +306,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-02-10T15:38:14.155710Z", - "start_time": "2024-02-10T15:38:14.126605Z" + "end_time": "2024-02-23T05:53:19.244703Z", + "start_time": "2024-02-23T05:53:19.192390Z" } }, "id": "bed8be3045afc562", @@ -325,8 +325,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-02-10T15:38:14.169077Z", - "start_time": "2024-02-10T15:38:14.129514Z" + "end_time": "2024-02-23T05:53:19.258791Z", + "start_time": "2024-02-23T05:53:19.194840Z" } }, "id": "33d925353737ae55", @@ -343,8 +343,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-02-10T15:38:14.169489Z", - "start_time": "2024-02-10T15:38:14.166113Z" + "end_time": "2024-02-23T05:53:19.259384Z", + "start_time": "2024-02-23T05:53:19.230043Z" } }, "id": "32981ef7ab235120", @@ -368,8 +368,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-02-10T15:38:14.170943Z", - "start_time": "2024-02-10T15:38:14.168939Z" + "end_time": "2024-02-23T05:53:19.259982Z", + "start_time": "2024-02-23T05:53:19.232974Z" } }, "id": "85434dae68309a1e", @@ -384,8 +384,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-02-10T15:38:14.175393Z", - "start_time": "2024-02-10T15:38:14.171070Z" + "end_time": "2024-02-23T05:53:19.260872Z", + "start_time": "2024-02-23T05:53:19.235718Z" } }, "id": "a22bce22cb23ec2", @@ -410,8 +410,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-02-10T15:38:14.190052Z", - "start_time": "2024-02-10T15:38:14.175313Z" + "end_time": "2024-02-23T05:53:19.266577Z", + "start_time": "2024-02-23T05:53:19.237926Z" } }, "id": "897f943eee0cf054", @@ -437,8 +437,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-02-10T15:38:14.190897Z", - "start_time": "2024-02-10T15:38:14.177572Z" + "end_time": "2024-02-23T05:53:19.267318Z", + "start_time": "2024-02-23T05:53:19.240171Z" } }, "id": "7ed88eee2fd528a2", @@ -454,21 +454,30 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-02-10T15:42:02.625043Z", - "start_time": "2024-02-10T15:42:02.619571Z" + "end_time": "2024-02-23T05:53:19.268284Z", + "start_time": "2024-02-23T05:53:19.244306Z" } }, "id": "c7c2b9ef81aaee80", - "execution_count": 24 + "execution_count": 20 }, { "cell_type": "code", "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/ahw001/git/vrs-python/venv/lib/python3.10/site-packages/pydantic/main.py:314: UserWarning: Pydantic serializer warnings:\n", + " Expected `Union[definition-ref, plain_function[ga4gh_serialize]]` but got `str` - serialized value may not be as expected\n", + " return self.__pydantic_serializer__.to_python(\n" + ] + }, { "data": { "text/plain": "{'id': 'ga4gh:VA.Hy2XU_-rp4IMh6I_1NXNecBo8Qx8n0oE',\n 'type': 'Allele',\n 'digest': 'Hy2XU_-rp4IMh6I_1NXNecBo8Qx8n0oE',\n 'location': 'ga4gh:SL._G2K0qSioM74l_u3OaKR0mgLYdeTL7Xd',\n 'state': {'type': 'LiteralSequenceExpression', 'sequence': 'T'}}" }, - "execution_count": 25, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -479,12 +488,12 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-02-10T15:42:07.073679Z", - "start_time": "2024-02-10T15:42:07.069400Z" + "end_time": "2024-02-23T05:53:19.269188Z", + "start_time": "2024-02-23T05:53:19.246827Z" } }, "id": "2523640eaa70e64", - "execution_count": 25 + "execution_count": 21 }, { "cell_type": "code", @@ -503,21 +512,263 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-02-10T15:42:08.761026Z", - "start_time": "2024-02-10T15:42:08.755629Z" + "end_time": "2024-02-23T05:53:19.308490Z", + "start_time": "2024-02-23T05:53:19.251032Z" } }, "id": "33007f8ca388eb43", - "execution_count": 26 + "execution_count": 22 + }, + { + "cell_type": "markdown", + "source": [ + "## Dups" + ], + "metadata": { + "collapsed": false + }, + "id": "187d2599d24e532a" }, { "cell_type": "code", "outputs": [], "source": [], "metadata": { - "collapsed": false + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-02-23T05:53:19.309138Z", + "start_time": "2024-02-23T05:53:19.254021Z" + } + }, + "id": "2bb125b0a7df7a7b", + "execution_count": 22 + }, + { + "cell_type": "code", + "outputs": [], + "source": [ + "from ga4gh.vrs.dataproxy import SeqRepoDataProxy\n", + "from biocommons.seqrepo import SeqRepo\n", + "from ga4gh.vrs.extras.translator import Translator\n", + "from ga4gh.vrs import models\n", + "\n", + "data_proxy = SeqRepoDataProxy(SeqRepo(\"/usr/local/share/seqrepo/2021-01-29\"))\n", + "translator = Translator(data_proxy=data_proxy)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-02-23T05:53:19.343619Z", + "start_time": "2024-02-23T05:53:19.256376Z" + } + }, + "id": "3b0262df913b741", + "execution_count": 23 + }, + { + "cell_type": "code", + "outputs": [], + "source": [ + "from ga4gh.vrs import normalize\n", + "a = {\n", + " 'location': {\n", + " 'end': 289464,\n", + " 'start': 289464,\n", + " 'sequenceReference': {\n", + " 'type': 'SequenceReference',\n", + " 'refgetAccession': 'SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl'\n", + " },\n", + " 'type': 'SequenceLocation'\n", + " },\n", + " 'state': {\n", + " 'sequence': 'CAGCAG',\n", + " 'type': 'LiteralSequenceExpression'\n", + " },\n", + " 'type': 'Allele'\n", + "}\n", + "small_dup = models.Allele(**a)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-02-23T05:53:19.344265Z", + "start_time": "2024-02-23T05:53:19.336337Z" + } + }, + "id": "864cc1f21bfc357e", + "execution_count": 24 + }, + { + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": "{'type': 'Allele',\n 'location': {'type': 'SequenceLocation',\n 'sequenceReference': {'type': 'SequenceReference',\n 'refgetAccession': 'SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl'},\n 'start': 289464,\n 'end': 289469},\n 'state': {'type': 'ReferenceLengthExpression',\n 'length': 11,\n 'sequence': 'CAGCAGCAGCA',\n 'repeatSubunitLength': 3}}" + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Expecting an RLE with RSL=3\n", + "a2 = normalize(small_dup, data_proxy=data_proxy)\n", + "a2.model_dump(exclude_none=True)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-02-23T05:53:19.351589Z", + "start_time": "2024-02-23T05:53:19.338785Z" + } + }, + "id": "edf2040e69305b63", + "execution_count": 25 + }, + { + "cell_type": "code", + "outputs": [], + "source": [ + "b = {\n", + " 'location': {\n", + " 'end': 289464,\n", + " 'start': 289464,\n", + " 'sequenceReference': {\n", + " 'type': 'SequenceReference',\n", + " 'refgetAccession': 'SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl'\n", + " },\n", + " 'type': 'SequenceLocation'\n", + " },\n", + " 'state': {\n", + " 'sequence': 'CACA',\n", + " 'type': 'LiteralSequenceExpression'\n", + " },\n", + " 'type': 'Allele'\n", + "}\n", + "small_dup2 = models.Allele(**b)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-02-23T05:53:19.352751Z", + "start_time": "2024-02-23T05:53:19.349973Z" + } + }, + "id": "1de4d8b75d466e4a", + "execution_count": 26 + }, + { + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": "Allele(id=None, label=None, description=None, extensions=None, type='Allele', digest=None, expressions=None, location=SequenceLocation(id=None, label=None, description=None, extensions=None, type='SequenceLocation', digest=None, sequenceReference=SequenceReference(id=None, label=None, description=None, extensions=None, type='SequenceReference', refgetAccession='SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl', residueAlphabet=None), start=289464, end=289466), state=ReferenceLengthExpression(id=None, label=None, description=None, extensions=None, type='ReferenceLengthExpression', length=6, sequence=SequenceString(root='CACACA'), repeatSubunitLength=2))" + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Expecting an RLE with RSL=2\n", + "normalize(small_dup2, data_proxy=data_proxy)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-02-23T05:53:19.373924Z", + "start_time": "2024-02-23T05:53:19.356227Z" + } + }, + "id": "82dc8a42d5e3385f", + "execution_count": 27 + }, + { + "cell_type": "code", + "outputs": [], + "source": [ + "c = {'digest': 'swY2caCgv1kP6YqKyPlcEzJqTvou15vC',\n", + " 'id': 'ga4gh:VA.swY2caCgv1kP6YqKyPlcEzJqTvou15vC',\n", + " 'location': {'digest': 'ikECYncPpE1xh6f_LiComrFGevocjDHQ',\n", + " 'end': 32331094,\n", + " 'id': 'ga4gh:SL.ikECYncPpE1xh6f_LiComrFGevocjDHQ',\n", + " 'sequenceReference': {'refgetAccession': 'SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT',\n", + " 'type': 'SequenceReference'},\n", + " 'start': 32331082,\n", + " 'type': 'SequenceLocation'},\n", + " 'state': {'length': 14,\n", + " 'repeatSubunitLength': 2,\n", + " 'sequence': 'TTTTTTTTTTTTTT',\n", + " 'type': 'ReferenceLengthExpression'},\n", + " 'type': 'Allele'}\n", + "multi_repeat_rle = models.Allele(**c)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-02-23T05:53:19.374829Z", + "start_time": "2024-02-23T05:53:19.358837Z" + } + }, + "id": "95b31f331ad36ffa", + "execution_count": 28 + }, + { + "cell_type": "code", + "outputs": [], + "source": [ + "d = {\n", + " 'location': {\n", + " 'end': 32331094,\n", + " 'start': 32331082,\n", + " 'sequenceReference': {\n", + " 'type': 'SequenceReference',\n", + " 'refgetAccession': 'SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT'\n", + " },\n", + " 'type': 'SequenceLocation'\n", + " },\n", + " 'state': {\n", + " 'sequence': 'TTTTTTTTTTTTTT',\n", + " 'type': 'LiteralSequenceExpression'\n", + " },\n", + " 'type': 'Allele'\n", + "}\n", + "multi_repeat_lse = models.Allele(**d)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-02-23T05:53:19.375508Z", + "start_time": "2024-02-23T05:53:19.362180Z" + } + }, + "id": "ab66e7da87ab1f4f", + "execution_count": 29 + }, + { + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": "Allele(id=None, label=None, description=None, extensions=None, type='Allele', digest=None, expressions=None, location=SequenceLocation(id=None, label=None, description=None, extensions=None, type='SequenceLocation', digest=None, sequenceReference=SequenceReference(id=None, label=None, description=None, extensions=None, type='SequenceReference', refgetAccession='SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT', residueAlphabet=None), start=32331082, end=32331094), state=ReferenceLengthExpression(id=None, label=None, description=None, extensions=None, type='ReferenceLengthExpression', length=14, sequence=SequenceString(root='TTTTTTTTTTTTTT'), repeatSubunitLength=2))" + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "normalize(multi_repeat_lse, data_proxy=data_proxy)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-02-23T05:53:19.376308Z", + "start_time": "2024-02-23T05:53:19.364880Z" + } }, - "id": "864cc1f21bfc357e" + "id": "5cf99af76d2ef3ff", + "execution_count": 30 } ], "metadata": { diff --git a/src/ga4gh/vrs/normalize.py b/src/ga4gh/vrs/normalize.py index a9da5760..855ca5cd 100644 --- a/src/ga4gh/vrs/normalize.py +++ b/src/ga4gh/vrs/normalize.py @@ -159,9 +159,9 @@ def _normalize_allele(input_allele, data_proxy, rle_seq_limit=50): new_allele.state.sequence = models.SequenceString(trim_alleles[1]) return new_allele elif len_trimmed_ref: - repeat_subunit_length = len_trimmed_ref + seed_length = len_trimmed_ref else: - repeat_subunit_length = len_trimmed_alt + seed_length = len_trimmed_alt # Determine bounds of ambiguity new_ival, new_alleles = _normalize( @@ -179,47 +179,71 @@ def _normalize_allele(input_allele, data_proxy, rle_seq_limit=50): ) extended_ref_seq = ref_seq[new_ival[0]: new_ival[1]] + extended_alt_seq = new_alleles[1] if not extended_ref_seq: # If the reference sequence is empty this is an unambiguous insertion. # Return a new Allele with the trimmed alternate sequence as a Literal # Sequence Expression new_allele.state = models.LiteralSequenceExpression( - sequence=models.SequenceString(new_alleles[1]) - ) - else: - # Otherwise, calculate the repeat subunit length and determine if this is - # an RLE allele. - len_extended_alt = len(new_alleles[1]) - len_extended_ref = len(extended_ref_seq) - - if len_extended_alt > len_extended_ref: - repeat_subunit_length = math.gcd(len_extended_ref, len_extended_alt) - repeat_sequence = itertools.cycle(extended_ref_seq[:repeat_subunit_length]) - ref_derived_alt = ''.join([next(repeat_sequence) for _ in range(len_extended_alt)]) - # TODO: The space and time efficiency can be improved by iterating over the new_allele[1] - # sequence and comparing to next(repeat_sequence) until there is a mismatch (LSE Allele) - # or you get through the whole new_allele[1] sequence (RLE Allele). - if ref_derived_alt != new_alleles[1]: - # if this is an ambiguous insertion of novel sequence - # create a new allele with a Literal Sequence Expression - new_allele.state = models.LiteralSequenceExpression( - sequence=models.SequenceString(new_alleles[1]) - ) - return new_allele - - # Otherwise, create the Allele as an RLE - new_allele.state = models.ReferenceLengthExpression( - length=len_extended_alt, - repeatSubunitLength=repeat_subunit_length + sequence=models.SequenceString(extended_alt_seq) ) + return new_allele - if (rle_seq_limit and len_extended_alt <= rle_seq_limit) or (rle_seq_limit is None): - new_allele.state.sequence = models.SequenceString(new_alleles[1]) + # Otherwise, calculate the repeat subunit length and determine if this is + # an RLE allele. + len_extended_alt = len(extended_alt_seq) + len_extended_ref = len(extended_ref_seq) + + if len_extended_alt > len_extended_ref: + # If this is an insertion, it may or may not be reference-derived + if seed_length > len_extended_ref: + # If the VOCA seed length is greater than the ambiguous reference space, + # a valid RLE has the repeat subunit length that is the greatest divisor of the + # seed length such that the repeat subunit reconstitutes the Allele state + for cycle_start in range(len_extended_ref): + cycle_length = len_extended_ref - cycle_start + if seed_length % cycle_length != 0: + continue + if _is_valid_cycle(cycle_start, extended_ref_seq, extended_alt_seq): + return _define_rle_allele( + new_allele, len_extended_alt, cycle_length, rle_seq_limit, extended_alt_seq) + else: + # If the VOCA seed length is less than or equal to the ambiguous reference + # space, a valid RLE repeat subunit reconstitutes the Allele state + if _is_valid_cycle(0, extended_ref_seq[:seed_length], extended_alt_seq): + return _define_rle_allele( + new_allele, len_extended_alt, seed_length, rle_seq_limit, extended_alt_seq) + else: + # If this is a deletion, it is reference-derived + return _define_rle_allele(new_allele, len_extended_alt, seed_length, rle_seq_limit, extended_alt_seq) + new_allele.state = models.LiteralSequenceExpression( + sequence=models.SequenceString(extended_alt_seq) + ) return new_allele +def _define_rle_allele(allele, length, repeat_subunit_length, rle_seq_limit, extended_alt_seq): + # Otherwise, create the Allele as an RLE + allele.state = models.ReferenceLengthExpression( + length=length, + repeatSubunitLength=repeat_subunit_length + ) + + if (rle_seq_limit and length <= rle_seq_limit) or (rle_seq_limit is None): + allele.state.sequence = models.SequenceString(extended_alt_seq) + + return allele + + +def _is_valid_cycle(template_start, template, target): + cycle = itertools.cycle(template[template_start:]) + for char in target[len(template):]: + if char != next(cycle): + return False + return True + # TODO _normalize_genotype? diff --git a/tests/cassettes/test_normalize_allele.yaml b/tests/cassettes/test_normalize_allele.yaml index 209411de..8d4a498c 100644 --- a/tests/cassettes/test_normalize_allele.yaml +++ b/tests/cassettes/test_normalize_allele.yaml @@ -9,7 +9,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.31.0 + - python-requests/2.28.2 method: GET uri: http://localhost:5000/seqrepo/1/metadata/ga4gh:SQ.0iKlIQk2oZLoeOG9P1riRU6hvL5Ux8TV response: @@ -34,7 +34,7 @@ interactions: Content-Type: - application/json Date: - - Thu, 31 Aug 2023 18:13:56 GMT + - Fri, 23 Feb 2024 05:51:29 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -50,7 +50,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.31.0 + - python-requests/2.28.2 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.0iKlIQk2oZLoeOG9P1riRU6hvL5Ux8TV?start=26090950&end=26090951 response: @@ -64,7 +64,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 31 Aug 2023 18:13:56 GMT + - Fri, 23 Feb 2024 05:51:29 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -80,7 +80,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.31.0 + - python-requests/2.28.2 method: GET uri: http://localhost:5000/seqrepo/1/metadata/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP response: @@ -105,7 +105,7 @@ interactions: Content-Type: - application/json Date: - - Thu, 31 Aug 2023 18:13:56 GMT + - Fri, 23 Feb 2024 05:51:29 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -121,7 +121,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.31.0 + - python-requests/2.28.2 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980375&end=155980377 response: @@ -135,7 +135,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 31 Aug 2023 18:13:56 GMT + - Fri, 23 Feb 2024 05:51:29 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -151,51 +151,21 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.31.0 + - python-requests/2.28.2 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980375&end=155980377 - response: - body: - string: TA - headers: - Connection: - - close - Content-Length: - - '2' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Thu, 31 Aug 2023 18:13:56 GMT - Server: - - Werkzeug/2.2.2 Python/3.10.4 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.31.0 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980375&end=155980377 + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980374&end=155980375 response: body: - string: TA + string: T headers: Connection: - close Content-Length: - - '2' + - '1' Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 31 Aug 2023 18:13:56 GMT + - Fri, 23 Feb 2024 05:51:29 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -211,12 +181,12 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.31.0 + - python-requests/2.28.2 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980374&end=155980375 + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980377&end=155980378 response: body: - string: T + string: A headers: Connection: - close @@ -225,7 +195,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 31 Aug 2023 18:13:56 GMT + - Fri, 23 Feb 2024 05:51:29 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -241,21 +211,21 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.31.0 + - python-requests/2.28.2 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980377&end=155980378 + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980375&end=155980375 response: body: - string: A + string: '' headers: Connection: - close Content-Length: - - '1' + - '0' Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 31 Aug 2023 18:13:56 GMT + - Fri, 23 Feb 2024 05:51:29 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -271,9 +241,9 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.31.0 + - python-requests/2.28.2 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980375&end=155980375 + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980377&end=155980377 response: body: string: '' @@ -285,7 +255,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 31 Aug 2023 18:13:56 GMT + - Fri, 23 Feb 2024 05:51:29 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -301,21 +271,21 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.31.0 + - python-requests/2.28.2 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980377&end=155980377 + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980373&end=155980375 response: body: - string: '' + string: GT headers: Connection: - close Content-Length: - - '0' + - '2' Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 31 Aug 2023 18:13:56 GMT + - Fri, 23 Feb 2024 05:51:29 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -331,21 +301,21 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.31.0 + - python-requests/2.28.2 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980375&end=155980377 + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980373&end=155980374 response: body: - string: TA + string: G headers: Connection: - close Content-Length: - - '2' + - '1' Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 31 Aug 2023 18:13:56 GMT + - Fri, 23 Feb 2024 05:51:29 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -361,32 +331,21 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.31.0 + - python-requests/2.28.2 method: GET - uri: http://localhost:5000/seqrepo/1/metadata/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980372&end=155980373 response: body: - string: "{\n \"added\": \"2016-08-27T23:57:18Z\",\n \"aliases\": [\n \"GRCh38:X\",\n - \ \"GRCh38:chrX\",\n \"GRCh38.p1:X\",\n \"GRCh38.p1:chrX\",\n \"GRCh38.p10:X\",\n - \ \"GRCh38.p10:chrX\",\n \"GRCh38.p11:X\",\n \"GRCh38.p11:chrX\",\n - \ \"GRCh38.p12:X\",\n \"GRCh38.p12:chrX\",\n \"GRCh38.p2:X\",\n \"GRCh38.p2:chrX\",\n - \ \"GRCh38.p3:X\",\n \"GRCh38.p3:chrX\",\n \"GRCh38.p4:X\",\n \"GRCh38.p4:chrX\",\n - \ \"GRCh38.p5:X\",\n \"GRCh38.p5:chrX\",\n \"GRCh38.p6:X\",\n \"GRCh38.p6:chrX\",\n - \ \"GRCh38.p7:X\",\n \"GRCh38.p7:chrX\",\n \"GRCh38.p8:X\",\n \"GRCh38.p8:chrX\",\n - \ \"GRCh38.p9:X\",\n \"GRCh38.p9:chrX\",\n \"MD5:2b3a55ff7f58eb308420c8a9b11cac50\",\n - \ \"NCBI:NC_000023.11\",\n \"refseq:NC_000023.11\",\n \"SEGUID:Z9QbQrrPjpjXSMJesDYqC3A43lA\",\n - \ \"SHA1:67d41b42bacf8e98d748c25eb0362a0b7038de50\",\n \"VMC:GS_w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP\",\n - \ \"sha512t24u:w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP\",\n \"ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP\"\n - \ ],\n \"alphabet\": \"ACGNRSTWY\",\n \"length\": 156040895\n}\n" + string: G headers: Connection: - close Content-Length: - - '978' + - '1' Content-Type: - - application/json + - text/plain; charset=utf-8 Date: - - Thu, 31 Aug 2023 18:13:56 GMT + - Fri, 23 Feb 2024 05:51:29 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -402,32 +361,21 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.31.0 + - python-requests/2.28.2 method: GET - uri: http://localhost:5000/seqrepo/1/metadata/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980375&end=155980376 response: body: - string: "{\n \"added\": \"2016-08-27T23:57:18Z\",\n \"aliases\": [\n \"GRCh38:X\",\n - \ \"GRCh38:chrX\",\n \"GRCh38.p1:X\",\n \"GRCh38.p1:chrX\",\n \"GRCh38.p10:X\",\n - \ \"GRCh38.p10:chrX\",\n \"GRCh38.p11:X\",\n \"GRCh38.p11:chrX\",\n - \ \"GRCh38.p12:X\",\n \"GRCh38.p12:chrX\",\n \"GRCh38.p2:X\",\n \"GRCh38.p2:chrX\",\n - \ \"GRCh38.p3:X\",\n \"GRCh38.p3:chrX\",\n \"GRCh38.p4:X\",\n \"GRCh38.p4:chrX\",\n - \ \"GRCh38.p5:X\",\n \"GRCh38.p5:chrX\",\n \"GRCh38.p6:X\",\n \"GRCh38.p6:chrX\",\n - \ \"GRCh38.p7:X\",\n \"GRCh38.p7:chrX\",\n \"GRCh38.p8:X\",\n \"GRCh38.p8:chrX\",\n - \ \"GRCh38.p9:X\",\n \"GRCh38.p9:chrX\",\n \"MD5:2b3a55ff7f58eb308420c8a9b11cac50\",\n - \ \"NCBI:NC_000023.11\",\n \"refseq:NC_000023.11\",\n \"SEGUID:Z9QbQrrPjpjXSMJesDYqC3A43lA\",\n - \ \"SHA1:67d41b42bacf8e98d748c25eb0362a0b7038de50\",\n \"VMC:GS_w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP\",\n - \ \"sha512t24u:w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP\",\n \"ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP\"\n - \ ],\n \"alphabet\": \"ACGNRSTWY\",\n \"length\": 156040895\n}\n" + string: T headers: Connection: - close Content-Length: - - '978' + - '1' Content-Type: - - application/json + - text/plain; charset=utf-8 Date: - - Thu, 31 Aug 2023 18:13:56 GMT + - Fri, 23 Feb 2024 05:51:29 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -443,21 +391,33 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.31.0 + - python-requests/2.28.2 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980373&end=155980375 + uri: http://localhost:5000/seqrepo/1/metadata/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl response: body: - string: GT + string: "{\n \"added\": \"2016-08-24T08:19:02Z\",\n \"aliases\": [\n \"Ensembl:19\",\n + \ \"ensembl:19\",\n \"GRCh38:19\",\n \"GRCh38:chr19\",\n \"GRCh38.p1:19\",\n + \ \"GRCh38.p1:chr19\",\n \"GRCh38.p10:19\",\n \"GRCh38.p10:chr19\",\n + \ \"GRCh38.p11:19\",\n \"GRCh38.p11:chr19\",\n \"GRCh38.p12:19\",\n + \ \"GRCh38.p12:chr19\",\n \"GRCh38.p2:19\",\n \"GRCh38.p2:chr19\",\n + \ \"GRCh38.p3:19\",\n \"GRCh38.p3:chr19\",\n \"GRCh38.p4:19\",\n \"GRCh38.p4:chr19\",\n + \ \"GRCh38.p5:19\",\n \"GRCh38.p5:chr19\",\n \"GRCh38.p6:19\",\n \"GRCh38.p6:chr19\",\n + \ \"GRCh38.p7:19\",\n \"GRCh38.p7:chr19\",\n \"GRCh38.p8:19\",\n \"GRCh38.p8:chr19\",\n + \ \"GRCh38.p9:19\",\n \"GRCh38.p9:chr19\",\n \"MD5:b0eba2c7bb5c953d1e06a508b5e487de\",\n + \ \"NCBI:NC_000019.10\",\n \"refseq:NC_000019.10\",\n \"SEGUID:AHxM5/L8jIX08UhBBkKXkiO5rhY\",\n + \ \"SHA1:007c4ce7f2fc8c85f4f148410642979223b9ae16\",\n \"VMC:GS_IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl\",\n + \ \"sha512t24u:IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl\",\n \"ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl\"\n + \ ],\n \"alphabet\": \"ACGNT\",\n \"length\": 58617616\n}\n" headers: Connection: - close Content-Length: - - '2' + - '1035' Content-Type: - - text/plain; charset=utf-8 + - application/json Date: - - Thu, 31 Aug 2023 18:13:56 GMT + - Fri, 23 Feb 2024 05:51:29 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -473,9 +433,9 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.31.0 + - python-requests/2.28.2 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980375&end=155980375 + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=289464&end=289464 response: body: string: '' @@ -487,7 +447,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 31 Aug 2023 18:13:56 GMT + - Fri, 23 Feb 2024 05:51:29 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -503,21 +463,21 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.31.0 + - python-requests/2.28.2 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980375&end=155980375 + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=289463&end=289464 response: body: - string: '' + string: T headers: Connection: - close Content-Length: - - '0' + - '1' Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 31 Aug 2023 18:13:56 GMT + - Fri, 23 Feb 2024 05:51:29 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -533,12 +493,12 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.31.0 + - python-requests/2.28.2 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980374&end=155980375 + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=289464&end=289465 response: body: - string: T + string: C headers: Connection: - close @@ -547,7 +507,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 31 Aug 2023 18:13:56 GMT + - Fri, 23 Feb 2024 05:51:29 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -563,12 +523,12 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.31.0 + - python-requests/2.28.2 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980373&end=155980374 + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=289465&end=289466 response: body: - string: G + string: A headers: Connection: - close @@ -577,7 +537,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 31 Aug 2023 18:13:56 GMT + - Fri, 23 Feb 2024 05:51:29 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -593,9 +553,9 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.31.0 + - python-requests/2.28.2 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980372&end=155980373 + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=289466&end=289467 response: body: string: G @@ -607,7 +567,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 31 Aug 2023 18:13:56 GMT + - Fri, 23 Feb 2024 05:51:29 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -623,12 +583,12 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.31.0 + - python-requests/2.28.2 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980375&end=155980376 + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=289467&end=289468 response: body: - string: T + string: C headers: Connection: - close @@ -637,7 +597,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 31 Aug 2023 18:13:57 GMT + - Fri, 23 Feb 2024 05:51:29 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -653,21 +613,21 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.31.0 + - python-requests/2.28.2 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980373&end=155980375 + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=289468&end=289469 response: body: - string: GT + string: A headers: Connection: - close Content-Length: - - '2' + - '1' Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 31 Aug 2023 18:13:57 GMT + - Fri, 23 Feb 2024 05:51:29 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -683,21 +643,21 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.31.0 + - python-requests/2.28.2 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980375&end=155980375 + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=289469&end=289470 response: body: - string: '' + string: C headers: Connection: - close Content-Length: - - '0' + - '1' Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 31 Aug 2023 18:13:57 GMT + - Fri, 23 Feb 2024 05:51:29 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -713,21 +673,21 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.31.0 + - python-requests/2.28.2 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980373&end=155980375 + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=289464&end=289469 response: body: - string: GT + string: CAGCA headers: Connection: - close Content-Length: - - '2' + - '5' Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 31 Aug 2023 18:13:57 GMT + - Fri, 23 Feb 2024 05:51:29 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: diff --git a/tests/test_vrs_normalize.py b/tests/test_vrs_normalize.py index 5b579e13..849acbef 100644 --- a/tests/test_vrs_normalize.py +++ b/tests/test_vrs_normalize.py @@ -116,6 +116,35 @@ } } +allele_dict5 = { + 'location': { + 'end': 289464, + 'start': 289464, + 'sequenceReference': { + 'type': 'SequenceReference', + 'refgetAccession': 'SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl' + }, + 'type': 'SequenceLocation' + }, + 'state': { + 'sequence': 'CAGCAG', + 'type': 'LiteralSequenceExpression' + }, + 'type': 'Allele' +} + +allele_dict5_normalized = { + 'type': 'Allele', + 'location': {'type': 'SequenceLocation', + 'sequenceReference': {'type': 'SequenceReference', + 'refgetAccession': 'SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl'}, + 'start': 289464, + 'end': 289469}, + 'state': {'type': 'ReferenceLengthExpression', + 'length': 11, + 'sequence': 'CAGCAGCAGCA', + 'repeatSubunitLength': 3} +} @pytest.mark.vcr def test_normalize_allele(rest_dataproxy): @@ -137,3 +166,8 @@ def test_normalize_allele(rest_dataproxy): allele4 = models.Allele(**allele_dict4) allele4_after_norm = normalize(allele4, rest_dataproxy) assert allele4_after_norm == models.Allele(**allele_dict4_normalized) + + # Duplication in non-integer-repeat ambiguous region + allele5 = models.Allele(**allele_dict5) + allele5_after_norm = normalize(allele5, rest_dataproxy) + assert allele5_after_norm == models.Allele(**allele_dict5_normalized)