From 79c652e3ba27dd1258a86e8be30af197e98d0f14 Mon Sep 17 00:00:00 2001 From: Dan Mosora <30501696+dmosorast@users.noreply.github.com> Date: Tue, 2 Nov 2021 10:08:18 -0400 Subject: [PATCH] Widen Deserialize Decimal precision (#33) * Override decimal parsing so we can control the precision, set precision to 100 to match singer.decimal * Pylint fixes and disablements * Pylint changes to tests * Version 1.1.2 and changelog --- CHANGELOG.md | 3 +++ Makefile | 4 ++-- setup.py | 2 +- tap_dynamodb/deserialize.py | 14 ++++++++++++++ tap_dynamodb/sync_strategies/log_based.py | 4 ++-- tests/base.py | 6 +++--- .../test_dynamodb_full_table_interruptible_sync.py | 6 +++--- tests/test_dynamodb_full_table_sync.py | 6 +++--- tests/test_dynamodb_log_based.py | 6 +++--- tests/test_dynamodb_log_based_interruptible.py | 6 +++--- tests/test_dynamodb_log_based_projections.py | 6 +++--- tests/test_dynamodb_projections.py | 6 +++--- 12 files changed, 43 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2142050..243912d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 1.1.2 + * Fix issue where decimals can throw a Rounded signal if they are too wide [#33](https://github.com/singer-io/tap-dynamodb/pull/33) + ## 1.1.1 * Add more error checking to ensure a projection provided is also not empty [#26](https://github.com/singer-io/tap-dynamodb/pull/26) diff --git a/Makefile b/Makefile index 1bbec70..791b466 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,9 @@ .DEFAULT_GOAL := lint lint-tests: - pylint tests -d broad-except,chained-comparison,empty-docstring,fixme,invalid-name,line-too-long,missing-class-docstring,missing-function-docstring,missing-module-docstring,no-else-raise,no-else-return,too-few-public-methods,too-many-arguments,too-many-branches,too-many-lines,too-many-locals,ungrouped-imports,wrong-spelling-in-comment,wrong-spelling-in-docstring,duplicate-code,no-name-in-module,import-error + pylint tests -d 
broad-except,chained-comparison,empty-docstring,fixme,invalid-name,line-too-long,missing-class-docstring,missing-function-docstring,missing-module-docstring,no-else-raise,no-else-return,too-few-public-methods,too-many-arguments,too-many-branches,too-many-lines,too-many-locals,ungrouped-imports,wrong-spelling-in-comment,wrong-spelling-in-docstring,duplicate-code,no-name-in-module,import-error,consider-using-f-string lint-code: - pylint tap_dynamodb -d broad-except,chained-comparison,empty-docstring,fixme,invalid-name,line-too-long,missing-class-docstring,missing-function-docstring,missing-module-docstring,no-else-raise,no-else-return,too-few-public-methods,too-many-arguments,too-many-branches,too-many-lines,too-many-locals,ungrouped-imports,wrong-spelling-in-comment,wrong-spelling-in-docstring,raise-missing-from + pylint tap_dynamodb -d broad-except,chained-comparison,empty-docstring,fixme,invalid-name,line-too-long,missing-class-docstring,missing-function-docstring,missing-module-docstring,no-else-raise,no-else-return,too-few-public-methods,too-many-arguments,too-many-branches,too-many-lines,too-many-locals,ungrouped-imports,wrong-spelling-in-comment,wrong-spelling-in-docstring,raise-missing-from,consider-using-f-string lint: lint-code lint-tests diff --git a/setup.py b/setup.py index eb02cf1..a6a2e2e 100755 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ setup( name="tap-dynamodb", - version="1.1.1", + version="1.1.2", description="Singer.io tap for extracting data", author="Stitch", url="http://singer.io", diff --git a/tap_dynamodb/deserialize.py b/tap_dynamodb/deserialize.py index f613ab5..8b95cd1 100644 --- a/tap_dynamodb/deserialize.py +++ b/tap_dynamodb/deserialize.py @@ -1,6 +1,14 @@ import base64 +import decimal from boto3.dynamodb.types import TypeDeserializer +# Custom context to control how decimals are deserialized +# Precision = 100 because that's the max for `singer.decimal` elsewhere, still +# trapping Rounding errors because that is a true error. 
+trapped_signals = [decimal.Clamped, decimal.Overflow, decimal.Inexact, decimal.Rounded, decimal.Underflow] +SINGER_CONTEXT = decimal.Context(Emin=-128, Emax=126, prec=100, + traps=trapped_signals) + class Deserializer(TypeDeserializer): ''' This class inherits from boto3.dynamodb.types.TypeDeserializer @@ -20,6 +28,12 @@ def _deserialize_b(self, value): ''' return base64.b64encode(value).decode('utf-8') + def _deserialize_n(self, value): + ''' + Deserializes numbers as Decimals using SINGER_CONTEXT (precision 100) + ''' + return SINGER_CONTEXT.create_decimal(value) + def _deserialize_ns(self, value): ''' Deserializes sets as lists to allow JSON encoding diff --git a/tap_dynamodb/sync_strategies/log_based.py b/tap_dynamodb/sync_strategies/log_based.py index 9d214e5..4ac37e0 100644 --- a/tap_dynamodb/sync_strategies/log_based.py +++ b/tap_dynamodb/sync_strategies/log_based.py @@ -139,7 +139,7 @@ def sync(config, state, stream): # fully synced seq_number_bookmarks = singer.get_bookmark(state, table_name, 'shard_seq_numbers') if not seq_number_bookmarks: - seq_number_bookmarks = dict() + seq_number_bookmarks = {} # Get the list of closed shards which we have fully synced. These # are removed after performing a sync and not seeing the shardId @@ -147,7 +147,7 @@ def sync(config, state, stream): # killed by DynamoDB and will not be returned anymore finished_shard_bookmarks = singer.get_bookmark(state, table_name, 'finished_shards') if not finished_shard_bookmarks: - finished_shard_bookmarks = list() + finished_shard_bookmarks = [] # The list of shardIds we found this sync. 
Is used to determine which # finished_shard_bookmarks to kill diff --git a/tests/base.py b/tests/base.py index aed780c..a0ea07c 100644 --- a/tests/base.py +++ b/tests/base.py @@ -7,9 +7,9 @@ import singer -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner +from tap_tester import connections +from tap_tester import menagerie +from tap_tester import runner ALL_TABLE_NAMES_TO_CLEAR = frozenset({ 'simple_table_1', diff --git a/tests/test_dynamodb_full_table_interruptible_sync.py b/tests/test_dynamodb_full_table_interruptible_sync.py index 3226ed5..24902a5 100644 --- a/tests/test_dynamodb_full_table_interruptible_sync.py +++ b/tests/test_dynamodb_full_table_interruptible_sync.py @@ -4,9 +4,9 @@ from boto3.dynamodb.types import TypeSerializer from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner +from tap_tester import connections +from tap_tester import menagerie +from tap_tester import runner from base import TestDynamoDBBase diff --git a/tests/test_dynamodb_full_table_sync.py b/tests/test_dynamodb_full_table_sync.py index dcfcbcc..77f055e 100644 --- a/tests/test_dynamodb_full_table_sync.py +++ b/tests/test_dynamodb_full_table_sync.py @@ -5,9 +5,9 @@ from boto3.dynamodb.types import TypeSerializer from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner +from tap_tester import connections +from tap_tester import menagerie +from tap_tester import runner from base import TestDynamoDBBase diff --git a/tests/test_dynamodb_log_based.py b/tests/test_dynamodb_log_based.py index e627f01..79d14ea 100644 --- a/tests/test_dynamodb_log_based.py +++ b/tests/test_dynamodb_log_based.py @@ -3,9 +3,9 @@ from boto3.dynamodb.types import TypeSerializer from tap_tester.scenario import (SCENARIOS) 
-import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner +from tap_tester import connections +from tap_tester import menagerie +from tap_tester import runner from base import TestDynamoDBBase diff --git a/tests/test_dynamodb_log_based_interruptible.py b/tests/test_dynamodb_log_based_interruptible.py index c3faed5..eaf39f6 100644 --- a/tests/test_dynamodb_log_based_interruptible.py +++ b/tests/test_dynamodb_log_based_interruptible.py @@ -3,9 +3,9 @@ from boto3.dynamodb.types import TypeSerializer from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner +from tap_tester import connections +from tap_tester import menagerie +from tap_tester import runner from base import TestDynamoDBBase diff --git a/tests/test_dynamodb_log_based_projections.py b/tests/test_dynamodb_log_based_projections.py index 477ec40..7c79e69 100644 --- a/tests/test_dynamodb_log_based_projections.py +++ b/tests/test_dynamodb_log_based_projections.py @@ -4,9 +4,9 @@ from boto3.dynamodb.types import TypeSerializer from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner +from tap_tester import connections +from tap_tester import menagerie +from tap_tester import runner from base import TestDynamoDBBase diff --git a/tests/test_dynamodb_projections.py b/tests/test_dynamodb_projections.py index c44248d..0f8c416 100644 --- a/tests/test_dynamodb_projections.py +++ b/tests/test_dynamodb_projections.py @@ -4,9 +4,9 @@ from boto3.dynamodb.types import TypeSerializer from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner +from tap_tester import connections +from tap_tester import menagerie +from tap_tester import 
runner from base import TestDynamoDBBase LOGGER = singer.get_logger()