Skip to content

Commit

Permalink
Fix SAT failing on similar nested strucutres with different order. (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
Dmytro authored Nov 1, 2021
1 parent a80d388 commit ebf35eb
Show file tree
Hide file tree
Showing 7 changed files with 175 additions and 98 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Changelog

## 0.1.25
Fix incorrect nested strucutres compare.

## 0.1.24
Improve message about errors in the stream's schema: https://github.com/airbytehq/airbyte/pull/6934

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ COPY setup.py ./
COPY pytest.ini ./
RUN pip install .

LABEL io.airbyte.version=0.1.24
LABEL io.airbyte.version=0.1.25
LABEL io.airbyte.name=airbyte/source-acceptance-test

ENTRYPOINT ["python", "-m", "pytest", "-p", "source_acceptance_test.plugin"]
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from jsonschema import validate
from source_acceptance_test.base import BaseTest
from source_acceptance_test.config import BasicReadTestConfig, ConnectionTestConfig
from source_acceptance_test.utils import ConnectorRunner, SecretDict, filter_output, serialize, verify_records_schema
from source_acceptance_test.utils import ConnectorRunner, SecretDict, filter_output, make_hashable, verify_records_schema
from source_acceptance_test.utils.json_schema_helper import JsonSchemaHelper, get_expected_schema_structure, get_object_structure


Expand Down Expand Up @@ -308,8 +308,8 @@ def compare_records(
r2 = TestBasicRead.remove_extra_fields(r2, r1)
assert r1 == r2, f"Stream {stream_name}: Mismatch of record order or values"
else:
expected = set(map(serialize, expected))
actual = set(map(serialize, actual))
expected = set(map(make_hashable, expected))
actual = set(map(make_hashable, actual))
missing_expected = set(expected) - set(actual)

if missing_expected:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import pytest
from airbyte_cdk.models import Type
from source_acceptance_test.base import BaseTest
from source_acceptance_test.utils import ConnectorRunner, full_refresh_only_catalog, serialize
from source_acceptance_test.utils import ConnectorRunner, full_refresh_only_catalog, make_hashable


@pytest.mark.default_timeout(20 * 60)
Expand All @@ -19,7 +19,7 @@ def test_sequential_reads(self, connector_config, configured_catalog, docker_run
output = docker_runner.call_read(connector_config, configured_catalog)
records_2 = [message.record.data for message in output if message.type == Type.RECORD]

output_diff = set(map(serialize, records_1)) - set(map(serialize, records_2))
output_diff = set(map(make_hashable, records_1)).symmetric_difference(set(map(make_hashable, records_2)))
if output_diff:
msg = "The two sequential reads should produce either equal set of records or one of them is a strict subset of the other"
detailed_logger.info(msg)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
#
# Copyright (c) 2021 Airbyte, Inc., all rights reserved.
#
from .asserts import verify_records_schema
from .common import SecretDict, filter_output, full_refresh_only_catalog, incremental_only_catalog, load_config
from .compare import diff_dicts, serialize
from .compare import diff_dicts, make_hashable
from .connector_runner import ConnectorRunner
from .json_schema_helper import JsonSchemaHelper

Expand All @@ -13,6 +16,6 @@
"SecretDict",
"ConnectorRunner",
"diff_dicts",
"serialize",
"make_hashable",
"verify_records_schema",
]
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@


import functools
import json
from typing import List, Mapping, Optional

import icdiff
Expand Down Expand Up @@ -52,14 +51,17 @@ def diff_dicts(left, right, use_markup) -> Optional[List[str]]:


@functools.total_ordering
class DictWithHash(dict):

_hash: str = None
class HashMixin:
@staticmethod
def get_hash(obj):
if isinstance(obj, Mapping):
return hash(str({k: (HashMixin.get_hash(v)) for k, v in sorted(obj.items())}))
if isinstance(obj, List):
return hash(str(sorted([HashMixin.get_hash(v) for v in obj])))
return hash(obj)

def __hash__(self):
if not self._hash:
self._hash = hash(json.dumps({k: serialize(v) for k, v in self.items()}, sort_keys=True))
return self._hash
return HashMixin.get_hash(self)

def __lt__(self, other):
return hash(self) < hash(other)
Expand All @@ -68,10 +70,17 @@ def __eq__(self, other):
return hash(self) == hash(other)


def serialize(value) -> str:
"""Simplify comparison of nested dicts/lists"""
if isinstance(value, Mapping):
return DictWithHash(value)
if isinstance(value, List):
return sorted([serialize(v) for v in value])
return str(value)
class DictWithHashMixin(HashMixin, dict):
pass


class ListWithHashMixin(HashMixin, list):
pass


def make_hashable(obj):
if isinstance(obj, Mapping):
return DictWithHashMixin(obj)
if isinstance(obj, List):
return ListWithHashMixin(obj)
return obj
Original file line number Diff line number Diff line change
Expand Up @@ -3,86 +3,148 @@
#

import pytest
from source_acceptance_test.utils.compare import serialize
from source_acceptance_test.utils.compare import make_hashable


@pytest.fixture(name="not_sorted_data")
def not_sorted_data_fixture():
return [
{
"date_created": "0001-01-01T00:00:00",
"date_updated": "0001-01-01T00:00:00",
"editable": False,
"id": "superuser",
"name": "Super User",
"organization_id": "orga_ya3w9oMjeLtWe7zFGZr63Dz8ruBbjybG0EIUdUXaESi",
"permissions": [
"bulk_edit",
"delete_own_opportunities",
"export",
"manage_group_numbers",
"manage_email_sequences",
"delete_leads",
"call_coach_listen",
"call_coach_barge",
"manage_others_tasks",
"manage_others_activities",
"delete_own_tasks",
"manage_customizations",
"manage_team_smart_views",
"bulk_delete",
"manage_team_email_templates",
"bulk_email",
"merge_leads",
"calling",
"bulk_sequence_subscriptions",
"bulk_import",
"delete_own_activities",
"manage_others_opportunities",
],
}
]
def not_sorted_data():
return {
"date_created": "0001-01-01T00:00:00",
"date_updated": "0001-01-01T00:00:00",
"editable": False,
"id": "superuser",
"name": "Super User",
"organization_id": "orga_ya3w9oMjeLtWe7zFGZr63Dz8ruBbjybG0EIUdUXaESi",
"permissions": [
"bulk_edit",
"delete_own_opportunities",
"export",
"manage_group_numbers",
"manage_email_sequences",
"delete_leads",
"call_coach_listen",
"call_coach_barge",
"manage_others_tasks",
"manage_others_activities",
"delete_own_tasks",
"manage_customizations",
"manage_team_smart_views",
"bulk_delete",
"manage_team_email_templates",
"bulk_email",
"merge_leads",
"calling",
"bulk_sequence_subscriptions",
"bulk_import",
"delete_own_activities",
"manage_others_opportunities",
],
}


@pytest.fixture(name="sorted_data")
def sorted_data_fixture():
return [
{
"date_created": "0001-01-01T00:00:00",
"date_updated": "0001-01-01T00:00:00",
"editable": False,
"id": "superuser",
"name": "Super User",
"organization_id": "orga_ya3w9oMjeLtWe7zFGZr63Dz8ruBbjybG0EIUdUXaESi",
"permissions": [
"bulk_delete",
"bulk_edit",
"bulk_email",
"bulk_import",
"bulk_sequence_subscriptions",
"call_coach_barge",
"call_coach_listen",
"calling",
"delete_leads",
"delete_own_activities",
"delete_own_opportunities",
"delete_own_tasks",
"export",
"manage_customizations",
"manage_email_sequences",
"manage_group_numbers",
"manage_others_activities",
"manage_others_opportunities",
"manage_others_tasks",
"manage_team_email_templates",
"manage_team_smart_views",
"merge_leads",
],
}
]
def sorted_data():
return {
"date_created": "0001-01-01T00:00:00",
"date_updated": "0001-01-01T00:00:00",
"editable": False,
"id": "superuser",
"name": "Super User",
"organization_id": "orga_ya3w9oMjeLtWe7zFGZr63Dz8ruBbjybG0EIUdUXaESi",
"permissions": [
"bulk_delete",
"bulk_edit",
"bulk_email",
"bulk_import",
"bulk_sequence_subscriptions",
"call_coach_barge",
"call_coach_listen",
"calling",
"delete_leads",
"delete_own_activities",
"delete_own_opportunities",
"delete_own_tasks",
"export",
"manage_customizations",
"manage_email_sequences",
"manage_group_numbers",
"manage_others_activities",
"manage_others_opportunities",
"manage_others_tasks",
"manage_team_email_templates",
"manage_team_smart_views",
"merge_leads",
],
}


def test_compare_two_records(not_sorted_data, sorted_data):
@pytest.mark.parametrize(
"obj1,obj2,is_same",
[
(sorted_data(), not_sorted_data(), True),
(
{
"organization": {
"features": [
"issue-percent-filters",
"performance-tag-page",
]
}
},
{
"organization": {
"features": [
"performance-tag-page",
"issue-percent-filters",
]
}
},
True,
),
(
{
"organization": {
"features": [
"issue-percent-filters",
"performance-tag-page",
]
}
},
{
"organization": {
"features": [
"performance-tag-pag",
"issue-percent-filters",
]
}
},
False,
),
(
{
"organization": {
"features": [
"issue-percent-filters",
"performance-tag-page",
]
}
},
{
"organization": {
"features": [
"performance-tag-page",
]
}
},
False,
),
({"a": 1, "b": 2}, {"b": 2, "a": 1}, True),
({"a": 1, "b": 2, "c": {"d": [1, 2]}}, {"b": 2, "a": 1, "c": {"d": [2, 1]}}, True),
({"a": 1, "b": 2, "c": {"d": [1, 2]}}, {"b": 2, "a": 1, "c": {"d": [3, 4]}}, False),
],
)
def test_compare_two_records_nested_with_different_orders(obj1, obj2, is_same):
"""Test that compare two records with equals, not sorted data."""
output_diff = set(map(serialize, sorted_data)) - set(map(serialize, not_sorted_data))
assert not output_diff
output_diff = set(map(make_hashable, [obj1])).symmetric_difference(set(map(make_hashable, [obj2])))
if is_same:
assert not output_diff, f"{obj1} should be equal to {obj2}"
else:
assert output_diff, f"{obj1} shouldnt be equal to {obj2}"

0 comments on commit ebf35eb

Please sign in to comment.