Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix SAT failing on similar nested structures with different order. #7491

Merged
merged 4 commits into from
Nov 1, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from jsonschema import validate
from source_acceptance_test.base import BaseTest
from source_acceptance_test.config import BasicReadTestConfig, ConnectionTestConfig
from source_acceptance_test.utils import ConnectorRunner, SecretDict, filter_output, serialize, verify_records_schema
avida marked this conversation as resolved.
Show resolved Hide resolved
from source_acceptance_test.utils import ConnectorRunner, SecretDict, filter_output, make_hashible, verify_records_schema
from source_acceptance_test.utils.json_schema_helper import JsonSchemaHelper, get_expected_schema_structure, get_object_structure


Expand Down Expand Up @@ -308,8 +308,8 @@ def compare_records(
r2 = TestBasicRead.remove_extra_fields(r2, r1)
assert r1 == r2, f"Stream {stream_name}: Mismatch of record order or values"
else:
expected = set(map(serialize, expected))
actual = set(map(serialize, actual))
expected = set(map(make_hashible, expected))
actual = set(map(make_hashible, actual))
missing_expected = set(expected) - set(actual)

if missing_expected:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import pytest
from airbyte_cdk.models import Type
from source_acceptance_test.base import BaseTest
from source_acceptance_test.utils import ConnectorRunner, full_refresh_only_catalog, serialize
from source_acceptance_test.utils import ConnectorRunner, full_refresh_only_catalog, make_hashible


@pytest.mark.default_timeout(20 * 60)
Expand All @@ -19,7 +19,7 @@ def test_sequential_reads(self, connector_config, configured_catalog, docker_run
output = docker_runner.call_read(connector_config, configured_catalog)
records_2 = [message.record.data for message in output if message.type == Type.RECORD]

output_diff = set(map(serialize, records_1)) - set(map(serialize, records_2))
output_diff = set(map(make_hashible, records_1)).symmetric_difference(set(map(make_hashible, records_2)))
if output_diff:
msg = "The two sequential reads should produce either equal set of records or one of them is a strict subset of the other"
detailed_logger.info(msg)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
#
# Copyright (c) 2021 Airbyte, Inc., all rights reserved.
#
from .asserts import verify_records_schema
from .common import SecretDict, filter_output, full_refresh_only_catalog, incremental_only_catalog, load_config
from .compare import diff_dicts, serialize
from .compare import diff_dicts, make_hashible
from .connector_runner import ConnectorRunner
from .json_schema_helper import JsonSchemaHelper

Expand All @@ -13,6 +16,6 @@
"SecretDict",
"ConnectorRunner",
"diff_dicts",
"serialize",
"make_hashible",
"verify_records_schema",
]
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@


import functools
import json
from typing import List, Mapping, Optional

import icdiff
Expand Down Expand Up @@ -52,13 +51,20 @@ def diff_dicts(left, right, use_markup) -> Optional[List[str]]:


@functools.total_ordering
class HashMixin:
    """Mixin that makes mutable containers (dict/list) hashable by content.

    Hashing is order-insensitive for both mappings (keys are sorted) and
    lists (element hashes are sorted), so two nested structures that differ
    only in ordering hash — and therefore compare — equal.

    The hash is deliberately NOT cached on the instance: this mixin is
    applied to mutable containers, and a cached hash would go stale after a
    mutation (e.g. adding a key to a DictWithHashMixin would leave it
    "equal" to its pre-mutation twin).
    """

    @staticmethod
    def get_hash(obj):
        """Recursively compute an order-insensitive hash for ``obj``.

        Mappings hash by their sorted (key, value-hash) pairs; lists hash by
        their sorted element hashes (list order intentionally ignored);
        anything else falls back to the builtin ``hash``.
        """
        if isinstance(obj, Mapping):
            # Sort by key so {"a": 1, "b": 2} and {"b": 2, "a": 1} hash alike.
            return hash(tuple(sorted((key, HashMixin.get_hash(value)) for key, value in obj.items())))
        if isinstance(obj, List):
            # Sort element hashes so list ordering does not affect the result.
            return hash(tuple(sorted(HashMixin.get_hash(item) for item in obj)))
        return hash(obj)

    def __hash__(self):
        # Recompute every time: self is mutable, so the hash must always
        # reflect the current content (see class docstring).
        return HashMixin.get_hash(self)

    def __lt__(self, other):
        # Ordering by hash is arbitrary but total; total_ordering fills in
        # the remaining comparison operators.
        return hash(self) < hash(other)

    def __eq__(self, other):
        return hash(self) == hash(other)


class DictWithHashMixin(HashMixin, dict):
    # A dict whose hash/equality ignore key order and nested ordering.
    pass


class ListWithHashMixin(HashMixin, list):
    # A list whose hash/equality ignore element order.
    pass


def make_hashible(obj):
    """Wrap ``obj`` in a hashable variant so records can be placed in sets.

    Mappings become :class:`DictWithHashMixin`, lists become
    :class:`ListWithHashMixin`, and everything else is returned unchanged.
    (The name is a historical misspelling of "hashable", kept for backward
    compatibility with existing callers.)
    """
    if isinstance(obj, Mapping):
        return DictWithHashMixin(obj)
    if isinstance(obj, List):
        return ListWithHashMixin(obj)
    return obj
Original file line number Diff line number Diff line change
Expand Up @@ -3,86 +3,148 @@
#

import pytest
from source_acceptance_test.utils.compare import serialize
from source_acceptance_test.utils.compare import make_hashible


def not_sorted_data():
    """Return a sample record whose nested "permissions" list is unsorted.

    Plain function (not a pytest fixture) so it can be called directly when
    building parametrize cases.
    """
    return {
        "date_created": "0001-01-01T00:00:00",
        "date_updated": "0001-01-01T00:00:00",
        "editable": False,
        "id": "superuser",
        "name": "Super User",
        "organization_id": "orga_ya3w9oMjeLtWe7zFGZr63Dz8ruBbjybG0EIUdUXaESi",
        "permissions": [
            "bulk_edit",
            "delete_own_opportunities",
            "export",
            "manage_group_numbers",
            "manage_email_sequences",
            "delete_leads",
            "call_coach_listen",
            "call_coach_barge",
            "manage_others_tasks",
            "manage_others_activities",
            "delete_own_tasks",
            "manage_customizations",
            "manage_team_smart_views",
            "bulk_delete",
            "manage_team_email_templates",
            "bulk_email",
            "merge_leads",
            "calling",
            "bulk_sequence_subscriptions",
            "bulk_import",
            "delete_own_activities",
            "manage_others_opportunities",
        ],
    }


def sorted_data():
    """Return the same sample record as ``not_sorted_data`` but with the
    nested "permissions" list in sorted order.

    Plain function (not a pytest fixture) so it can be called directly when
    building parametrize cases.
    """
    return {
        "date_created": "0001-01-01T00:00:00",
        "date_updated": "0001-01-01T00:00:00",
        "editable": False,
        "id": "superuser",
        "name": "Super User",
        "organization_id": "orga_ya3w9oMjeLtWe7zFGZr63Dz8ruBbjybG0EIUdUXaESi",
        "permissions": [
            "bulk_delete",
            "bulk_edit",
            "bulk_email",
            "bulk_import",
            "bulk_sequence_subscriptions",
            "call_coach_barge",
            "call_coach_listen",
            "calling",
            "delete_leads",
            "delete_own_activities",
            "delete_own_opportunities",
            "delete_own_tasks",
            "export",
            "manage_customizations",
            "manage_email_sequences",
            "manage_group_numbers",
            "manage_others_activities",
            "manage_others_opportunities",
            "manage_others_tasks",
            "manage_team_email_templates",
            "manage_team_smart_views",
            "merge_leads",
        ],
    }


@pytest.mark.parametrize(
    "obj1,obj2,is_same",
    [
        # Same record, nested list in a different order -> equal.
        (sorted_data(), not_sorted_data(), True),
        (
            {
                "organization": {
                    "features": [
                        "issue-percent-filters",
                        "performance-tag-page",
                    ]
                }
            },
            {
                "organization": {
                    "features": [
                        "performance-tag-page",
                        "issue-percent-filters",
                    ]
                }
            },
            True,
        ),
        # One nested value differs ("performance-tag-pag") -> not equal.
        (
            {
                "organization": {
                    "features": [
                        "issue-percent-filters",
                        "performance-tag-page",
                    ]
                }
            },
            {
                "organization": {
                    "features": [
                        "performance-tag-pag",
                        "issue-percent-filters",
                    ]
                }
            },
            False,
        ),
        # Nested list is a strict subset -> not equal.
        (
            {
                "organization": {
                    "features": [
                        "issue-percent-filters",
                        "performance-tag-page",
                    ]
                }
            },
            {
                "organization": {
                    "features": [
                        "performance-tag-page",
                    ]
                }
            },
            False,
        ),
        ({"a": 1, "b": 2}, {"b": 2, "a": 1}, True),
        ({"a": 1, "b": 2, "c": {"d": [1, 2]}}, {"b": 2, "a": 1, "c": {"d": [2, 1]}}, True),
        ({"a": 1, "b": 2, "c": {"d": [1, 2]}}, {"b": 2, "a": 1, "c": {"d": [3, 4]}}, False),
    ],
)
def test_compare_two_records_nested_with_different_orders(obj1, obj2, is_same):
    """Records that differ only in nested ordering must compare equal.

    Mirrors the production comparison: wrap each record with make_hashible,
    put them in sets, and take the symmetric difference (empty iff equal).
    """
    output_diff = set(map(make_hashible, [obj1])).symmetric_difference(set(map(make_hashible, [obj2])))
    if is_same:
        assert not output_diff, f"{obj1} should be equal to {obj2}"
    else:
        assert output_diff, f"{obj1} shouldn't be equal to {obj2}"