From f7c11df0008a90bc13941eeb7e6a96d2a866b08d Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Tue, 15 Nov 2022 10:27:06 +0100 Subject: [PATCH] add test to check id_hash_keys is not ignored --- test/others/test_schema.py | 11 +++++++++++ test/others/test_utils.py | 1 - 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/test/others/test_schema.py b/test/others/test_schema.py index 49e831ba1d..df407c91fd 100644 --- a/test/others/test_schema.py +++ b/test/others/test_schema.py @@ -484,3 +484,14 @@ def test_assert_span_vs_span(): assert not Span(0, 1) in Span(5, 15) assert not Span(0, 10) in Span(5, 15) assert not Span(10, 20) in Span(5, 15) + + +def test_id_hash_keys_not_ignored(): + # Test that two documents with the same content but different metadata get assigned different ids when + # id_hash_keys=["meta"] is passed, and the same id when id_hash_keys is not passed + doc1 = Document(content="hello world", meta={"doc_id": "1"}, id_hash_keys=["meta"]) + doc2 = Document(content="hello world", meta={"doc_id": "2"}, id_hash_keys=["meta"]) + assert doc1.id != doc2.id + doc3 = Document(content="hello world", meta={"doc_id": "3"}) + doc4 = Document(content="hello world", meta={"doc_id": "4"}) + assert doc3.id == doc4.id diff --git a/test/others/test_utils.py b/test/others/test_utils.py index e3b50d06ae..f1d523214d 100644 --- a/test/others/test_utils.py +++ b/test/others/test_utils.py @@ -4,7 +4,6 @@ import numpy as np import pytest import pandas as pd -from pathlib import Path import responses from responses import matchers