From b8cddd86e2c1ead9a6603090ce408055fa854ae5 Mon Sep 17 00:00:00 2001 From: Guido Imperiale Date: Wed, 11 Sep 2019 09:40:46 +0100 Subject: [PATCH 1/3] Partially revert "Many speedups in UnitsContainer and ParserHelper" --- pint/util.py | 66 +++++++++++++++++++++------------------------------- 1 file changed, 27 insertions(+), 39 deletions(-) diff --git a/pint/util.py b/pint/util.py index 587517a68..f5282a300 100644 --- a/pint/util.py +++ b/pint/util.py @@ -286,20 +286,16 @@ def remove(self, keys): """ Create a new UnitsContainer purged from given keys. """ - new = self.copy() - for k in keys: - new._d.pop(k, None) - new._hash = None - return new + d = udict(self._d) + return UnitsContainer(((key, d[key]) for key in d if key not in keys)) def rename(self, oldkey, newkey): """ Create a new UnitsContainer in which an entry has been renamed. """ - new = self.copy() - new._d[newkey] = new._d.pop(oldkey) - new._hash = None - return new + d = udict(self._d) + d[newkey] = d.pop(oldkey) + return UnitsContainer(d) def __iter__(self): return iter(self._d) @@ -327,9 +323,8 @@ def __setstate__(self, state): def __eq__(self, other): if isinstance(other, UnitsContainer): - # Not the same as hash(self); see ParserHelper.__hash__ and __eq__ - return UnitsContainer.__hash__(self) == UnitsContainer.__hash__(other) - if isinstance(other, string_types): + other = other._d + elif isinstance(other, string_types): other = ParserHelper.from_string(other) other = other._d @@ -350,25 +345,20 @@ def format_babel(self, spec, **kwspec): return format_unit(self, spec, **kwspec) def __copy__(self): - # Skip expensive health checks performed by __init__ - out = object.__new__(self.__class__) - out._d = self._d.copy() - out._hash = self._hash - return out + return UnitsContainer(self._d) def __mul__(self, other): + d = udict(self._d) if not isinstance(other, self.__class__): err = 'Cannot multiply UnitsContainer by {}' raise TypeError(err.format(type(other))) - - new = self.copy() for key, value in other.items(): - new._d[key] += value - if new._d[key] == 0: - del new._d[key] + d[key] += value + keys = [key for key, value in d.items() if value == 0] + for key in keys: + del d[key] - new._hash = None - return new + return UnitsContainer(d) __rmul__ = __mul__ @@ -376,26 +366,26 @@ def __pow__(self, other): if not isinstance(other, NUMERIC_TYPES): err = 'Cannot power UnitsContainer by {}' raise TypeError(err.format(type(other))) - - new = self.copy() - for key, value in new._d.items(): - new._d[key] *= other - new._hash = None - return new + d = udict(self._d) + for key, value in d.items(): + d[key] *= other + return UnitsContainer(d) def __truediv__(self, other): if not isinstance(other, self.__class__): err = 'Cannot divide UnitsContainer by {}' raise TypeError(err.format(type(other))) - new = self.copy() + d = udict(self._d) + for key, value in other.items(): - new._d[key] -= value - if new._d[key] == 0: - del new._d[key] + d[key] -= value - new._hash = None - return new + keys = [key for key, value in d.items() if value == 0] + for key in keys: + del d[key] + + return UnitsContainer(d) def __rtruediv__(self, other): if not isinstance(other, self.__class__) and other != 1: @@ -483,9 +473,7 @@ def _from_string(cls, input_string): for key, value in ret.items())) def __copy__(self): - new = super(ParserHelper, self).__copy__() - new.scale = self.scale - return new + return ParserHelper(scale=self.scale, **self) def copy(self): return self.__copy__() From 7cd8e1ce906e0ea02eca9ff54559f8f1caa4baa1 Mon Sep 17 00:00:00 2001 From: Guido Imperiale Date: Wed, 11 Sep 2019 10:55:17 +0100 Subject: [PATCH 2/3] Identical hashes do not guarantee equality --- pint/testsuite/test_issues.py | 20 +++++++++- pint/util.py | 74 ++++++++++++++++++++++------------- 2 files changed, 66 insertions(+), 28 deletions(-) diff --git a/pint/testsuite/test_issues.py b/pint/testsuite/test_issues.py index 6ca703586..275ed338b 100644 --- a/pint/testsuite/test_issues.py +++ b/pint/testsuite/test_issues.py @@ -11,7 +11,7 @@ from pint.unit import UnitsContainer from pint.util import ParserHelper -from pint.compat import np, long_type +from pint.compat import np from pint.errors import UndefinedUnitError, DimensionalityError from pint.testsuite import QuantityTestCase, helpers @@ -699,3 +699,21 @@ def test_issue856b(self): ureg2.define('test123 = 456 kg') assert ureg1('1 test123').to('kg').magnitude == 123 assert ureg2('1 test123').to('kg').magnitude == 456 + + def test_issue876(self): + # Same hash must not imply equality. + + # As an implementation detail of CPython, hash(-1) == hash(-2) This test is + # useless in potential alternative Python implementations where hash(-1) != + # hash(-2); one would need to find hash collisions specific for each + # implementation + + a = UnitsContainer({"[mass]": -1}) + b = UnitsContainer({"[mass]": -2}) + c = UnitsContainer({"[mass]": -3}) + + # Guarantee working on alternative Python implementations + assert (hash(-1) == hash(-2)) == (hash(a) == hash(b)) + assert (hash(-1) == hash(-3)) == (hash(a) == hash(c)) + assert a != b + assert a != c diff --git a/pint/util.py b/pint/util.py index f5282a300..d8ebcace0 100644 --- a/pint/util.py +++ b/pint/util.py @@ -286,16 +286,20 @@ def remove(self, keys): """ Create a new UnitsContainer purged from given keys. """ - d = udict(self._d) - return UnitsContainer(((key, d[key]) for key in d if key not in keys)) + new = self.copy() + for k in keys: + new._d.pop(k, None) + new._hash = None + return new def rename(self, oldkey, newkey): """ Create a new UnitsContainer in which an entry has been renamed. """ - d = udict(self._d) - d[newkey] = d.pop(oldkey) - return UnitsContainer(d) + new = self.copy() + new._d[newkey] = new._d.pop(oldkey) + new._hash = None + return new def __iter__(self): return iter(self._d) @@ -323,7 +327,14 @@ def __setstate__(self, state): def __eq__(self, other): if isinstance(other, UnitsContainer): + out = UnitsContainer.__hash__(self) == UnitsContainer.__hash__(other) + # Different hashes guarantee that the actual contents are different, but + # identical hashes give no guarantee of equality + # e.g. in CPython, hash(-1) == hash(-2) + if not out: + return False other = other._d + elif isinstance(other, string_types): other = ParserHelper.from_string(other) other = other._d @@ -345,20 +356,25 @@ def format_babel(self, spec, **kwspec): return format_unit(self, spec, **kwspec) def __copy__(self): - return UnitsContainer(self._d) + # Skip expensive health checks performed by __init__ + out = object.__new__(self.__class__) + out._d = self._d.copy() + out._hash = self._hash + return out def __mul__(self, other): - d = udict(self._d) if not isinstance(other, self.__class__): err = 'Cannot multiply UnitsContainer by {}' raise TypeError(err.format(type(other))) + + new = self.copy() for key, value in other.items(): - d[key] += value - keys = [key for key, value in d.items() if value == 0] - for key in keys: - del d[key] + new._d[key] += value + if new._d[key] == 0: + del new._d[key] - return UnitsContainer(d) + new._hash = None + return new __rmul__ = __mul__ @@ -366,26 +382,26 @@ def __pow__(self, other): if not isinstance(other, NUMERIC_TYPES): err = 'Cannot power UnitsContainer by {}' raise TypeError(err.format(type(other))) - d = udict(self._d) - for key, value in d.items(): - d[key] *= other - return UnitsContainer(d) + + new = self.copy() + for key, value in new._d.items(): + new._d[key] *= other + new._hash = None + return new def __truediv__(self, other): if not isinstance(other, self.__class__): err = 'Cannot divide UnitsContainer by {}' raise TypeError(err.format(type(other))) - d = udict(self._d) - + new = self.copy() for key, value in other.items(): - d[key] -= value - - keys = [key for key, value in d.items() if value == 0] - for key in keys: - del d[key] + new._d[key] -= value + if new._d[key] == 0: + del new._d[key] - return UnitsContainer(d) + new._hash = None + return new def __rtruediv__(self, other): if not isinstance(other, self.__class__) and other != 1: @@ -473,7 +489,9 @@ def _from_string(cls, input_string): for key, value in ret.items())) def __copy__(self): - return ParserHelper(scale=self.scale, **self) + new = super(ParserHelper, self).__copy__() + new.scale = self.scale + return new def copy(self): return self.__copy__() @@ -492,9 +510,11 @@ def __setstate__(self, state): self._d, self._hash, self.scale = state def __eq__(self, other): - if isinstance(other, self.__class__): - return self.scale == other.scale and\ + if isinstance(other, ParserHelper): + return ( + self.scale == other.scale and super(ParserHelper, self).__eq__(other) + ) elif isinstance(other, string_types): return self == ParserHelper.from_string(other) elif isinstance(other, Number): From 0a84c83b4b75d2811ed2ab44bb8f5648b183e2bf Mon Sep 17 00:00:00 2001 From: Guido Imperiale Date: Wed, 11 Sep 2019 10:59:59 +0100 Subject: [PATCH 3/3] Minor comments tweak --- pint/testsuite/test_issues.py | 6 +++--- pint/util.py | 7 ++++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/pint/testsuite/test_issues.py b/pint/testsuite/test_issues.py index 275ed338b..e031fe5c3 100644 --- a/pint/testsuite/test_issues.py +++ b/pint/testsuite/test_issues.py @@ -703,9 +703,9 @@ def test_issue856b(self): def test_issue876(self): # Same hash must not imply equality. - # As an implementation detail of CPython, hash(-1) == hash(-2) This test is - # useless in potential alternative Python implementations where hash(-1) != - # hash(-2); one would need to find hash collisions specific for each + # As an implementation detail of CPython, hash(-1) == hash(-2). + # This test is useless in potential alternative Python implementations where + # hash(-1) != hash(-2); one would need to find hash collisions specific for each # implementation a = UnitsContainer({"[mass]": -1}) diff --git a/pint/util.py b/pint/util.py index d8ebcace0..d89b9579a 100644 --- a/pint/util.py +++ b/pint/util.py @@ -327,11 +327,12 @@ def __setstate__(self, state): def __eq__(self, other): if isinstance(other, UnitsContainer): - out = UnitsContainer.__hash__(self) == UnitsContainer.__hash__(other) + # UnitsContainer.__hash__(self) is not the same as hash(self); see + # ParserHelper.__hash__ and __eq__. # Different hashes guarantee that the actual contents are different, but - # identical hashes give no guarantee of equality + # identical hashes give no guarantee of equality. # e.g. in CPython, hash(-1) == hash(-2) - if not out: + if UnitsContainer.__hash__(self) != UnitsContainer.__hash__(other): return False other = other._d