Add implementation of snapping mechanism

First draft implementation of the snapping mechanism. This addresses a vulnerability in the Laplace mechanism and its derivatives that stems from the floating-point representation of real numbers. The mechanism was proposed as a solution to this vulnerability by Ilya Mironov. Paper link: https://www.microsoft.com/en-us/research/wp-content/uploads/2012/10/lsbs.pdf
IBM · Oct 27, 2021 · 0b0768b · 0b0768b
1 parent 90b319a
commit 0b0768b
Show file tree

Hide file tree

Showing 4 changed files with 339 additions and 0 deletions.
diff --git a/diffprivlib/mechanisms/__init__.py b/diffprivlib/mechanisms/__init__.py
@@ -28,6 +28,7 @@
 from diffprivlib.mechanisms.geometric import Geometric, GeometricFolded, GeometricTruncated
 from diffprivlib.mechanisms.laplace import Laplace, LaplaceBoundedDomain, LaplaceBoundedNoise, LaplaceFolded,\
     LaplaceTruncated
+from diffprivlib.mechanisms.snapping import Snapping
 from diffprivlib.mechanisms.staircase import Staircase
 from diffprivlib.mechanisms.uniform import Uniform
 from diffprivlib.mechanisms.vector import Vector
diff --git a/diffprivlib/mechanisms/snapping.py b/diffprivlib/mechanisms/snapping.py
@@ -0,0 +1,241 @@
+
+"""
+The Snapping mechanism in differential privacy, which eliminates a weakness to floating point errors in the classic
+Laplace mechanism with standard Laplace sampling.
+"""
+import math
+import secrets
+import struct
+from numbers import Real
+
+import crlibm
+import numpy as np
+
+from diffprivlib.mechanisms import DPMechanism
+from diffprivlib.mechanisms.base import TruncationAndFoldingMixin
+
+
+class Snapping(DPMechanism, TruncationAndFoldingMixin):
+    r"""
+    The Snapping mechanism for differential privacy.
+
+    First proposed by Ilya Mironov [Mir12]_.
+
+    It eliminates a vulnerability stemming from the representation of reals as floating-point numbers in implementations
+    of the classic Laplace mechanism and its variants which use the inverse CDF of the Laplace distribution to sample
+    it. It causes a high degree of reduction in the granularity of the output.
+
+    Parameters
+    ----------
+    epsilon : float
+        Privacy parameter :math:`\epsilon` for the mechanism.  Must be in (0, ∞]; the noise scale is
+        :math:`1 / \epsilon`, so zero cannot be used.
+
+    sensitivity : float
+        The sensitivity of the mechanism.  Must be in [0, ∞).
+
+    lower : float
+        The lower bound of the mechanism.
+
+    upper : float
+        The upper bound of the mechanism.
+
+    References
+    ----------
+    .. [Mir12] Mironov, Ilya. "On significance of the least significant bits for differential privacy." Proceedings of
+     the 2012 ACM conference on Computer and communications security (2012).
+    """
+    def __init__(self, *, epsilon, sensitivity, lower, upper):
+        super().__init__(epsilon=epsilon, delta=0.0)
+        # Store the validated float returned by the check instead of the raw argument.
+        self.sensitivity = self._check_sensitivity(sensitivity)
+        TruncationAndFoldingMixin.__init__(self, lower=lower, upper=upper)
+        self._bound = self._scale_bound()
+
+    @classmethod
+    def _check_sensitivity(cls, sensitivity):
+        """
+        Validate the sensitivity and return it as a float.
+
+        Raises
+        ------
+        TypeError
+            If `sensitivity` is not a real number.
+        ValueError
+            If `sensitivity` is negative.
+        """
+        if not isinstance(sensitivity, Real):
+            raise TypeError("Sensitivity must be numeric")
+
+        if sensitivity < 0:
+            raise ValueError("Sensitivity must be non-negative")
+        return float(sensitivity)
+
+    def _check_all(self, value):
+        """Run the parent checks, re-validate the sensitivity and require `value` to be a real number."""
+        super()._check_all(value)
+        TruncationAndFoldingMixin._check_all(self, value)
+        self._check_sensitivity(sensitivity=self.sensitivity)
+
+        if not isinstance(value, Real):
+            raise TypeError("Value to be randomised must be a number")
+
+        return True
+
+    def _scale_bound(self):
+        """
+        Scales the lower and upper bounds to be proportionate to sensitivity 1, and symmetrical about 0.
+        For sensitivity 0, only centres the bound, as scaling up and down is not defined.
+
+        Returns
+        -------
+        float
+            A symmetric bound around 0 scaled to sensitivity 1
+        """
+        if self.sensitivity > 0:
+            return (self.upper - self.lower) / 2.0 / self.sensitivity
+        return (self.upper - self.lower) / 2.0
+
+    def _truncate(self, value):
+        """
+        Clamp a value that has already been centred and scaled to the symmetric interval [-_bound, _bound].
+
+        This operates in the centred coordinate system, not on the original [lower, upper] domain.
+        """
+        if value > self._bound:
+            return self._bound
+        if value < -self._bound:
+            return -self._bound
+
+        return value
+
+    def bias(self, value):
+        raise NotImplementedError
+
+    def variance(self, value):
+        raise NotImplementedError
+
+    def mse(self, value):
+        raise NotImplementedError
+
+    def effective_epsilon(self):
+        r"""
+        Computes the effective value of :math:`\epsilon` that the Snapping mechanism guarantees compared to an
+        equivalent Laplace mechanism based on the bounds and the machine epsilon.
+        Defined in section 5.2 of [Mir12]_ as :math:`\epsilon + 12 B \eta \epsilon + 2 \eta`, where :math:`B` is the
+        scaled symmetric bound and :math:`\eta` is the machine epsilon.
+
+        Returns
+        -------
+        float
+            The effective value of :math:`\epsilon`
+        """
+        machine_epsilon = np.finfo(float).epsneg
+        # The bound-dependent term is scaled by the machine epsilon; without that factor the result is inflated
+        # by many orders of magnitude.
+        return self.epsilon + 12.0 * self._bound * machine_epsilon * self.epsilon + 2.0 * machine_epsilon
+
+    def _scale_and_offset_value(self, value):
+        """
+        Centre value around 0 with symmetric bound and scale to sensitivity 1
+
+        Parameters
+        ----------
+        value : float
+            value to be scaled
+
+        Returns
+        -------
+        float
+            value offset to be centered on 0 and scaled to sensitivity 1
+        """
+        value_scaled = value / self.sensitivity
+        return value_scaled - self._bound - (self.lower / self.sensitivity)
+
+    def _reverse_scale_and_offset_value(self, value):
+        """
+        Inverse of `_scale_and_offset_value`: map a centred, sensitivity-1 value back to the original domain.
+
+        Parameters
+        ----------
+        value : float
+            Centred and scaled value
+
+        Returns
+        -------
+        float
+            Value rescaled by the sensitivity and shifted so that -_bound maps to `lower`
+        """
+        return (value + self._bound) * self.sensitivity + self.lower
+
+    def _get_nearest_power_of_2(self, x):
+        """
+        Return `x` unchanged if its mantissa bits are all zero, otherwise the next power of two above it.
+
+        Works directly on the IEEE 754 double bit pattern: when the low 52 bits are non-zero, the upper bits
+        (sign and exponent) are incremented by one and the mantissa is cleared.  Within this class it is only
+        called with 1 / epsilon, which is positive.
+
+        Parameters
+        ----------
+        x : float
+            Value to round up
+
+        Returns
+        -------
+        float
+            Smallest power of two greater than or equal to `x` (for positive normal `x`)
+        """
+        def float_to_bits(d):
+            s = struct.pack('>d', d)
+            return struct.unpack('>q', s)[0]
+
+        def bits_to_float(b):
+            s = struct.pack('>q', b)
+            return struct.unpack('>d', s)[0]
+
+        bits = float_to_bits(x)
+        if bits % (1 << 52) == 0:
+            return x
+        return bits_to_float(((bits >> 52) + 1) << 52)
+
+    def _round_to_nearest_power_of_2(self, value):
+        """ Performs the rounding step from [Mir12]_ with ties resolved towards +∞
+
+        The value is rounded to the nearest multiple of the power of two obtained from
+        `_get_nearest_power_of_2(1 / epsilon)`.  For infinite epsilon the value is returned unchanged.
+
+        Parameters
+        ----------
+        value : float
+            Value to be rounded
+
+        Returns
+        -------
+        float
+            Rounded value
+
+        """
+        if self.epsilon == float('inf'):  # infinitely small rounding
+            return value
+        base = self._get_nearest_power_of_2(1.0 / self.epsilon)
+        remainder = value % base
+        if remainder > base / 2:
+            return value - remainder + base
+        if remainder == base / 2:
+            # Exact tie: move up to the next multiple of base.
+            return value + remainder
+        return value - remainder
+
+    def _sample_uniform(self):
+        """
+        Uniformly sample the full domain of floating-point numbers between (0, 1), rather than only multiples of 2^-53.
+        A uniform distribution over D ∩ (0, 1) can be generated by independently sampling an exponent
+        from the geometric distribution with parameter .5 and a significand by drawing a uniform string from
+        {0, 1}^52 [Mir12]_
+
+        Based on code recipe in Python standard library documentation [Py21]_.
+
+        Returns
+        -------
+        float
+            A value sampled from float in (0, 1) with probability proportional to the size of the infinite-precision
+            real interval each float represents
+
+        References
+        ----------
+        .. [Py21]  The Python Standard Library. "random — Generate pseudo-random numbers", 2021
+        https://docs.python.org/3/library/random.html#recipes
+        """
+        # 52 random significand bits with the implicit leading one set explicitly.
+        mantissa = 1 << 52 | secrets.randbits(52)
+        exponent = -53
+        x = 0
+        # Each all-zero 32-bit draw lowers the exponent by 32; the first non-zero draw contributes its count of
+        # leading zero bits and ends the loop.
+        while not x:
+            x = secrets.randbits(32)
+            exponent += x.bit_length() - 32
+        return math.ldexp(mantissa, exponent)
+
+    def _sample_laplace(self):
+        r"""
+        Laplace inverse CDF random sampling implementation which uses full domain uniform sampling and exact log
+        implementation from crlibm, as mentioned in [Mir12]_.
+        Outputs a random value scaled according to privacy budget and sensitivity 1, as bounds and input are scaled to
+        sensitivity 1 before Laplacian noise is added.
+
+        Returns
+        -------
+        float
+            Random value from Laplace distribution scaled according to :math:`\epsilon`
+        """
+        sign = secrets.randbits(1)
+        uniform = self._sample_uniform()
+        laplace = (-1)**sign * 1.0 / self.epsilon * crlibm.log_rn(uniform)
+        return laplace
+
+    def randomise(self, value):
+        """Randomise `value` with the mechanism.
+
+        Parameters
+        ----------
+        value : float
+            The value to be randomised.
+
+        Returns
+        -------
+        float
+            The randomised value.
+
+        """
+        self._check_all(value)
+        if self.sensitivity > 0:
+            value_scaled_offset = self._scale_and_offset_value(value)
+            value_clamped = self._truncate(value_scaled_offset)
+            laplace = self._sample_laplace()
+            value_rounded = self._round_to_nearest_power_of_2(value_clamped + laplace)
+            return self._reverse_scale_and_offset_value(self._truncate(value_rounded))
+
+        # Zero sensitivity: no noise is added.  The value has not been centred here, so clamp it against the
+        # original [lower, upper] domain rather than the symmetric bound used by `_truncate`.
+        return min(max(value, self.lower), self.upper)
diff --git a/setup.py b/setup.py
@@ -25,6 +25,7 @@ def get_version(file_path):
                     'scikit-learn >= 0.23.0',
                     'scipy >= 1.5.0',
                     'joblib >= 0.16.0',
+                    'crlibm >= 1.0.3',
                     ]
 
 docs_require = ['sphinx >= 1.4',

diff --git a/tests/mechanisms/test_Snapping.py b/tests/mechanisms/test_Snapping.py
@@ -0,0 +1,96 @@
+import numpy as np
+from unittest import TestCase
+
+import pytest
+
+from diffprivlib.mechanisms import Snapping
+from diffprivlib.utils import global_seed
+
+
+class TestSnapping(TestCase):
+    """Unit tests for the Snapping mechanism."""
+
+    def setup_method(self, method):
+        # Tests whose name ends in "prob" call global_seed with a fixed value before running.
+        if method.__name__.endswith("prob"):
+            global_seed(314159)
+
+        self.mech = Snapping
+
+    def teardown_method(self, method):
+        del self.mech
+
+    def test_class(self):
+        """Snapping must be a DPMechanism subclass."""
+        from diffprivlib.mechanisms import DPMechanism
+        self.assertTrue(issubclass(Snapping, DPMechanism))
+
+    def test_neg_sensitivity(self):
+        """A negative sensitivity is rejected with ValueError."""
+        with self.assertRaises(ValueError):
+            self.mech(epsilon=1, sensitivity=-1, lower=0, upper=1000)
+
+    def test_str_sensitivity(self):
+        """A string sensitivity is rejected with TypeError."""
+        with self.assertRaises(TypeError):
+            self.mech(epsilon=1, sensitivity="1", lower=0, upper=1000)
+
+    def test_zero_sensitivity(self):
+        """With zero sensitivity the in-range input comes back unchanged."""
+        mech = self.mech(epsilon=1, sensitivity=0, lower=0, upper=1000)
+
+        for _ in range(1000):
+            self.assertAlmostEqual(mech.randomise(1), 1)
+
+    def test_neg_epsilon(self):
+        """A negative epsilon is rejected with ValueError."""
+        with self.assertRaises(ValueError):
+            self.mech(epsilon=-1, sensitivity=1, lower=0, upper=1000)
+
+    def test_inf_epsilon(self):
+        """With infinite epsilon the input comes back unchanged."""
+        mech = self.mech(epsilon=float("inf"), sensitivity=1, lower=0, upper=1000)
+
+        for _ in range(1000):
+            self.assertAlmostEqual(mech.randomise(1), 1)
+
+    def test_complex_epsilon(self):
+        """A complex epsilon is rejected with TypeError."""
+        with self.assertRaises(TypeError):
+            self.mech(epsilon=1 + 2j, sensitivity=1, lower=0, upper=1000)
+
+    def test_string_epsilon(self):
+        """A string epsilon is rejected with TypeError."""
+        with self.assertRaises(TypeError):
+            self.mech(epsilon="Two", sensitivity=1, lower=0, upper=1000)
+
+    def test_repr(self):
+        """The repr names the class."""
+        mech = self.mech(epsilon=1, sensitivity=1, lower=0, upper=1000)
+        self.assertIn(".Snapping(", repr(mech))
+
+    def test_epsilon(self):
+        """A basic randomise call returns something."""
+        mech = self.mech(epsilon=1, sensitivity=1, lower=0, upper=1000)
+        self.assertIsNotNone(mech.randomise(1))
+
+    def test_non_numeric(self):
+        """A non-numeric value is rejected with TypeError."""
+        mech = self.mech(epsilon=1, sensitivity=1, lower=0, upper=1000)
+        with self.assertRaises(TypeError):
+            mech.randomise("Hello")
+
+    def test_zero_median_prob(self):
+        """The median of many randomisations of 0 stays within 0.1 of zero."""
+        mech = self.mech(epsilon=1, sensitivity=1, lower=0, upper=1000)
+        vals = [mech.randomise(0) for _ in range(10000)]
+
+        median = float(np.median(vals))
+        self.assertAlmostEqual(np.abs(median), 0.0, delta=0.1)
+
+    def test_neighbours_prob(self):
+        """Outputs at or below zero are more frequent for input 0 than for input 1, within the epsilon bound."""
+        epsilon = 1
+        runs = 10000
+        mech = self.mech(epsilon=epsilon, sensitivity=1, lower=0, upper=1000)
+        hits_from_0 = 0
+        hits_from_1 = 0
+
+        for _ in range(runs):
+            if mech.randomise(0) <= 0:
+                hits_from_0 += 1
+
+            if mech.randomise(1) <= 0:
+                hits_from_1 += 1
+
+        self.assertGreater(hits_from_0, hits_from_1)
+        self.assertLessEqual(hits_from_0 / runs, np.exp(epsilon) * hits_from_1 / runs + 0.1)
+