diff --git a/python/pyspark/rddsampler.py b/python/pyspark/rddsampler.py index fd7639ff7a138..5928b1d892de0 100644 --- a/python/pyspark/rddsampler.py +++ b/python/pyspark/rddsampler.py @@ -70,7 +70,7 @@ def func(self, split, iterator): yield obj else: for obj in iterator: - if self.getUniformSample() <= self._fraction: + if self.getUniformSample() < self._fraction: yield obj @@ -106,5 +106,5 @@ def func(self, split, iterator): yield key, val else: for key, val in iterator: - if self.getUniformSample() <= self._fractions[key]: + if self.getUniformSample() < self._fractions[key]: yield key, val