Skip to content

Commit

Permalink
Minor whitespace, indentation, and quoting changes to improve interna…
Browse files Browse the repository at this point in the history
…l consistency and appease linters (pythonGH-14888)
  • Loading branch information
rhettinger authored Jul 21, 2019
1 parent 22f0483 commit 1c0e9bb
Showing 1 changed file with 74 additions and 53 deletions.
127 changes: 74 additions & 53 deletions Lib/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,12 +80,25 @@
"""

# Public API of the module, kept in alphabetical order (one name per line)
# so that additions produce minimal, easily reviewed diffs.
__all__ = [
    'NormalDist',
    'StatisticsError',
    'fmean',
    'geometric_mean',
    'harmonic_mean',
    'mean',
    'median',
    'median_grouped',
    'median_high',
    'median_low',
    'mode',
    'multimode',
    'pstdev',
    'pvariance',
    'quantiles',
    'stdev',
    'variance',
]

import math
import numbers
Expand Down Expand Up @@ -304,16 +317,16 @@ def mean(data):
assert count == n
return _convert(total/n, T)


def fmean(data):
""" Convert data to floats and compute the arithmetic mean.
"""Convert data to floats and compute the arithmetic mean.
This runs faster than the mean() function and it always returns a float.
The result is highly accurate but not as perfect as mean().
If the input dataset is empty, it raises a StatisticsError.
>>> fmean([3.5, 4.0, 5.25])
4.25
"""
try:
n = len(data)
Expand All @@ -332,6 +345,7 @@ def count(iterable):
except ZeroDivisionError:
raise StatisticsError('fmean requires at least one data point') from None


def geometric_mean(data):
"""Convert data to floats and compute the geometric mean.
Expand All @@ -350,6 +364,7 @@ def geometric_mean(data):
raise StatisticsError('geometric mean requires a non-empty dataset '
' containing positive numbers') from None


def harmonic_mean(data):
"""Return the harmonic mean of data.
Expand Down Expand Up @@ -547,23 +562,23 @@ def mode(data):


def multimode(data):
    """Return a list of the most frequently occurring values.

    Will return more than one result if there are multiple modes
    or an empty list if *data* is empty.

    >>> multimode('aabbbbbbbbcc')
    ['b']
    >>> multimode('aabbbbccddddeeffffgg')
    ['b', 'd', 'f']
    >>> multimode('')
    []
    """
    # most_common() yields (value, count) pairs ordered by descending count,
    # so the first group produced by groupby() holds every value that shares
    # the highest count.
    counts = Counter(iter(data)).most_common()
    # The (0, []) default covers empty input, where there is no first group.
    _, mode_items = next(groupby(counts, key=itemgetter(1)), (0, []))
    return [value for value, _count in mode_items]


# Notes on methods for computing quantiles
# ----------------------------------------
#
Expand Down Expand Up @@ -601,7 +616,7 @@ def multimode(data):
# external packages can be used for anything more advanced.

def quantiles(dist, /, *, n=4, method='exclusive'):
'''Divide *dist* into *n* continuous intervals with equal probability.
"""Divide *dist* into *n* continuous intervals with equal probability.
Returns a list of (n - 1) cut points separating the intervals.
Expand All @@ -616,7 +631,7 @@ def quantiles(dist, /, *, n=4, method='exclusive'):
If *method* is set to *inclusive*, *dist* is treated as population
data. The minimum value is treated as the 0th percentile and the
maximum value is treated as the 100th percentile.
'''
"""
if n < 1:
raise StatisticsError('n must be at least 1')
if hasattr(dist, 'inv_cdf'):
Expand Down Expand Up @@ -646,6 +661,7 @@ def quantiles(dist, /, *, n=4, method='exclusive'):
return result
raise ValueError(f'Unknown method: {method!r}')


# === Measures of spread ===

# See http://mathworld.wolfram.com/Variance.html
Expand Down Expand Up @@ -805,59 +821,64 @@ def pstdev(data, mu=None):
except AttributeError:
return math.sqrt(var)


## Normal Distribution #####################################################

class NormalDist:
'Normal distribution of a random variable'
"Normal distribution of a random variable"
# https://en.wikipedia.org/wiki/Normal_distribution
# https://en.wikipedia.org/wiki/Variance#Properties

__slots__ = {'_mu': 'Arithmetic mean of a normal distribution',
'_sigma': 'Standard deviation of a normal distribution'}
__slots__ = {
'_mu': 'Arithmetic mean of a normal distribution',
'_sigma': 'Standard deviation of a normal distribution',
}

def __init__(self, mu=0.0, sigma=1.0):
'NormalDist where mu is the mean and sigma is the standard deviation.'
"NormalDist where mu is the mean and sigma is the standard deviation."
if sigma < 0.0:
raise StatisticsError('sigma must be non-negative')
self._mu = mu
self._sigma = sigma

@classmethod
def from_samples(cls, data):
    """Make a normal distribution instance from sample data.

    *data* may be any iterable; it is materialized into a list first
    because it is traversed twice (once for the mean, once for the
    standard deviation).
    """
    if not isinstance(data, (list, tuple)):
        data = list(data)
    xbar = fmean(data)
    # Pass the precomputed mean to stdev() so it is not calculated again.
    return cls(xbar, stdev(data, xbar))

def samples(self, n, *, seed=None):
'Generate *n* samples for a given mean and standard deviation.'
"Generate *n* samples for a given mean and standard deviation."
gauss = random.gauss if seed is None else random.Random(seed).gauss
mu, sigma = self._mu, self._sigma
return [gauss(mu, sigma) for i in range(n)]

def pdf(self, x):
'Probability density function. P(x <= X < x+dx) / dx'
"Probability density function. P(x <= X < x+dx) / dx"
variance = self._sigma ** 2.0
if not variance:
raise StatisticsError('pdf() not defined when sigma is zero')
return exp((x - self._mu)**2.0 / (-2.0*variance)) / sqrt(tau * variance)
return exp((x - self._mu)**2.0 / (-2.0*variance)) / sqrt(tau*variance)

def cdf(self, x):
'Cumulative distribution function. P(X <= x)'
"Cumulative distribution function. P(X <= x)"
if not self._sigma:
raise StatisticsError('cdf() not defined when sigma is zero')
return 0.5 * (1.0 + erf((x - self._mu) / (self._sigma * sqrt(2.0))))

def inv_cdf(self, p):
'''Inverse cumulative distribution function. x : P(X <= x) = p
"""Inverse cumulative distribution function. x : P(X <= x) = p
Finds the value of the random variable such that the probability of the
variable being less than or equal to that value equals the given probability.
Finds the value of the random variable such that the probability of
the variable being less than or equal to that value equals the given
probability.
This function is also called the percent point function or quantile function.
'''
if (p <= 0.0 or p >= 1.0):
This function is also called the percent point function or quantile
function.
"""
if p <= 0.0 or p >= 1.0:
raise StatisticsError('p must be in the range 0.0 < p < 1.0')
if self._sigma <= 0.0:
raise StatisticsError('cdf() not defined when sigma at or below zero')
Expand Down Expand Up @@ -933,7 +954,7 @@ def inv_cdf(self, p):
return self._mu + (x * self._sigma)

def overlap(self, other):
'''Compute the overlapping coefficient (OVL) between two normal distributions.
"""Compute the overlapping coefficient (OVL) between two normal distributions.
Measures the agreement between two normal probability distributions.
Returns a value between 0.0 and 1.0 giving the overlapping area in
Expand All @@ -943,7 +964,7 @@ def overlap(self, other):
>>> N2 = NormalDist(3.2, 2.0)
>>> N1.overlap(N2)
0.8035050657330205
'''
"""
# See: "The overlapping coefficient as a measure of agreement between
# probability distributions and point estimation of the overlap of two
# normal densities" -- Henry F. Inman and Edwin L. Bradley Jr
Expand All @@ -968,87 +989,87 @@ def overlap(self, other):

@property
def mean(self):
'Arithmetic mean of the normal distribution.'
"Arithmetic mean of the normal distribution."
return self._mu

@property
def stdev(self):
'Standard deviation of the normal distribution.'
"Standard deviation of the normal distribution."
return self._sigma

@property
def variance(self):
'Square of the standard deviation.'
"Square of the standard deviation."
return self._sigma ** 2.0

def __add__(x1, x2):
'''Add a constant or another NormalDist instance.
"""Add a constant or another NormalDist instance.
If *other* is a constant, translate mu by the constant,
leaving sigma unchanged.
If *other* is a NormalDist, add both the means and the variances.
Mathematically, this works only if the two distributions are
independent or if they are jointly normally distributed.
'''
"""
if isinstance(x2, NormalDist):
return NormalDist(x1._mu + x2._mu, hypot(x1._sigma, x2._sigma))
return NormalDist(x1._mu + x2, x1._sigma)

def __sub__(x1, x2):
'''Subtract a constant or another NormalDist instance.
"""Subtract a constant or another NormalDist instance.
If *other* is a constant, translate by the constant mu,
leaving sigma unchanged.
If *other* is a NormalDist, subtract the means and add the variances.
Mathematically, this works only if the two distributions are
independent or if they are jointly normally distributed.
'''
"""
if isinstance(x2, NormalDist):
return NormalDist(x1._mu - x2._mu, hypot(x1._sigma, x2._sigma))
return NormalDist(x1._mu - x2, x1._sigma)

def __mul__(x1, x2):
'''Multiply both mu and sigma by a constant.
"""Multiply both mu and sigma by a constant.
Used for rescaling, perhaps to change measurement units.
Sigma is scaled with the absolute value of the constant.
'''
"""
return NormalDist(x1._mu * x2, x1._sigma * fabs(x2))

def __truediv__(x1, x2):
'''Divide both mu and sigma by a constant.
"""Divide both mu and sigma by a constant.
Used for rescaling, perhaps to change measurement units.
Sigma is scaled with the absolute value of the constant.
'''
"""
return NormalDist(x1._mu / x2, x1._sigma / fabs(x2))

def __pos__(x1):
'Return a copy of the instance.'
"Return a copy of the instance."
return NormalDist(x1._mu, x1._sigma)

def __neg__(x1):
'Negates mu while keeping sigma the same.'
"Negates mu while keeping sigma the same."
return NormalDist(-x1._mu, x1._sigma)

__radd__ = __add__

def __rsub__(x1, x2):
'Subtract a NormalDist from a constant or another NormalDist.'
"Subtract a NormalDist from a constant or another NormalDist."
return -(x1 - x2)

__rmul__ = __mul__

def __eq__(x1, x2):
'Two NormalDist objects are equal if their mu and sigma are both equal.'
"Two NormalDist objects are equal if their mu and sigma are both equal."
if not isinstance(x2, NormalDist):
return NotImplemented
return (x1._mu, x2._sigma) == (x2._mu, x2._sigma)

def __hash__(self):
'NormalDist objects hash equal if their mu and sigma are both equal.'
"NormalDist objects hash equal if their mu and sigma are both equal."
return hash((self._mu, self._sigma))

def __repr__(self):
Expand Down

0 comments on commit 1c0e9bb

Please sign in to comment.