Skip to content

Commit

Permalink
Added pre-commit config with black, isort, and pyupgrade
Browse files Browse the repository at this point in the history
  • Loading branch information
astrofrog committed Oct 17, 2023
1 parent 0fff46c commit 0ecedd5
Show file tree
Hide file tree
Showing 7 changed files with 291 additions and 152 deletions.
29 changes: 29 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.4.0
    hooks:
      - id: check-added-large-files
      - id: check-case-conflict
      - id: check-yaml
      - id: debug-statements
      - id: end-of-file-fixer
      - id: trailing-whitespace

  - repo: https://github.com/pycqa/isort
    rev: 5.12.0
    hooks:
      - id: isort
        name: isort (python)
        args:
          - "--filter-files"

  - repo: https://github.com/asottile/pyupgrade
    rev: v3.10.1
    hooks:
      - id: pyupgrade
        args: ["--py38-plus"]

  - repo: https://github.com/psf/black
    rev: 23.9.1
    hooks:
      - id: black
82 changes: 58 additions & 24 deletions comparison/benchmark.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# Script to compare the speedup provided by fast-histogram

from timeit import repeat, timeit

import numpy as np
from timeit import timeit, repeat

SETUP_1D = """
import numpy as np
Expand Down Expand Up @@ -29,42 +30,75 @@


def time_stats(stmt=None, setup=None):
    """Time ``stmt`` and return per-call timing statistics.

    The statement is first run once to estimate its cost; that estimate is
    used to pick how many calls go into each of the 10 timing repeats.

    Parameters
    ----------
    stmt : str or callable
        Statement to time, passed straight through to ``timeit``/``repeat``.
    setup : str or callable
        Setup code, passed straight through to ``timeit``/``repeat``.

    Returns
    -------
    tuple of float
        ``(min, mean, median)`` of the 10 repeat timings, each divided by the
        number of calls per repeat, i.e. time per single call.
    """
    # Call once to check how long it takes
    time_single = timeit(stmt=stmt, setup=setup, number=1)

    # Find out how many times we can call it. We always call it at least three
    # times for accuracy. Each repeat is sized to take roughly TARGET_TIME.
    number = max(3, int(TARGET_TIME / time_single))

    # 10 repeats of `number` calls each; reported exactly once.
    print(f" -> estimated time to complete test: {time_single * 10 * number:.1f}s")

    times = repeat(stmt=stmt, setup=setup, repeat=10, number=number)

    return np.min(times) / number, np.mean(times) / number, np.median(times) / number


# Row layouts for the results table: one size column followed by twelve
# timing columns (min/mean/median for numpy and fast-histogram, in 1D and 2D).
FMT_HEADER = "# {:7s}" + " {:10s}" * 12 + "\n"
FMT = "{:9d}" + " {:10.7e}" * 12 + "\n"

with open("benchmark_times.txt", "w") as f:
    # Header line starts with "#".
    f.write(
        FMT_HEADER.format(
            "size",
            "np_1d_min",
            "np_1d_mean",
            "np_1d_median",
            "fa_1d_min",
            "fa_1d_mean",
            "fa_1d_median",
            "np_2d_min",
            "np_2d_mean",
            "np_2d_median",
            "fa_2d_min",
            "fa_2d_mean",
            "fa_2d_median",
        )
    )

    # Array sizes 10**0 .. 10**8, one row of results per size.
    for log10_size in range(0, 9):
        size = int(10**log10_size)

        print(f"Running benchmarks for size={size}")

        # Each benchmark is run exactly once per size.
        np_1d_min, np_1d_mean, np_1d_median = time_stats(
            stmt=NUMPY_1D_STMT, setup=SETUP_1D.format(size=size)
        )
        fa_1d_min, fa_1d_mean, fa_1d_median = time_stats(
            stmt=FAST_1D_STMT, setup=SETUP_1D.format(size=size)
        )
        np_2d_min, np_2d_mean, np_2d_median = time_stats(
            stmt=NUMPY_2D_STMT, setup=SETUP_2D.format(size=size)
        )
        fa_2d_min, fa_2d_mean, fa_2d_median = time_stats(
            stmt=FAST_2D_STMT, setup=SETUP_2D.format(size=size)
        )

        f.write(
            FMT.format(
                size,
                np_1d_min,
                np_1d_mean,
                np_1d_median,
                fa_1d_min,
                fa_1d_mean,
                fa_1d_median,
                np_2d_min,
                np_2d_mean,
                np_2d_median,
                fa_2d_min,
                fa_2d_mean,
                fa_2d_median,
            )
        )
        # Push each finished row out immediately rather than waiting for the
        # file to close.
        f.flush()
32 changes: 22 additions & 10 deletions comparison/plot.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,33 @@
# Script to make the comparison plot for the benchmark

import numpy as np
import matplotlib.pyplot as plt
import numpy as np

# Load the benchmark table; unpack=True yields one array per column, in the
# order the columns are listed here.
(
    size,
    np_1d_min,
    np_1d_mean,
    np_1d_median,
    fa_1d_min,
    fa_1d_mean,
    fa_1d_median,
    np_2d_min,
    np_2d_mean,
    np_2d_median,
    fa_2d_min,
    fa_2d_mean,
    fa_2d_median,
) = np.loadtxt("benchmark_times.txt", unpack=True)

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

# Plot the numpy/fast-histogram ratio of the "min" columns. Each curve is
# drawn once so the legend has a single "1D" and a single "2D" entry.
ax.plot(size, np_1d_min / fa_1d_min, color=(34 / 255, 122 / 255, 181 / 255), label="1D")
ax.plot(size, np_2d_min / fa_2d_min, color=(255 / 255, 133 / 255, 25 / 255), label="2D")

ax.set_xscale("log")
ax.set_xlim(0.3, 3e8)
ax.set_ylim(1, 35)
ax.grid()
ax.set_xlabel("Array size")
ax.set_ylabel("Speedup (fast-histogram / numpy)")
ax.legend()
fig.savefig("speedup_compared.png", bbox_inches="tight")
40 changes: 24 additions & 16 deletions fast_histogram/histogram.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
from __future__ import division

import numbers

import numpy as np

from ._histogram_core import (_histogram1d,
_histogram2d,
_histogramdd,
_histogram1d_weighted,
_histogram2d_weighted,
_histogramdd_weighted)
from ._histogram_core import (
_histogram1d,
_histogram1d_weighted,
_histogram2d,
_histogram2d_weighted,
_histogramdd,
_histogramdd_weighted,
)

NUMERICAL_TYPES = {'f', 'i', 'u'}
NUMERICAL_TYPES = {"f", "i", "u"}

__all__ = ['histogram1d', 'histogram2d', 'histogramdd']
__all__ = ["histogram1d", "histogram2d", "histogramdd"]


def histogram1d(x, bins, range, weights=None):
Expand All @@ -40,7 +40,7 @@ def histogram1d(x, bins, range, weights=None):
nx = bins

if not np.isscalar(bins):
raise TypeError('bins should be an integer')
raise TypeError("bins should be an integer")

xmin, xmax = range

Expand All @@ -66,7 +66,9 @@ def histogram1d(x, bins, range, weights=None):
else:
weights = np.atleast_1d(weights)
if weights.dtype.kind not in NUMERICAL_TYPES:
raise TypeError("weights is not or cannot be converted to a numerical array")
raise TypeError(
"weights is not or cannot be converted to a numerical array"
)
return _histogram1d_weighted(x, weights, nx, xmin, xmax)


Expand Down Expand Up @@ -99,7 +101,7 @@ def histogram2d(x, y, bins, range, weights=None):
nx, ny = bins

if not np.isscalar(nx) or not np.isscalar(ny):
raise TypeError('bins should be an iterable of two integers')
raise TypeError("bins should be an iterable of two integers")

(xmin, xmax), (ymin, ymax) = range

Expand Down Expand Up @@ -141,7 +143,9 @@ def histogram2d(x, y, bins, range, weights=None):
else:
weights = np.atleast_1d(weights)
if weights.dtype.kind not in NUMERICAL_TYPES:
raise TypeError("weights is not or cannot be converted to a numerical array")
raise TypeError(
"weights is not or cannot be converted to a numerical array"
)
return _histogram2d_weighted(x, y, weights, nx, xmin, xmax, ny, ymin, ymax)


Expand Down Expand Up @@ -206,7 +210,9 @@ def histogramdd(sample, bins, range, weights=None):
raise ValueError("number of ranges does not equal number of dimensions")
for i, r in enumerate(range):
if not len(r) == 2:
raise ValueError("should pass a minimum and maximum value for each dimension")
raise ValueError(
"should pass a minimum and maximum value for each dimension"
)
if r[0] >= r[1]:
raise ValueError("each range should be strictly increasing")
_range[i][0] = r[0]
Expand All @@ -217,5 +223,7 @@ def histogramdd(sample, bins, range, weights=None):
else:
weights = np.atleast_1d(weights)
if weights.dtype.kind not in NUMERICAL_TYPES:
raise TypeError("weights is not or cannot be converted to a numerical array")
raise TypeError(
"weights is not or cannot be converted to a numerical array"
)
return _histogramdd_weighted(_sample, _bins, _range, weights)
Loading

0 comments on commit 0ecedd5

Please sign in to comment.