From 6385972032d36dd9778a1f7d0d0466d459481d6b Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Wed, 23 May 2018 17:18:39 +0100
Subject: [PATCH 1/9] Added support for datetimes in histogram operation
---
holoviews/operation/element.py | 27 +++++++++++++++++++--------
1 file changed, 19 insertions(+), 8 deletions(-)
diff --git a/holoviews/operation/element.py b/holoviews/operation/element.py
index e1d2559c84..5cb505700e 100644
--- a/holoviews/operation/element.py
+++ b/holoviews/operation/element.py
@@ -13,7 +13,7 @@
HoloMap, Dataset, Element, Collator, Dimension)
from ..core.data import ArrayInterface, DictInterface
from ..core.util import (group_sanitizer, label_sanitizer, pd,
- basestring, datetime_types, isfinite)
+ basestring, datetime_types, isfinite, dt_to_int)
from ..element.chart import Histogram, Scatter
from ..element.raster import Raster, Image, RGB, QuadMesh
from ..element.path import Contours, Polygons
@@ -553,31 +553,42 @@ def _process(self, view, key=None):
# Avoids range issues including zero bin range and empty bins
if hist_range == (0, 0) or any(not isfinite(r) for r in hist_range):
hist_range = (0, 1)
+
+ datetimes = False
+ steps = self.p.num_bins + 1
+ start, end = hist_range
+ if data.dtype.kind == 'M' or (data.dtype.kind == 'O' and isinstance(data[0], datetime_types)):
+ start, end = dt_to_int(start, 'ns'), dt_to_int(end, 'ns')
+ datetimes = True
+ data = data.astype('datetime64[ns]').astype('int64') * 1000.
+ hist_range = start, end
+
if self.p.log:
- bin_min = max([abs(hist_range[0]), data[data>0].min()])
- edges = np.logspace(np.log10(bin_min), np.log10(hist_range[1]),
- self.p.num_bins+1)
+ bin_min = max([abs(start), data[data>0].min()])
+ edges = np.logspace(np.log10(bin_min), np.log10(end), steps)
else:
- edges = np.linspace(hist_range[0], hist_range[1], self.p.num_bins + 1)
+ edges = np.linspace(start, end, steps)
normed = False if self.p.mean_weighted and self.p.weight_dimension else self.p.normed
if len(data):
if normed:
# This covers True, 'height', 'integral'
- hist, edges = np.histogram(data, density=True, range=hist_range,
+ hist, edges = np.histogram(data, density=True, range=(start, end),
weights=weights, bins=edges)
if normed=='height':
hist /= hist.max()
else:
- hist, edges = np.histogram(data, normed=normed, range=hist_range,
+ hist, edges = np.histogram(data, normed=normed, range=(start, end),
weights=weights, bins=edges)
if self.p.weight_dimension and self.p.mean_weighted:
- hist_mean, _ = np.histogram(data, density=False, range=hist_range,
+ hist_mean, _ = np.histogram(data, density=False, range=(start, end),
bins=self.p.num_bins)
hist /= hist_mean
else:
hist = np.zeros(self.p.num_bins)
hist[np.isnan(hist)] = 0
+ if datetimes:
+ edges = (edges/10e5).astype('datetime64[us]')
params = {}
if self.p.weight_dimension:
From e9b52f3aa64583b380ad4a9bc66d8ebd09388c63 Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Wed, 23 May 2018 17:19:17 +0100
Subject: [PATCH 2/9] Allowed datetime bins in GridInterface
---
holoviews/core/data/grid.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/holoviews/core/data/grid.py b/holoviews/core/data/grid.py
index 58e8bd9444..523904b2e4 100644
--- a/holoviews/core/data/grid.py
+++ b/holoviews/core/data/grid.py
@@ -204,7 +204,7 @@ def coords(cls, dataset, dim, ordered=False, expanded=False, edges=False):
if edges and not isedges:
data = cls._infer_interval_breaks(data)
elif not edges and isedges:
- data = np.convolve(data, [0.5, 0.5], 'valid')
+ data = data[:-1] + np.diff(data)/2.
return data
From c89056120945363397361dd5e1a2198e6bbcdfec Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Thu, 24 May 2018 17:59:31 +0100
Subject: [PATCH 3/9] Added datetime support to matplotlib HistogramPlot
---
holoviews/plotting/mpl/chart.py | 25 +++++++++++++++++++------
1 file changed, 19 insertions(+), 6 deletions(-)
diff --git a/holoviews/plotting/mpl/chart.py b/holoviews/plotting/mpl/chart.py
index 0607ccdec7..5500e31160 100644
--- a/holoviews/plotting/mpl/chart.py
+++ b/holoviews/plotting/mpl/chart.py
@@ -14,7 +14,9 @@
import param
from ...core import OrderedDict, Dimension, Store
-from ...core.util import match_spec, unique_iterator, basestring, max_range, isfinite
+from ...core.util import (
+ match_spec, unique_iterator, basestring, max_range, isfinite, datetime_types
+)
from ...element import Raster, HeatMap
from ...operation import interpolate_curve
from ..plot import PlotSelector
@@ -258,7 +260,11 @@ def initialize_plot(self, ranges=None):
el_ranges = match_spec(hist, ranges)
# Get plot ranges and values
- edges, hvals, widths, lims = self._process_hist(hist)
+ dims = hist.dimensions()[:2]
+ edges, hvals, widths, lims, datetime = self._process_hist(hist)
+ if datetime and not dims[0].value_format:
+ dt_format = Dimension.type_formatters[np.datetime64]
+ dims[0] = dims[0](value_format=DateFormatter(dt_format))
style = self.style[self.cyclic_index]
if self.invert_axes:
@@ -275,6 +281,7 @@ def initialize_plot(self, ranges=None):
ticks = self._compute_ticks(hist, edges, widths, lims)
ax_settings = self._process_axsettings(hist, lims, ticks)
+ ax_settings['dimensions'] = dims
return self._finalize_axis(self.keys[-1], ranges=el_ranges, element=hist, **ax_settings)
@@ -288,9 +295,15 @@ def _process_hist(self, hist):
edges = hist.interface.coords(hist, x, edges=True)
values = hist.dimension_values(1)
hist_vals = np.array(values)
+ xlim = hist.range(0)
+ ylim = hist.range(1)
+ datetime = False
+ if edges.dtype.kind == 'M' or isinstance(edges[0], datetime_types):
+ edges = date2num([v.tolist() if isinstance(v, np.datetime64) else v for v in edges])
+ xlim = date2num([v.tolist() if isinstance(v, np.datetime64) else v for v in xlim])
+ datetime = True
widths = np.diff(edges)
- lims = hist.range(0) + hist.range(1)
- return edges[:-1], hist_vals, widths, lims
+ return edges[:-1], hist_vals, widths, xlim+ylim, datetime
def _compute_ticks(self, element, edges, widths, lims):
@@ -381,12 +394,12 @@ def _process_hist(self, hist):
"""
Subclassed to offset histogram by defined amount.
"""
- edges, hvals, widths, lims = super(SideHistogramPlot, self)._process_hist(hist)
+ edges, hvals, widths, lims, datetime = super(SideHistogramPlot, self)._process_hist(hist)
offset = self.offset * lims[3]
hvals *= 1-self.offset
hvals += offset
lims = lims[0:3] + (lims[3] + offset,)
- return edges, hvals, widths, lims
+ return edges, hvals, widths, lims, datetime
def _update_artists(self, n, element, edges, hvals, widths, lims, ranges):
From e146de72c4853ac0cd949a3a97f7b0324daae2bb Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Thu, 24 May 2018 17:59:44 +0100
Subject: [PATCH 4/9] Minor fix for unit test
---
tests/core/data/testbinneddatasets.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/tests/core/data/testbinneddatasets.py b/tests/core/data/testbinneddatasets.py
index f98cab0a13..a297c54795 100644
--- a/tests/core/data/testbinneddatasets.py
+++ b/tests/core/data/testbinneddatasets.py
@@ -145,7 +145,8 @@ def test_qmesh_slice_xcoords_ycoords(self):
def test_groupby_xdim(self):
grouped = self.dataset2d.groupby('x', group_type=Dataset)
- holomap = HoloMap({self.xs[i:i+2].mean(): Dataset((self.ys, self.zs[:, i]), 'y', 'z')
+ holomap = HoloMap({(self.xs[i]+np.diff(self.xs[i:i+2])/2.)[0]:
+ Dataset((self.ys, self.zs[:, i]), 'y', 'z')
for i in range(3)}, kdims=['x'])
self.assertEqual(grouped, holomap)
From 5ceb6809c264ee5b1b7cdd3804d70dc87a9ab50a Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Thu, 24 May 2018 18:52:49 +0100
Subject: [PATCH 5/9] Fixed bug in matplotlib HistogramPlot
---
holoviews/plotting/mpl/chart.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/holoviews/plotting/mpl/chart.py b/holoviews/plotting/mpl/chart.py
index 5500e31160..2566937a2e 100644
--- a/holoviews/plotting/mpl/chart.py
+++ b/holoviews/plotting/mpl/chart.py
@@ -370,7 +370,7 @@ def _update_artists(self, key, hist, edges, hvals, widths, lims, ranges):
def update_handles(self, key, axis, element, ranges, style):
# Process values, axes and style
- edges, hvals, widths, lims = self._process_hist(element)
+ edges, hvals, widths, lims, datetime = self._process_hist(element)
ticks = self._compute_ticks(element, edges, widths, lims)
ax_settings = self._process_axsettings(element, lims, ticks)
From 918a70b0f26164c8317da6345ba8ed36d886eecc Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Thu, 24 May 2018 19:40:14 +0100
Subject: [PATCH 6/9] Small fixes for pandas Period and Timestamp
---
holoviews/core/util.py | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/holoviews/core/util.py b/holoviews/core/util.py
index 876ea29868..df338b0566 100644
--- a/holoviews/core/util.py
+++ b/holoviews/core/util.py
@@ -1675,6 +1675,13 @@ def dt_to_int(value, time_unit='us'):
"""
Converts a datetime type to an integer with the supplied time unit.
"""
+ if pd:
+ if isinstance(value, pd.Period):
+ value = value.to_timestamp()
+ if isinstance(value, pd.Timestamp):
+ value = value.to_pydatetime()
+ value = np.datetime64(value)
+
if isinstance(value, np.datetime64):
value = np.datetime64(value, 'ns')
if time_unit == 'ns':
@@ -1685,9 +1692,8 @@ def dt_to_int(value, time_unit='us'):
tscale = 1000.
else:
tscale = 1./np.timedelta64(1, time_unit).tolist().total_seconds()
- if pd and isinstance(value, pd.Timestamp):
- value = value.to_pydatetime()
- elif isinstance(value, np.datetime64):
+
+ if isinstance(value, np.datetime64):
value = value.tolist()
if isinstance(value, (int, long)):
# Handle special case of nanosecond precision which cannot be
From 98c9e380270276d14e9c7adf81e10132afe57bfc Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Thu, 24 May 2018 19:40:54 +0100
Subject: [PATCH 7/9] Added unit tests for histogram operation datetime
handling
---
tests/operation/testoperation.py | 39 +++++++++++++++++++++++++++++++-
1 file changed, 38 insertions(+), 1 deletion(-)
diff --git a/tests/operation/testoperation.py b/tests/operation/testoperation.py
index c63d0f6df1..7b59174b77 100644
--- a/tests/operation/testoperation.py
+++ b/tests/operation/testoperation.py
@@ -1,9 +1,12 @@
+import datetime as dt
+
import numpy as np
from nose.plugins.attrib import attr
from holoviews import (HoloMap, NdOverlay, NdLayout, GridSpace, Image,
Contours, Polygons, Points, Histogram, Curve, Area,
- QuadMesh)
+ QuadMesh, Dataset)
+from holoviews.core.util import pd
from holoviews.element.comparison import ComparisonTestCase
from holoviews.operation.element import (operation, transform, threshold,
gradient, contours, histogram,
@@ -138,6 +141,40 @@ def test_points_histogram_not_normed(self):
hist = Histogram(([3, 3, 4], [0, 3, 6, 9]))
self.assertEqual(op_hist, hist)
+ def test_histogram_operation_datetime(self):
+ dates = np.array([dt.datetime(2017, 1, i) for i in range(1, 5)])
+ op_hist = histogram(Dataset(dates, 'Date'), num_bins=4)
+ hist_data = {'Date': np.array(['2017-01-01T00:00:00.000000', '2017-01-01T17:59:59.999999',
+ '2017-01-02T12:00:00.000000', '2017-01-03T06:00:00.000000',
+ '2017-01-04T00:00:00.000000'], dtype='datetime64[us]'),
+ 'Date_frequency': np.array([ 3.85802469e-18, 3.85802469e-18, 3.85802469e-18,
+ 3.85802469e-18])}
+ hist = Histogram(hist_data, kdims='Date', vdims=('Date_frequency', 'Date Frequency'))
+ self.assertEqual(op_hist, hist)
+
+ def test_histogram_operation_datetime64(self):
+ dates = np.array([dt.datetime(2017, 1, i) for i in range(1, 5)]).astype('M')
+ op_hist = histogram(Dataset(dates, 'Date'), num_bins=4)
+ hist_data = {'Date': np.array(['2017-01-01T00:00:00.000000', '2017-01-01T17:59:59.999999',
+ '2017-01-02T12:00:00.000000', '2017-01-03T06:00:00.000000',
+ '2017-01-04T00:00:00.000000'], dtype='datetime64[us]'),
+ 'Date_frequency': np.array([ 3.85802469e-18, 3.85802469e-18, 3.85802469e-18,
+ 3.85802469e-18])}
+ hist = Histogram(hist_data, kdims='Date', vdims=('Date_frequency', 'Date Frequency'))
+ self.assertEqual(op_hist, hist)
+
+ @attr(optional=1) # Requires pandas
+ def test_histogram_operation_pd_period(self):
+ dates = pd.date_range('2017-01-01', '2017-01-04', freq='D').to_period('D')
+ op_hist = histogram(Dataset(dates, 'Date'), num_bins=4)
+ hist_data = {'Date': np.array(['2017-01-01T00:00:00.000000', '2017-01-01T17:59:59.999999',
+ '2017-01-02T12:00:00.000000', '2017-01-03T06:00:00.000000',
+ '2017-01-04T00:00:00.000000'], dtype='datetime64[us]'),
+ 'Date_frequency': np.array([ 3.85802469e-18, 3.85802469e-18, 3.85802469e-18,
+ 3.85802469e-18])}
+ hist = Histogram(hist_data, kdims='Date', vdims=('Date_frequency', 'Date Frequency'))
+ self.assertEqual(op_hist, hist)
+
def test_points_histogram_weighted(self):
points = Points([float(i) for i in range(10)])
op_hist = histogram(points, num_bins=3, weight_dimension='y')
From a9878594908aa0d427bdc985b1043c407024243d Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Thu, 24 May 2018 19:55:44 +0100
Subject: [PATCH 8/9] Added unit tests for HistogramPlot datetime handling
---
tests/plotting/bokeh/testhistogramplot.py | 27 ++++++++++++++++++-
.../plotting/matplotlib/testhistogramplot.py | 20 ++++++++++++++
2 files changed, 46 insertions(+), 1 deletion(-)
create mode 100644 tests/plotting/matplotlib/testhistogramplot.py
diff --git a/tests/plotting/bokeh/testhistogramplot.py b/tests/plotting/bokeh/testhistogramplot.py
index 1a0c797949..5bda5f3928 100644
--- a/tests/plotting/bokeh/testhistogramplot.py
+++ b/tests/plotting/bokeh/testhistogramplot.py
@@ -1,6 +1,11 @@
+import datetime as dt
+
import numpy as np
-from holoviews.element import Image, Points
+from holoviews.element import Image, Points, Dataset
+from holoviews.operation import histogram
+
+from bokeh.models import DatetimeAxis
from .testplot import TestBokehPlot, bokeh_renderer
@@ -47,3 +52,23 @@ def test_side_histogram_cmapper_weighted(self):
self.assertIs(main_plot.handles['color_mapper'],
top_plot.handles['color_mapper'])
self.assertEqual(main_plot.handles['color_dim'], img.vdims[0])
+
+ def test_histogram_datetime64_plot(self):
+ dates = np.array([dt.datetime(2017, 1, i) for i in range(1, 5)])
+ hist = histogram(Dataset(dates, 'Date'), num_bins=4)
+ plot = bokeh_renderer.get_plot(hist)
+ source = plot.handles['source']
+ data = {'top': np.array([ 3.85802469e-18, 3.85802469e-18, 3.85802469e-18, 3.85802469e-18]),
+ 'left': np.array(['2017-01-01T00:00:00.000000', '2017-01-01T17:59:59.999999',
+ '2017-01-02T12:00:00.000000', '2017-01-03T06:00:00.000000'],
+ dtype='datetime64[us]'),
+ 'right': np.array(['2017-01-01T17:59:59.999999', '2017-01-02T12:00:00.000000',
+ '2017-01-03T06:00:00.000000', '2017-01-04T00:00:00.000000'],
+ dtype='datetime64[us]')}
+ for k, v in data.items():
+ self.assertEqual(source.data[k], v)
+ xaxis = plot.handles['xaxis']
+ range_x = plot.handles['x_range']
+ self.assertIsInstance(xaxis, DatetimeAxis)
+ self.assertEqual(range_x.start, np.datetime64('2017-01-01T00:00:00.000000', 'us'))
+ self.assertEqual(range_x.end, np.datetime64('2017-01-04T00:00:00.000000', 'us'))
diff --git a/tests/plotting/matplotlib/testhistogramplot.py b/tests/plotting/matplotlib/testhistogramplot.py
new file mode 100644
index 0000000000..0e85f3ff52
--- /dev/null
+++ b/tests/plotting/matplotlib/testhistogramplot.py
@@ -0,0 +1,20 @@
+import datetime as dt
+
+import numpy as np
+
+from holoviews.element import Dataset
+from holoviews.operation import histogram
+
+from .testplot import TestMPLPlot, mpl_renderer
+
+
+class TestCurvePlot(TestMPLPlot):
+
+ def test_histogram_datetime64_plot(self):
+ dates = np.array([dt.datetime(2017, 1, i) for i in range(1, 5)])
+ hist = histogram(Dataset(dates, 'Date'), num_bins=4)
+ plot = mpl_renderer.get_plot(hist)
+ artist = plot.handles['artist']
+ ax = plot.handles['axis']
+ self.assertEqual(ax.get_xlim(), (736330.0, 736333.0))
+ self.assertEqual([p.get_x() for p in artist.patches], [736330.0, 736330.75, 736331.5, 736332.25])
From 400540ec275f7c9b0614b5a0f548e6eb766f79b4 Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Sat, 26 May 2018 13:17:57 +0100
Subject: [PATCH 9/9] Better variable naming
---
holoviews/plotting/mpl/chart.py | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/holoviews/plotting/mpl/chart.py b/holoviews/plotting/mpl/chart.py
index 2566937a2e..f203719978 100644
--- a/holoviews/plotting/mpl/chart.py
+++ b/holoviews/plotting/mpl/chart.py
@@ -261,8 +261,8 @@ def initialize_plot(self, ranges=None):
# Get plot ranges and values
dims = hist.dimensions()[:2]
- edges, hvals, widths, lims, datetime = self._process_hist(hist)
- if datetime and not dims[0].value_format:
+ edges, hvals, widths, lims, isdatetime = self._process_hist(hist)
+ if isdatetime and not dims[0].value_format:
dt_format = Dimension.type_formatters[np.datetime64]
dims[0] = dims[0](value_format=DateFormatter(dt_format))
@@ -297,13 +297,13 @@ def _process_hist(self, hist):
hist_vals = np.array(values)
xlim = hist.range(0)
ylim = hist.range(1)
- datetime = False
+ isdatetime = False
if edges.dtype.kind == 'M' or isinstance(edges[0], datetime_types):
edges = date2num([v.tolist() if isinstance(v, np.datetime64) else v for v in edges])
xlim = date2num([v.tolist() if isinstance(v, np.datetime64) else v for v in xlim])
- datetime = True
+ isdatetime = True
widths = np.diff(edges)
- return edges[:-1], hist_vals, widths, xlim+ylim, datetime
+ return edges[:-1], hist_vals, widths, xlim+ylim, isdatetime
def _compute_ticks(self, element, edges, widths, lims):
@@ -370,7 +370,7 @@ def _update_artists(self, key, hist, edges, hvals, widths, lims, ranges):
def update_handles(self, key, axis, element, ranges, style):
# Process values, axes and style
- edges, hvals, widths, lims, datetime = self._process_hist(element)
+ edges, hvals, widths, lims, _ = self._process_hist(element)
ticks = self._compute_ticks(element, edges, widths, lims)
ax_settings = self._process_axsettings(element, lims, ticks)
@@ -394,12 +394,12 @@ def _process_hist(self, hist):
"""
Subclassed to offset histogram by defined amount.
"""
- edges, hvals, widths, lims, datetime = super(SideHistogramPlot, self)._process_hist(hist)
+ edges, hvals, widths, lims, isdatetime = super(SideHistogramPlot, self)._process_hist(hist)
offset = self.offset * lims[3]
hvals *= 1-self.offset
hvals += offset
lims = lims[0:3] + (lims[3] + offset,)
- return edges, hvals, widths, lims, datetime
+ return edges, hvals, widths, lims, isdatetime
def _update_artists(self, n, element, edges, hvals, widths, lims, ranges):