From 6385972032d36dd9778a1f7d0d0466d459481d6b Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Wed, 23 May 2018 17:18:39 +0100 Subject: [PATCH 1/9] Added support for datetimes in histogram operation --- holoviews/operation/element.py | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/holoviews/operation/element.py b/holoviews/operation/element.py index e1d2559c84..5cb505700e 100644 --- a/holoviews/operation/element.py +++ b/holoviews/operation/element.py @@ -13,7 +13,7 @@ HoloMap, Dataset, Element, Collator, Dimension) from ..core.data import ArrayInterface, DictInterface from ..core.util import (group_sanitizer, label_sanitizer, pd, - basestring, datetime_types, isfinite) + basestring, datetime_types, isfinite, dt_to_int) from ..element.chart import Histogram, Scatter from ..element.raster import Raster, Image, RGB, QuadMesh from ..element.path import Contours, Polygons @@ -553,31 +553,42 @@ def _process(self, view, key=None): # Avoids range issues including zero bin range and empty bins if hist_range == (0, 0) or any(not isfinite(r) for r in hist_range): hist_range = (0, 1) + + datetimes = False + steps = self.p.num_bins + 1 + start, end = hist_range + if data.dtype.kind == 'M' or (data.dtype.kind == 'O' and isinstance(data[0], datetime_types)): + start, end = dt_to_int(start, 'ns'), dt_to_int(end, 'ns') + datetimes = True + data = data.astype('datetime64[ns]').astype('int64') * 1000. + hist_range = start, end + if self.p.log: - bin_min = max([abs(hist_range[0]), data[data>0].min()]) - edges = np.logspace(np.log10(bin_min), np.log10(hist_range[1]), - self.p.num_bins+1) + bin_min = max([abs(start), data[data>0].min()]) + edges = np.logspace(np.log10(bin_min), np.log10(end), steps) else: - edges = np.linspace(hist_range[0], hist_range[1], self.p.num_bins + 1) + edges = np.linspace(start, end, steps) normed = False if self.p.mean_weighted and self.p.weight_dimension else self.p.normed if len(data): if normed: # This covers True, 'height', 'integral' - hist, edges = np.histogram(data, density=True, range=hist_range, + hist, edges = np.histogram(data, density=True, range=(start, end), weights=weights, bins=edges) if normed=='height': hist /= hist.max() else: - hist, edges = np.histogram(data, normed=normed, range=hist_range, + hist, edges = np.histogram(data, normed=normed, range=(start, end), weights=weights, bins=edges) if self.p.weight_dimension and self.p.mean_weighted: - hist_mean, _ = np.histogram(data, density=False, range=hist_range, + hist_mean, _ = np.histogram(data, density=False, range=(start, end), bins=self.p.num_bins) hist /= hist_mean else: hist = np.zeros(self.p.num_bins) hist[np.isnan(hist)] = 0 + if datetimes: + edges = (edges/10e5).astype('datetime64[us]') params = {} if self.p.weight_dimension: From e9b52f3aa64583b380ad4a9bc66d8ebd09388c63 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Wed, 23 May 2018 17:19:17 +0100 Subject: [PATCH 2/9] Allowed datetime bins in GridInterface --- holoviews/core/data/grid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holoviews/core/data/grid.py b/holoviews/core/data/grid.py index 58e8bd9444..523904b2e4 100644 --- a/holoviews/core/data/grid.py +++ b/holoviews/core/data/grid.py @@ -204,7 +204,7 @@ def coords(cls, dataset, dim, ordered=False, expanded=False, edges=False): if edges and not isedges: data = cls._infer_interval_breaks(data) elif not edges and isedges: - data = np.convolve(data, [0.5, 0.5], 'valid') + data = data[:-1] + np.diff(data)/2. return data From c89056120945363397361dd5e1a2198e6bbcdfec Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 24 May 2018 17:59:31 +0100 Subject: [PATCH 3/9] Added datetime support to matplotlib HistogramPlot --- holoviews/plotting/mpl/chart.py | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/holoviews/plotting/mpl/chart.py b/holoviews/plotting/mpl/chart.py index 0607ccdec7..5500e31160 100644 --- a/holoviews/plotting/mpl/chart.py +++ b/holoviews/plotting/mpl/chart.py @@ -14,7 +14,9 @@ import param from ...core import OrderedDict, Dimension, Store -from ...core.util import match_spec, unique_iterator, basestring, max_range, isfinite +from ...core.util import ( + match_spec, unique_iterator, basestring, max_range, isfinite, datetime_types +) from ...element import Raster, HeatMap from ...operation import interpolate_curve from ..plot import PlotSelector @@ -258,7 +260,11 @@ def initialize_plot(self, ranges=None): el_ranges = match_spec(hist, ranges) # Get plot ranges and values - edges, hvals, widths, lims = self._process_hist(hist) + dims = hist.dimensions()[:2] + edges, hvals, widths, lims, datetime = self._process_hist(hist) + if datetime and not dims[0].value_format: + dt_format = Dimension.type_formatters[np.datetime64] + dims[0] = dims[0](value_format=DateFormatter(dt_format)) style = self.style[self.cyclic_index] if self.invert_axes: @@ -275,6 +281,7 @@ def initialize_plot(self, ranges=None): ticks = self._compute_ticks(hist, edges, widths, lims) ax_settings = self._process_axsettings(hist, lims, ticks) + ax_settings['dimensions'] = dims return self._finalize_axis(self.keys[-1], ranges=el_ranges, element=hist, **ax_settings) @@ -288,9 +295,15 @@ def _process_hist(self, hist): edges = hist.interface.coords(hist, x, edges=True) values = hist.dimension_values(1) hist_vals = np.array(values) + xlim = hist.range(0) + ylim = hist.range(1) + datetime = False + if edges.dtype.kind == 'M' or isinstance(edges[0], datetime_types): + edges = date2num([v.tolist() if isinstance(v, np.datetime64) else v for v in edges]) + xlim = date2num([v.tolist() if isinstance(v, np.datetime64) else v for v in xlim]) + datetime = True widths = np.diff(edges) - lims = hist.range(0) + hist.range(1) - return edges[:-1], hist_vals, widths, lims + return edges[:-1], hist_vals, widths, xlim+ylim, datetime def _compute_ticks(self, element, edges, widths, lims): @@ -381,12 +394,12 @@ def _process_hist(self, hist): """ Subclassed to offset histogram by defined amount. """ - edges, hvals, widths, lims = super(SideHistogramPlot, self)._process_hist(hist) + edges, hvals, widths, lims, datetime = super(SideHistogramPlot, self)._process_hist(hist) offset = self.offset * lims[3] hvals *= 1-self.offset hvals += offset lims = lims[0:3] + (lims[3] + offset,) - return edges, hvals, widths, lims + return edges, hvals, widths, lims, datetime def _update_artists(self, n, element, edges, hvals, widths, lims, ranges): From e146de72c4853ac0cd949a3a97f7b0324daae2bb Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 24 May 2018 17:59:44 +0100 Subject: [PATCH 4/9] Minor fix for unit test --- tests/core/data/testbinneddatasets.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/core/data/testbinneddatasets.py b/tests/core/data/testbinneddatasets.py index f98cab0a13..a297c54795 100644 --- a/tests/core/data/testbinneddatasets.py +++ b/tests/core/data/testbinneddatasets.py @@ -145,7 +145,8 @@ def test_qmesh_slice_xcoords_ycoords(self): def test_groupby_xdim(self): grouped = self.dataset2d.groupby('x', group_type=Dataset) - holomap = HoloMap({self.xs[i:i+2].mean(): Dataset((self.ys, self.zs[:, i]), 'y', 'z') + holomap = HoloMap({(self.xs[i]+np.diff(self.xs[i:i+2])/2.)[0]: + Dataset((self.ys, self.zs[:, i]), 'y', 'z') for i in range(3)}, kdims=['x']) self.assertEqual(grouped, holomap) From 5ceb6809c264ee5b1b7cdd3804d70dc87a9ab50a Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 24 May 2018 18:52:49 +0100 Subject: [PATCH 5/9] Fixed bug in matplotlib HistogramPlot --- holoviews/plotting/mpl/chart.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holoviews/plotting/mpl/chart.py b/holoviews/plotting/mpl/chart.py index 5500e31160..2566937a2e 100644 --- a/holoviews/plotting/mpl/chart.py +++ b/holoviews/plotting/mpl/chart.py @@ -370,7 +370,7 @@ def _update_artists(self, key, hist, edges, hvals, widths, lims, ranges): def update_handles(self, key, axis, element, ranges, style): # Process values, axes and style - edges, hvals, widths, lims = self._process_hist(element) + edges, hvals, widths, lims, datetime = self._process_hist(element) ticks = self._compute_ticks(element, edges, widths, lims) ax_settings = self._process_axsettings(element, lims, ticks) From 918a70b0f26164c8317da6345ba8ed36d886eecc Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 24 May 2018 19:40:14 +0100 Subject: [PATCH 6/9] Small fixes for pandas Period and Timestamp --- holoviews/core/util.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/holoviews/core/util.py b/holoviews/core/util.py index 876ea29868..df338b0566 100644 --- a/holoviews/core/util.py +++ b/holoviews/core/util.py @@ -1675,6 +1675,13 @@ def dt_to_int(value, time_unit='us'): """ Converts a datetime type to an integer with the supplied time unit. """ + if pd: + if isinstance(value, pd.Period): + value = value.to_timestamp() + if isinstance(value, pd.Timestamp): + value = value.to_pydatetime() + value = np.datetime64(value) + if isinstance(value, np.datetime64): value = np.datetime64(value, 'ns') if time_unit == 'ns': @@ -1685,9 +1692,8 @@ def dt_to_int(value, time_unit='us'): tscale = 1000. else: tscale = 1./np.timedelta64(1, time_unit).tolist().total_seconds() - if pd and isinstance(value, pd.Timestamp): - value = value.to_pydatetime() - elif isinstance(value, np.datetime64): + + if isinstance(value, np.datetime64): value = value.tolist() if isinstance(value, (int, long)): # Handle special case of nanosecond precision which cannot be From 98c9e380270276d14e9c7adf81e10132afe57bfc Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 24 May 2018 19:40:54 +0100 Subject: [PATCH 7/9] Added unit tests for histogram operation datetime handling --- tests/operation/testoperation.py | 39 +++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/tests/operation/testoperation.py b/tests/operation/testoperation.py index c63d0f6df1..7b59174b77 100644 --- a/tests/operation/testoperation.py +++ b/tests/operation/testoperation.py @@ -1,9 +1,12 @@ +import datetime as dt + import numpy as np from nose.plugins.attrib import attr from holoviews import (HoloMap, NdOverlay, NdLayout, GridSpace, Image, Contours, Polygons, Points, Histogram, Curve, Area, - QuadMesh) + QuadMesh, Dataset) +from holoviews.core.util import pd from holoviews.element.comparison import ComparisonTestCase from holoviews.operation.element import (operation, transform, threshold, gradient, contours, histogram, @@ -138,6 +141,40 @@ def test_points_histogram_not_normed(self): hist = Histogram(([3, 3, 4], [0, 3, 6, 9])) self.assertEqual(op_hist, hist) + def test_histogram_operation_datetime(self): + dates = np.array([dt.datetime(2017, 1, i) for i in range(1, 5)]) + op_hist = histogram(Dataset(dates, 'Date'), num_bins=4) + hist_data = {'Date': np.array(['2017-01-01T00:00:00.000000', '2017-01-01T17:59:59.999999', + '2017-01-02T12:00:00.000000', '2017-01-03T06:00:00.000000', + '2017-01-04T00:00:00.000000'], dtype='datetime64[us]'), + 'Date_frequency': np.array([ 3.85802469e-18, 3.85802469e-18, 3.85802469e-18, + 3.85802469e-18])} + hist = Histogram(hist_data, kdims='Date', vdims=('Date_frequency', 'Date Frequency')) + self.assertEqual(op_hist, hist) + + def test_histogram_operation_datetime64(self): + dates = np.array([dt.datetime(2017, 1, i) for i in range(1, 5)]).astype('M') + op_hist = histogram(Dataset(dates, 'Date'), num_bins=4) + hist_data = {'Date': np.array(['2017-01-01T00:00:00.000000', '2017-01-01T17:59:59.999999', + '2017-01-02T12:00:00.000000', '2017-01-03T06:00:00.000000', + '2017-01-04T00:00:00.000000'], dtype='datetime64[us]'), + 'Date_frequency': np.array([ 3.85802469e-18, 3.85802469e-18, 3.85802469e-18, + 3.85802469e-18])} + hist = Histogram(hist_data, kdims='Date', vdims=('Date_frequency', 'Date Frequency')) + self.assertEqual(op_hist, hist) + + @attr(optional=1) # Requires matplotlib + def test_histogram_operation_pd_period(self): + dates = pd.date_range('2017-01-01', '2017-01-04', freq='D').to_period('D') + op_hist = histogram(Dataset(dates, 'Date'), num_bins=4) + hist_data = {'Date': np.array(['2017-01-01T00:00:00.000000', '2017-01-01T17:59:59.999999', + '2017-01-02T12:00:00.000000', '2017-01-03T06:00:00.000000', + '2017-01-04T00:00:00.000000'], dtype='datetime64[us]'), + 'Date_frequency': np.array([ 3.85802469e-18, 3.85802469e-18, 3.85802469e-18, + 3.85802469e-18])} + hist = Histogram(hist_data, kdims='Date', vdims=('Date_frequency', 'Date Frequency')) + self.assertEqual(op_hist, hist) + def test_points_histogram_weighted(self): points = Points([float(i) for i in range(10)]) op_hist = histogram(points, num_bins=3, weight_dimension='y') From a9878594908aa0d427bdc985b1043c407024243d Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 24 May 2018 19:55:44 +0100 Subject: [PATCH 8/9] Added unit tests for HistogramPlot datetime handling --- tests/plotting/bokeh/testhistogramplot.py | 27 ++++++++++++++++++- .../plotting/matplotlib/testhistogramplot.py | 20 ++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 tests/plotting/matplotlib/testhistogramplot.py diff --git a/tests/plotting/bokeh/testhistogramplot.py b/tests/plotting/bokeh/testhistogramplot.py index 1a0c797949..5bda5f3928 100644 --- a/tests/plotting/bokeh/testhistogramplot.py +++ b/tests/plotting/bokeh/testhistogramplot.py @@ -1,6 +1,11 @@ +import datetime as dt + import numpy as np -from holoviews.element import Image, Points +from holoviews.element import Image, Points, Dataset +from holoviews.operation import histogram + +from bokeh.models import DatetimeAxis from .testplot import TestBokehPlot, bokeh_renderer @@ -47,3 +52,23 @@ def test_side_histogram_cmapper_weighted(self): self.assertIs(main_plot.handles['color_mapper'], top_plot.handles['color_mapper']) self.assertEqual(main_plot.handles['color_dim'], img.vdims[0]) + + def test_histogram_datetime64_plot(self): + dates = np.array([dt.datetime(2017, 1, i) for i in range(1, 5)]) + hist = histogram(Dataset(dates, 'Date'), num_bins=4) + plot = bokeh_renderer.get_plot(hist) + source = plot.handles['source'] + data = {'top': np.array([ 3.85802469e-18, 3.85802469e-18, 3.85802469e-18, 3.85802469e-18]), + 'left': np.array(['2017-01-01T00:00:00.000000', '2017-01-01T17:59:59.999999', + '2017-01-02T12:00:00.000000', '2017-01-03T06:00:00.000000'], + dtype='datetime64[us]'), + 'right': np.array(['2017-01-01T17:59:59.999999', '2017-01-02T12:00:00.000000', + '2017-01-03T06:00:00.000000', '2017-01-04T00:00:00.000000'], + dtype='datetime64[us]')} + for k, v in data.items(): + self.assertEqual(source.data[k], v) + xaxis = plot.handles['xaxis'] + range_x = plot.handles['x_range'] + self.assertIsInstance(xaxis, DatetimeAxis) + self.assertEqual(range_x.start, np.datetime64('2017-01-01T00:00:00.000000', 'us')) + self.assertEqual(range_x.end, np.datetime64('2017-01-04T00:00:00.000000', 'us')) diff --git a/tests/plotting/matplotlib/testhistogramplot.py b/tests/plotting/matplotlib/testhistogramplot.py new file mode 100644 index 0000000000..0e85f3ff52 --- /dev/null +++ b/tests/plotting/matplotlib/testhistogramplot.py @@ -0,0 +1,20 @@ +import datetime as dt + +import numpy as np + +from holoviews.element import Dataset +from holoviews.operation import histogram + +from .testplot import TestMPLPlot, mpl_renderer + + +class TestCurvePlot(TestMPLPlot): + + def test_histogram_datetime64_plot(self): + dates = np.array([dt.datetime(2017, 1, i) for i in range(1, 5)]) + hist = histogram(Dataset(dates, 'Date'), num_bins=4) + plot = mpl_renderer.get_plot(hist) + artist = plot.handles['artist'] + ax = plot.handles['axis'] + self.assertEqual(ax.get_xlim(), (736330.0, 736333.0)) + self.assertEqual([p.get_x() for p in artist.patches], [736330.0, 736330.75, 736331.5, 736332.25]) From 400540ec275f7c9b0614b5a0f548e6eb766f79b4 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 26 May 2018 13:17:57 +0100 Subject: [PATCH 9/9] Better variable naming --- holoviews/plotting/mpl/chart.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/holoviews/plotting/mpl/chart.py b/holoviews/plotting/mpl/chart.py index 2566937a2e..f203719978 100644 --- a/holoviews/plotting/mpl/chart.py +++ b/holoviews/plotting/mpl/chart.py @@ -261,8 +261,8 @@ def initialize_plot(self, ranges=None): # Get plot ranges and values dims = hist.dimensions()[:2] - edges, hvals, widths, lims, datetime = self._process_hist(hist) - if datetime and not dims[0].value_format: + edges, hvals, widths, lims, isdatetime = self._process_hist(hist) + if isdatetime and not dims[0].value_format: dt_format = Dimension.type_formatters[np.datetime64] dims[0] = dims[0](value_format=DateFormatter(dt_format)) @@ -297,13 +297,13 @@ def _process_hist(self, hist): hist_vals = np.array(values) xlim = hist.range(0) ylim = hist.range(1) - datetime = False + isdatetime = False if edges.dtype.kind == 'M' or isinstance(edges[0], datetime_types): edges = date2num([v.tolist() if isinstance(v, np.datetime64) else v for v in edges]) xlim = date2num([v.tolist() if isinstance(v, np.datetime64) else v for v in xlim]) - datetime = True + isdatetime = True widths = np.diff(edges) - return edges[:-1], hist_vals, widths, xlim+ylim, datetime + return edges[:-1], hist_vals, widths, xlim+ylim, isdatetime def _compute_ticks(self, element, edges, widths, lims): @@ -370,7 +370,7 @@ def _update_artists(self, key, hist, edges, hvals, widths, lims, ranges): def update_handles(self, key, axis, element, ranges, style): # Process values, axes and style - edges, hvals, widths, lims, datetime = self._process_hist(element) + edges, hvals, widths, lims, _ = self._process_hist(element) ticks = self._compute_ticks(element, edges, widths, lims) ax_settings = self._process_axsettings(element, lims, ticks) @@ -394,12 +394,12 @@ def _process_hist(self, hist): """ Subclassed to offset histogram by defined amount. """ - edges, hvals, widths, lims, datetime = super(SideHistogramPlot, self)._process_hist(hist) + edges, hvals, widths, lims, isdatetime = super(SideHistogramPlot, self)._process_hist(hist) offset = self.offset * lims[3] hvals *= 1-self.offset hvals += offset lims = lims[0:3] + (lims[3] + offset,) - return edges, hvals, widths, lims, datetime + return edges, hvals, widths, lims, isdatetime def _update_artists(self, n, element, edges, hvals, widths, lims, ranges):