Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support datetimes in histogram operation #2719

Merged
merged 9 commits into from
May 28, 2018
2 changes: 1 addition & 1 deletion holoviews/core/data/grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ def coords(cls, dataset, dim, ordered=False, expanded=False, edges=False):
if edges and not isedges:
data = cls._infer_interval_breaks(data)
elif not edges and isedges:
data = np.convolve(data, [0.5, 0.5], 'valid')
data = data[:-1] + np.diff(data)/2.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Much simpler than a weird and confusing convolve call!

return data


Expand Down
12 changes: 9 additions & 3 deletions holoviews/core/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -1675,6 +1675,13 @@ def dt_to_int(value, time_unit='us'):
"""
Converts a datetime type to an integer with the supplied time unit.
"""
if pd:
if isinstance(value, pd.Period):
value = value.to_timestamp()
if isinstance(value, pd.Timestamp):
value = value.to_pydatetime()
value = np.datetime64(value)

if isinstance(value, np.datetime64):
value = np.datetime64(value, 'ns')
if time_unit == 'ns':
Expand All @@ -1685,9 +1692,8 @@ def dt_to_int(value, time_unit='us'):
tscale = 1000.
else:
tscale = 1./np.timedelta64(1, time_unit).tolist().total_seconds()
if pd and isinstance(value, pd.Timestamp):
value = value.to_pydatetime()
elif isinstance(value, np.datetime64):

if isinstance(value, np.datetime64):
value = value.tolist()
if isinstance(value, (int, long)):
# Handle special case of nanosecond precision which cannot be
Expand Down
27 changes: 19 additions & 8 deletions holoviews/operation/element.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
HoloMap, Dataset, Element, Collator, Dimension)
from ..core.data import ArrayInterface, DictInterface
from ..core.util import (group_sanitizer, label_sanitizer, pd,
basestring, datetime_types, isfinite)
basestring, datetime_types, isfinite, dt_to_int)
from ..element.chart import Histogram, Scatter
from ..element.raster import Raster, Image, RGB, QuadMesh
from ..element.path import Contours, Polygons
Expand Down Expand Up @@ -553,31 +553,42 @@ def _process(self, view, key=None):
# Avoids range issues including zero bin range and empty bins
if hist_range == (0, 0) or any(not isfinite(r) for r in hist_range):
hist_range = (0, 1)

datetimes = False
steps = self.p.num_bins + 1
start, end = hist_range
if data.dtype.kind == 'M' or (data.dtype.kind == 'O' and isinstance(data[0], datetime_types)):
start, end = dt_to_int(start, 'ns'), dt_to_int(end, 'ns')
datetimes = True
data = data.astype('datetime64[ns]').astype('int64') * 1000.
hist_range = start, end

if self.p.log:
bin_min = max([abs(hist_range[0]), data[data>0].min()])
edges = np.logspace(np.log10(bin_min), np.log10(hist_range[1]),
self.p.num_bins+1)
bin_min = max([abs(start), data[data>0].min()])
edges = np.logspace(np.log10(bin_min), np.log10(end), steps)
else:
edges = np.linspace(hist_range[0], hist_range[1], self.p.num_bins + 1)
edges = np.linspace(start, end, steps)
normed = False if self.p.mean_weighted and self.p.weight_dimension else self.p.normed

if len(data):
if normed:
# This covers True, 'height', 'integral'
hist, edges = np.histogram(data, density=True, range=hist_range,
hist, edges = np.histogram(data, density=True, range=(start, end),
weights=weights, bins=edges)
if normed=='height':
hist /= hist.max()
else:
hist, edges = np.histogram(data, normed=normed, range=hist_range,
hist, edges = np.histogram(data, normed=normed, range=(start, end),
weights=weights, bins=edges)
if self.p.weight_dimension and self.p.mean_weighted:
hist_mean, _ = np.histogram(data, density=False, range=hist_range,
hist_mean, _ = np.histogram(data, density=False, range=(start, end),
bins=self.p.num_bins)
hist /= hist_mean
else:
hist = np.zeros(self.p.num_bins)
hist[np.isnan(hist)] = 0
if datetimes:
edges = (edges/10e5).astype('datetime64[us]')

params = {}
if self.p.weight_dimension:
Expand Down
27 changes: 20 additions & 7 deletions holoviews/plotting/mpl/chart.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
import param

from ...core import OrderedDict, Dimension, Store
from ...core.util import match_spec, unique_iterator, basestring, max_range, isfinite
from ...core.util import (
match_spec, unique_iterator, basestring, max_range, isfinite, datetime_types
)
from ...element import Raster, HeatMap
from ...operation import interpolate_curve
from ..plot import PlotSelector
Expand Down Expand Up @@ -258,7 +260,11 @@ def initialize_plot(self, ranges=None):
el_ranges = match_spec(hist, ranges)

# Get plot ranges and values
edges, hvals, widths, lims = self._process_hist(hist)
dims = hist.dimensions()[:2]
edges, hvals, widths, lims, isdatetime = self._process_hist(hist)
if isdatetime and not dims[0].value_format:
dt_format = Dimension.type_formatters[np.datetime64]
dims[0] = dims[0](value_format=DateFormatter(dt_format))

style = self.style[self.cyclic_index]
if self.invert_axes:
Expand All @@ -275,6 +281,7 @@ def initialize_plot(self, ranges=None):

ticks = self._compute_ticks(hist, edges, widths, lims)
ax_settings = self._process_axsettings(hist, lims, ticks)
ax_settings['dimensions'] = dims

return self._finalize_axis(self.keys[-1], ranges=el_ranges, element=hist, **ax_settings)

Expand All @@ -288,9 +295,15 @@ def _process_hist(self, hist):
edges = hist.interface.coords(hist, x, edges=True)
values = hist.dimension_values(1)
hist_vals = np.array(values)
xlim = hist.range(0)
ylim = hist.range(1)
isdatetime = False
if edges.dtype.kind == 'M' or isinstance(edges[0], datetime_types):
edges = date2num([v.tolist() if isinstance(v, np.datetime64) else v for v in edges])
xlim = date2num([v.tolist() if isinstance(v, np.datetime64) else v for v in xlim])
isdatetime = True
widths = np.diff(edges)
lims = hist.range(0) + hist.range(1)
return edges[:-1], hist_vals, widths, lims
return edges[:-1], hist_vals, widths, xlim+ylim, isdatetime
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can still conceive of wanting support for other numeric types one day (e.g. rationals?), but this is fine for now. We can use a general type specifier when we find ourselves needing to do something similar for another type. This is fine for now.



def _compute_ticks(self, element, edges, widths, lims):
Expand Down Expand Up @@ -357,7 +370,7 @@ def _update_artists(self, key, hist, edges, hvals, widths, lims, ranges):

def update_handles(self, key, axis, element, ranges, style):
# Process values, axes and style
edges, hvals, widths, lims = self._process_hist(element)
edges, hvals, widths, lims, _ = self._process_hist(element)

ticks = self._compute_ticks(element, edges, widths, lims)
ax_settings = self._process_axsettings(element, lims, ticks)
Expand All @@ -381,12 +394,12 @@ def _process_hist(self, hist):
"""
Subclassed to offset histogram by defined amount.
"""
edges, hvals, widths, lims = super(SideHistogramPlot, self)._process_hist(hist)
edges, hvals, widths, lims, isdatetime = super(SideHistogramPlot, self)._process_hist(hist)
offset = self.offset * lims[3]
hvals *= 1-self.offset
hvals += offset
lims = lims[0:3] + (lims[3] + offset,)
return edges, hvals, widths, lims
return edges, hvals, widths, lims, isdatetime


def _update_artists(self, n, element, edges, hvals, widths, lims, ranges):
Expand Down
3 changes: 2 additions & 1 deletion tests/core/data/testbinneddatasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,8 @@ def test_qmesh_slice_xcoords_ycoords(self):

def test_groupby_xdim(self):
grouped = self.dataset2d.groupby('x', group_type=Dataset)
holomap = HoloMap({self.xs[i:i+2].mean(): Dataset((self.ys, self.zs[:, i]), 'y', 'z')
holomap = HoloMap({(self.xs[i]+np.diff(self.xs[i:i+2])/2.)[0]:
Dataset((self.ys, self.zs[:, i]), 'y', 'z')
for i in range(3)}, kdims=['x'])
self.assertEqual(grouped, holomap)

Expand Down
39 changes: 38 additions & 1 deletion tests/operation/testoperation.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import datetime as dt

import numpy as np
from nose.plugins.attrib import attr

from holoviews import (HoloMap, NdOverlay, NdLayout, GridSpace, Image,
Contours, Polygons, Points, Histogram, Curve, Area,
QuadMesh)
QuadMesh, Dataset)
from holoviews.core.util import pd
from holoviews.element.comparison import ComparisonTestCase
from holoviews.operation.element import (operation, transform, threshold,
gradient, contours, histogram,
Expand Down Expand Up @@ -138,6 +141,40 @@ def test_points_histogram_not_normed(self):
hist = Histogram(([3, 3, 4], [0, 3, 6, 9]))
self.assertEqual(op_hist, hist)

def test_histogram_operation_datetime(self):
    """
    The histogram operation applied to an array of Python datetime
    objects should return datetime64[us] bin edges.
    """
    days = [dt.datetime(2017, 1, day) for day in range(1, 5)]
    dataset = Dataset(np.array(days), 'Date')
    op_hist = histogram(dataset, num_bins=4)

    # Five edges delimiting the four requested bins
    expected_edges = np.array(
        ['2017-01-01T00:00:00.000000', '2017-01-01T17:59:59.999999',
         '2017-01-02T12:00:00.000000', '2017-01-03T06:00:00.000000',
         '2017-01-04T00:00:00.000000'], dtype='datetime64[us]')
    # One sample falls in each bin, so every bin has the same frequency
    expected_freq = np.array([3.85802469e-18, 3.85802469e-18,
                              3.85802469e-18, 3.85802469e-18])

    hist = Histogram({'Date': expected_edges, 'Date_frequency': expected_freq},
                     kdims='Date', vdims=('Date_frequency', 'Date Frequency'))
    self.assertEqual(op_hist, hist)

def test_histogram_operation_datetime64(self):
    """
    The histogram operation applied to a numpy datetime64 array should
    return datetime64[us] bin edges.
    """
    days = [dt.datetime(2017, 1, day) for day in range(1, 5)]
    # Cast the object array of datetimes to a numpy datetime64 dtype
    dates = np.array(days).astype('M')
    op_hist = histogram(Dataset(dates, 'Date'), num_bins=4)

    # Five edges delimiting the four requested bins
    expected_edges = np.array(
        ['2017-01-01T00:00:00.000000', '2017-01-01T17:59:59.999999',
         '2017-01-02T12:00:00.000000', '2017-01-03T06:00:00.000000',
         '2017-01-04T00:00:00.000000'], dtype='datetime64[us]')
    expected_freq = np.array([3.85802469e-18, 3.85802469e-18,
                              3.85802469e-18, 3.85802469e-18])

    hist = Histogram({'Date': expected_edges, 'Date_frequency': expected_freq},
                     kdims='Date', vdims=('Date_frequency', 'Date Frequency'))
    self.assertEqual(op_hist, hist)

@attr(optional=1)  # Needs pandas (uses pd.date_range / to_period)
def test_histogram_operation_pd_period(self):
    """
    The histogram operation applied to a pandas PeriodIndex should
    return datetime64[us] bin edges.
    """
    timestamps = pd.date_range('2017-01-01', '2017-01-04', freq='D')
    dates = timestamps.to_period('D')
    op_hist = histogram(Dataset(dates, 'Date'), num_bins=4)

    # Five edges delimiting the four requested bins
    expected_edges = np.array(
        ['2017-01-01T00:00:00.000000', '2017-01-01T17:59:59.999999',
         '2017-01-02T12:00:00.000000', '2017-01-03T06:00:00.000000',
         '2017-01-04T00:00:00.000000'], dtype='datetime64[us]')
    expected_freq = np.array([3.85802469e-18, 3.85802469e-18,
                              3.85802469e-18, 3.85802469e-18])

    hist = Histogram({'Date': expected_edges, 'Date_frequency': expected_freq},
                     kdims='Date', vdims=('Date_frequency', 'Date Frequency'))
    self.assertEqual(op_hist, hist)

def test_points_histogram_weighted(self):
points = Points([float(i) for i in range(10)])
op_hist = histogram(points, num_bins=3, weight_dimension='y')
Expand Down
27 changes: 26 additions & 1 deletion tests/plotting/bokeh/testhistogramplot.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
import datetime as dt

import numpy as np

from holoviews.element import Image, Points
from holoviews.element import Image, Points, Dataset
from holoviews.operation import histogram

from bokeh.models import DatetimeAxis

from .testplot import TestBokehPlot, bokeh_renderer

Expand Down Expand Up @@ -47,3 +52,23 @@ def test_side_histogram_cmapper_weighted(self):
self.assertIs(main_plot.handles['color_mapper'],
top_plot.handles['color_mapper'])
self.assertEqual(main_plot.handles['color_dim'], img.vdims[0])

def test_histogram_datetime64_plot(self):
    """
    A histogram computed over datetimes should render in bokeh with
    datetime64[us] bar edges, a DatetimeAxis and a datetime x-range.
    """
    days = [dt.datetime(2017, 1, day) for day in range(1, 5)]
    hist = histogram(Dataset(np.array(days), 'Date'), num_bins=4)
    plot = bokeh_renderer.get_plot(hist)
    source = plot.handles['source']

    # The five bin edges; each bar spans one consecutive pair of edges
    bin_edges = np.array(
        ['2017-01-01T00:00:00.000000', '2017-01-01T17:59:59.999999',
         '2017-01-02T12:00:00.000000', '2017-01-03T06:00:00.000000',
         '2017-01-04T00:00:00.000000'], dtype='datetime64[us]')
    expected = {
        'top': np.array([3.85802469e-18, 3.85802469e-18,
                         3.85802469e-18, 3.85802469e-18]),
        'left': bin_edges[:-1],
        'right': bin_edges[1:],
    }
    for column, values in expected.items():
        self.assertEqual(source.data[column], values)

    xaxis = plot.handles['xaxis']
    range_x = plot.handles['x_range']
    self.assertIsInstance(xaxis, DatetimeAxis)
    self.assertEqual(range_x.start,
                     np.datetime64('2017-01-01T00:00:00.000000', 'us'))
    self.assertEqual(range_x.end,
                     np.datetime64('2017-01-04T00:00:00.000000', 'us'))
20 changes: 20 additions & 0 deletions tests/plotting/matplotlib/testhistogramplot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import datetime as dt

import numpy as np

from holoviews.element import Dataset
from holoviews.operation import histogram

from .testplot import TestMPLPlot, mpl_renderer


class TestCurvePlot(TestMPLPlot):
    # NOTE(review): this class sits in testhistogramplot.py and only
    # exercises histograms; the name looks copied from the curve tests --
    # consider renaming in a follow-up.

    def test_histogram_datetime64_plot(self):
        """
        A histogram computed over datetimes should render in matplotlib
        with numeric date values for the axis limits and bar positions.
        """
        days = [dt.datetime(2017, 1, day) for day in range(1, 5)]
        hist = histogram(Dataset(np.array(days), 'Date'), num_bins=4)
        plot = mpl_renderer.get_plot(hist)
        artist = plot.handles['artist']
        ax = plot.handles['axis']

        # Expected values are plain floats; presumably matplotlib date
        # numbers for 2017-01-01 through 2017-01-04 -- TODO confirm
        self.assertEqual(ax.get_xlim(), (736330.0, 736333.0))
        bar_starts = [patch.get_x() for patch in artist.patches]
        self.assertEqual(bar_starts,
                         [736330.0, 736330.75, 736331.5, 736332.25])