Skip to content

Commit

Permalink
python 3.8 external pins pandas<1.1.0, numpy<1.19.0 (#962)
Browse files Browse the repository at this point in the history
  • Loading branch information
dunckerr authored Sep 3, 2022
1 parent cf1f019 commit 7c4b378
Show file tree
Hide file tree
Showing 13 changed files with 64 additions and 48 deletions.
3 changes: 2 additions & 1 deletion CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
## Changelog

### 1.80.5
### 1.80.5 (2022-09-05)
* Feature: #950 remove all traces of python 2.7 and six package
* Feature: #959 add python 3.8 support
* Feature: #962 pin pandas<1.1.0, numpy<1.19.0

### 1.80.4 (2022-01-25)
* Bugfix: #940 fix rows per chunk causing divide by zero
Expand Down
13 changes: 5 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

[![Documentation Status](https://readthedocs.org/projects/arctic/badge/?version=latest)](https://arctic.readthedocs.io/en/latest/?badge=latest)
[![CircleCI](https://circleci.com/gh/man-group/arctic/tree/master.svg?style=shield)](https://app.circleci.com/pipelines/github/man-group/arctic?branch=master)
[![PyPI](https://img.shields.io/pypi/v/arctic)](https://pypi.org/project/arctic/)
[![PyPI](https://img.shields.io/pypi/v/arctic)](https://pypi.org/project/arctic)
[![Python](https://img.shields.io/badge/Python-3.6|3.7|3.8-green.svg)](https://github.com/man-group/arctic)
[![Join the chat at https://gitter.im/man-group/arctic](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/man-group/arctic?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)

Expand All @@ -19,9 +19,6 @@ Arctic has been under active development at [Man Group](https://www.man.com/) si

---


:loudspeaker::loudspeaker::loudspeaker: **BUILDING THE NEXT GENERATION OF ARCTIC** :loudspeaker::loudspeaker::loudspeaker:

## Man Group is currently building the next generation of Arctic.

This will offer the same intuitive Python-centric API whilst utilizing a custom C++ storage engine and modern S3 compatible object storage to provide a timeseries database that is:
Expand Down Expand Up @@ -141,10 +138,10 @@ Arctic storage implementations are **pluggable**. VersionStore is the default.

Arctic currently works with:

* Python 3.6, 3.7
* pymongo >= 3.6.0 <= 3.11.0
* Pandas >= 0.22.0 <= 1.0.3
* MongoDB >= 2.4.x <= 4.2.8
* python 3.6, 3.7, 3.8
* pymongo >= 3.6.0 <= 3.11.0
* pandas >= 0.22.0 < 1.1.0
* MongoDB >= 2.4.x <= 4.2.8


Operating Systems:
Expand Down
5 changes: 3 additions & 2 deletions arctic/date/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import datetime
import sys
from datetime import timedelta

import pandas as pd

from ._daterange import DateRange
Expand Down Expand Up @@ -168,11 +167,13 @@ def datetime_to_ms(d):
try:
millisecond = d.microsecond // 1000

# python3.8 workaround https://github.com/pandas-dev/pandas/issues/32174
# https://github.com/pandas-dev/pandas/issues/32526
# https://github.com/pandas-dev/pandas/issues/32174
if sys.version_info < (3, 8, 0):
return calendar.timegm(_add_tzone(d).utctimetuple()) * 1000 + millisecond
else:
tmp = _add_tzone(d)
# converting to a plain datetime seems to be the only reliable option
if isinstance(tmp, pd.Timestamp):
return calendar.timegm(tmp.to_pydatetime().utctimetuple()) * 1000 + millisecond
else:
Expand Down
8 changes: 7 additions & 1 deletion arctic/serialization/numpy_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,13 @@ def _index_from_records(self, recarr):
if len(index) == 1:
rtn = Index(np.copy(recarr[str(index[0])]), name=index[0])
if isinstance(rtn, DatetimeIndex) and 'index_tz' in recarr.dtype.metadata:
rtn = rtn.tz_localize('UTC').tz_convert(recarr.dtype.metadata['index_tz'])
if PD_VER >= '1.0.4':
if isinstance(recarr.dtype.metadata['index_tz'], list):
rtn = rtn.tz_localize('UTC').tz_convert(recarr.dtype.metadata['index_tz'][0])
else:
rtn = rtn.tz_localize('UTC').tz_convert(recarr.dtype.metadata['index_tz'])
else:
rtn = rtn.tz_localize('UTC').tz_convert(recarr.dtype.metadata['index_tz'])
else:
level_arrays = []
index_tz = recarr.dtype.metadata.get('index_tz', [])
Expand Down
2 changes: 1 addition & 1 deletion arctic/store/_pickle_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def write(self, arctic_lib, version, symbol, item, _previous_version):
version['blob'] = _MAGIC_CHUNKEDV2
# Python 3.8 onwards uses protocol 5 which cannot be unpickled in Python versions below that, so limiting
# it to use a maximum of protocol 4 in Python which is understood by 3.4 onwards and is still fairly efficient.
# The min() used to allow lower versions to be used in py2 (which supported a max of 2)
# pickle protocol 4 was introduced in Python 3.4 and is the default protocol from Python 3.8 onward
pickle_protocol = min(pickle.HIGHEST_PROTOCOL, 4)
pickled = pickle.dumps(item, protocol=pickle_protocol)

Expand Down
3 changes: 3 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
[metadata]
name = arctic

[pycodestyle]
ignore = E122,E126,E501,E731,W503,E722,W504
8 changes: 4 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def run_tests(self):
name="arctic",
version="1.80.5",
author="Man AHL Technology",
author_email="ManAHLTech@ahl.com",
author_email="arctic@man.com",
description=("AHL Research Versioned TimeSeries and Tick store"),
license="GPL",
keywords=["ahl", "keyvalue", "tickstore", "mongo", "timeseries", ],
Expand All @@ -69,15 +69,15 @@ def run_tests(self):
long_description='\n'.join((long_description, changelog)),
long_description_content_type="text/markdown",
cmdclass={'test': PyTest},
setup_requires=["numpy<=1.18.4",
setup_requires=["numpy<1.19.0",
"setuptools-git",
],
install_requires=["decorator",
"enum-compat",
"mock",
"mockextras",
"pandas<=1.0.3",
"numpy<=1.18.4",
"pandas<1.1.0",
"numpy<1.19.0",
"pymongo>=3.6.0, <= 3.11.0",
"pytz",
"tzlocal",
Expand Down
4 changes: 2 additions & 2 deletions tests/integration/chunkstore/test_chunkstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from arctic.date import DateRange
from arctic.exceptions import NoDataFoundException
from tests.integration.chunkstore.test_utils import create_test_data
from tests.util import assert_frame_equal_
from tests.util import assert_frame_equal_, assert_series_equal_


def test_write_dataframe(chunkstore_lib):
Expand Down Expand Up @@ -714,7 +714,7 @@ def test_overwrite_series(chunkstore_lib):

chunkstore_lib.write('test', s)
chunkstore_lib.write('test', s + 1)
assert_series_equal(chunkstore_lib.read('test'), s + 1)
assert_series_equal_(chunkstore_lib.read('test'), s + 1, check_freq=False)


def test_overwrite_series_monthly(chunkstore_lib):
Expand Down
30 changes: 15 additions & 15 deletions tests/integration/store/test_pandas_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
# Do not remove PandasStore, used in global scope
from arctic.store._pandas_ndarray_store import PandasDataFrameStore, PandasSeriesStore, PandasStore
from arctic.store.version_store import register_versioned_storage
from tests.util import assert_frame_equal_
from tests.util import assert_frame_equal_, assert_series_equal_

register_versioned_storage(PandasDataFrameStore)

Expand Down Expand Up @@ -851,30 +851,30 @@ def test_daterange_large_DataFrame_middle(library):

@pytest.mark.parametrize("df,assert_equal", [
(DataFrame(index=date_range(dt(2001, 1, 1), freq='D', periods=30000),
data=list(range(30000)), columns=['A']), assert_frame_equal),
data=list(range(30000)), columns=['A']), assert_frame_equal_),
(Series(index=date_range(dt(2001, 1, 1), freq='D', periods=30000),
data=range(30000)), assert_series_equal),
data=range(30000)), assert_series_equal_),
])
def test_daterange(library, df, assert_equal):
df.index.name = 'idx'
df.name = 'FOO'
library.write('MYARR', df)
# whole array
saved_arr = library.read('MYARR').data
assert_equal(df, saved_arr)
assert_equal(df, library.read('MYARR', date_range=DateRange(df.index[0])).data)
assert_equal(df, library.read('MYARR', date_range=DateRange(df.index[0], df.index[-1])).data)
assert_equal(df, library.read('MYARR', date_range=DateRange()).data)
assert_equal(df[df.index[10]:], library.read('MYARR', date_range=DateRange(df.index[10])).data)
assert_equal(df[:df.index[10]], library.read('MYARR', date_range=DateRange(end=df.index[10])).data)
assert_equal(df[df.index[-1]:], library.read('MYARR', date_range=DateRange(df.index[-1])).data)
assert_equal(df[df.index[-1]:], library.read('MYARR', date_range=DateRange(df.index[-1], df.index[-1])).data)
assert_equal(df[df.index[0]:df.index[0]], library.read('MYARR', date_range=DateRange(df.index[0], df.index[0])).data)
assert_equal(df[:df.index[0]], library.read('MYARR', date_range=DateRange(end=df.index[0])).data)
assert_equal(df, saved_arr, check_freq=False)
assert_equal(df, library.read('MYARR', date_range=DateRange(df.index[0])).data, check_freq=False)
assert_equal(df, library.read('MYARR', date_range=DateRange(df.index[0], df.index[-1])).data, check_freq=False)
assert_equal(df, library.read('MYARR', date_range=DateRange()).data, check_freq=False)
assert_equal(df[df.index[10]:], library.read('MYARR', date_range=DateRange(df.index[10])).data, check_freq=False)
assert_equal(df[:df.index[10]], library.read('MYARR', date_range=DateRange(end=df.index[10])).data, check_freq=False)
assert_equal(df[df.index[-1]:], library.read('MYARR', date_range=DateRange(df.index[-1])).data, check_freq=False)
assert_equal(df[df.index[-1]:], library.read('MYARR', date_range=DateRange(df.index[-1], df.index[-1])).data, check_freq=False)
assert_equal(df[df.index[0]:df.index[0]], library.read('MYARR', date_range=DateRange(df.index[0], df.index[0])).data, check_freq=False)
assert_equal(df[:df.index[0]], library.read('MYARR', date_range=DateRange(end=df.index[0])).data, check_freq=False)
assert_equal(df[df.index[0] - DateOffset(days=1):],
library.read('MYARR', date_range=DateRange(df.index[0] - DateOffset(days=1))).data)
library.read('MYARR', date_range=DateRange(df.index[0] - DateOffset(days=1))).data, check_freq=False)
assert_equal(df[df.index[-1] + DateOffset(days=1):],
library.read('MYARR', date_range=DateRange(df.index[-1] + DateOffset(days=1))).data)
library.read('MYARR', date_range=DateRange(df.index[-1] + DateOffset(days=1))).data, check_freq=False)
assert len(library.read('MYARR', date_range=DateRange(dt(1950, 1, 1), dt(1951, 1, 1))).data) == 0
assert len(library.read('MYARR', date_range=DateRange(dt(2091, 1, 1), dt(2091, 1, 1))).data) == 0

Expand Down
4 changes: 2 additions & 2 deletions tests/integration/store/test_version_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from tests.unit.serialization.serialization_test_data import _mixed_test_data
from ...util import read_str_as_pandas
from ..test_utils import enable_profiling_for_library
from tests.util import assert_frame_equal_
from tests.util import assert_frame_equal_, assert_series_equal_

ts1 = read_str_as_pandas(""" times | near
2012-09-08 17:06:11.040 | 1.0
Expand Down Expand Up @@ -1743,7 +1743,7 @@ def test_can_write_tz_aware_data_series(library, fw_pointers_cfg):
# Arctic converts by default the data to UTC, convert back
read_data = read_data.dt.tz_localize('UTC').dt.tz_convert(read_data.index.tzinfo)
assert library._versions.find_one({'symbol': 'symTzSer'})['type'] == PandasSeriesStore.TYPE
assert_series_equal(myseries, read_data)
assert_series_equal_(myseries, read_data, check_freq=False)


@pytest.mark.parametrize('write_cfg, read_cfg, append_cfg, reread_cfg', [
Expand Down
11 changes: 6 additions & 5 deletions tests/integration/tickstore/test_toplevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pandas as pd
import pytest
from pandas.util.testing import assert_frame_equal
from tests.util import assert_frame_equal_

from arctic.date import DateRange, mktz
from arctic.exceptions import NoDataFoundException, LibraryNotFoundException, OverlappingDataException
Expand Down Expand Up @@ -62,7 +63,7 @@ def test_should_return_data_when_date_range_falls_in_a_single_underlying_library
tickstore_current.write('blah', df)
res = toplevel_tickstore.read('blah', DateRange(start=dt(2010, 1, 1), end=dt(2010, 1, 6)), list('ABCD'))

assert_frame_equal(df, res.tz_convert(mktz('Europe/London')))
assert_frame_equal_(df, res.tz_convert(mktz('Europe/London')), check_freq=False)


def test_should_return_data_when_date_range_spans_libraries(toplevel_tickstore, arctic):
Expand Down Expand Up @@ -98,7 +99,7 @@ def test_should_return_data_when_date_range_spans_libraries_even_if_one_returns_
tickstore_2011.write('blah', df_11)
res = toplevel_tickstore.read('blah', DateRange(start=dt(2010, 1, 2), end=dt(2011, 1, 4)), list('ABCD'))
expected_df = df_10[1:]
assert_frame_equal(expected_df, res.tz_convert(mktz('Europe/London')))
assert_frame_equal_(expected_df, res.tz_convert(mktz('Europe/London')), check_freq=False)


def test_should_add_underlying_library_where_none_exists(toplevel_tickstore, arctic):
Expand Down Expand Up @@ -194,10 +195,10 @@ def test_should_write_top_level_with_list_of_dicts(arctic):
expected = pd.DataFrame(np.arange(57, dtype=np.float64), index=dates, columns=list('a'))
toplevel_tickstore.write('blah', data)
res = toplevel_tickstore.read('blah', DateRange(start=dt(2010, 12, 1), end=dt(2011, 2, 1)), columns=list('a'))
assert_frame_equal(expected, res.tz_convert(mktz('Europe/London')))
assert_frame_equal_(expected, res.tz_convert(mktz('Europe/London')), check_freq=False)
lib2010 = arctic['FEED_2010.LEVEL1']
res = lib2010.read('blah', DateRange(start=dt(2010, 12, 1), end=dt(2011, 1, 1)))
assert_frame_equal(expected[dt(2010, 12, 1): dt(2010, 12, 31)], res.tz_convert(mktz('Europe/London')))
assert_frame_equal_(expected[dt(2010, 12, 1): dt(2010, 12, 31)], res.tz_convert(mktz('Europe/London')), check_freq=False)


def test_should_write_top_level_with_correct_timezone(arctic):
Expand All @@ -212,7 +213,7 @@ def test_should_write_top_level_with_correct_timezone(arctic):
expected = pd.DataFrame(np.arange(len(dates), dtype=np.float64), index=dates.tz_convert(utc), columns=list('a'))
toplevel_tickstore.write('blah', data)
res = toplevel_tickstore.read('blah', DateRange(start=dt(2010, 1, 1), end=dt(2011, 12, 31)), columns=list('a')).tz_convert(utc)
assert_frame_equal(expected, res)
assert_frame_equal_(expected, res, check_freq=False)
lib2010 = arctic['FEED_2010.LEVEL1']
# Check that only one point was written into 2010 being 3am on 31st
assert len(lib2010.read('blah', DateRange(start=dt(2010, 12, 1), end=dt(2011, 1, 1)))) == 1
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/store/test_pickle_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def test_write_object():

# Python 3.8 onwards uses protocol 5 which cannot be unpickled in Python versions below that, so limiting
# it to use a maximum of protocol 4 in Python which is understood by 3.4 onwards and is still fairly efficient.
# The min() used to allow lower versions to be used in py2 (which supported a max of 2)
# pickle protocol 4 was introduced in Python 3.4 and is the default protocol from Python 3.8 onward
pickle_protocol = min(4, pickle.HIGHEST_PROTOCOL)
assert coll.update_one.call_args_list == [call({'sha': checksum('sentinel.symbol', {'segment': 0, 'data': Binary(compress(pickle.dumps(sentinel.item, pickle_protocol)))}),
'symbol': 'sentinel.symbol'},
Expand Down
19 changes: 13 additions & 6 deletions tests/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,25 @@
import numpy as np
import pandas
from dateutil.rrule import rrule, DAILY
from pandas.util.testing import assert_frame_equal
from pandas.util.testing import assert_frame_equal, assert_series_equal


# check_freq default True added in pandas 1.1.0
def assert_frame_equal_(df1, df2, check_freq=True, check_names=True):
    """Assert that two DataFrames are equal, ignoring column order.

    Both frames have their columns sorted with ``sort_index(axis=1)`` before
    they are passed to pandas' ``assert_frame_equal``.

    :param df1: first DataFrame to compare
    :param df2: second DataFrame to compare
    :param check_freq: forwarded to pandas only when the installed pandas is
        1.1 or newer (the first release that accepts it); ignored otherwise
    :param check_names: forwarded to pandas unchanged
    :raises AssertionError: if pandas reports the frames as different
    """
    # Compare (major, minor) as integers: comparing the raw version strings
    # orders them lexicographically rather than numerically.
    major, minor = (int(part) for part in pandas.__version__.split('.')[:2])
    left = df1.sort_index(axis=1)
    right = df2.sort_index(axis=1)
    if (major, minor) >= (1, 1):
        assert_frame_equal(left, right, check_names=check_names, check_freq=check_freq)
    else:
        # Older pandas does not know the check_freq keyword.
        assert_frame_equal(left, right, check_names=check_names)

# check_freq default True added in pandas 1.1.0
def assert_series_equal_(s1, s2, check_freq=True):
    """Assert that two Series are equal using pandas' ``assert_series_equal``.

    :param s1: first Series to compare
    :param s2: second Series to compare
    :param check_freq: forwarded to pandas only when the installed pandas is
        1.1 or newer (the first release that accepts it); ignored otherwise
    :raises AssertionError: if pandas reports the series as different
    """
    # Compare (major, minor) as integers: comparing the raw version strings
    # orders them lexicographically rather than numerically.
    major, minor = (int(part) for part in pandas.__version__.split('.')[:2])
    if (major, minor) >= (1, 1):
        assert_series_equal(s1, s2, check_freq=check_freq)
    else:
        # Older pandas does not know the check_freq keyword.
        assert_series_equal(s1, s2)


def dt_or_str_parser(string):
Expand Down

0 comments on commit 7c4b378

Please sign in to comment.