Skip to content

Commit

Permalink
Merge pull request pandas-dev#177 from manahl/issue-176
Browse files Browse the repository at this point in the history
Fix issue pandas-dev#176
  • Loading branch information
bmoscon authored Jul 19, 2016
2 parents c09d5bb + 10ba630 commit d9d2485
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 4 deletions.
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
* Bugfix: #169 Dtype mismatch in chunkstore updates
* Feature: #171 allow deleting of values within a date range in ChunkStore
* Bugfix: #172 Fix date range bug when querying dates in the middle of chunks
* Bugfix: #176 Fix overwrite failures in ChunkStore

### 1.25 (2016-05-23)

Expand Down
8 changes: 4 additions & 4 deletions arctic/chunkstore/chunkstore.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import logging
import pymongo
import numpy as np
import bson
import ast

from bson.binary import Binary
Expand Down Expand Up @@ -134,7 +133,7 @@ def read(self, symbol, chunk_range=None, filter_data=True):

sym = self._get_symbol_info(symbol)
if not sym:
raise NoDataFoundException('No data found for %s in library %s' % (symbol, self._collection.get_name()))
raise NoDataFoundException('No data found for %s' % (symbol))

spec = {'symbol': symbol,
}
Expand Down Expand Up @@ -223,14 +222,15 @@ def write(self, symbol, item, chunk_size):
chunk['end'] = end
chunk['symbol'] = symbol
chunk['sha'] = checksum(symbol, chunk)

if chunk['sha'] not in previous_shas:
op = True
bulk.find({'symbol': symbol, 'sha': chunk['sha']},
bulk.find({'symbol': symbol, 'start': start, 'end': end},
).upsert().update_one({'$set': chunk})
else:
# already exists, dont need to update in mongo
previous_shas.remove(chunk['sha'])

if op:
bulk.execute()

Expand Down
84 changes: 84 additions & 0 deletions tests/integration/chunkstore/test_chunkstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from datetime import datetime as dt
from pandas.util.testing import assert_frame_equal, assert_series_equal
from arctic.date import DateRange
from arctic.exceptions import NoDataFoundException
import pandas as pd
import numpy as np
import random
Expand Down Expand Up @@ -41,6 +42,51 @@ def test_overwrite_dataframe(chunkstore_lib):
assert_frame_equal(dg, read_df)


def test_overwrite_dataframe_monthly(chunkstore_lib):
    """Write a monthly-chunked frame, overwrite it, and read back the overwrite.

    Both frames hold one row per month (January-June 2016) with the same
    values, but the second frame uses different days within most months.
    The read after the second write must equal the second frame.
    """
    months = range(1, 7)

    def build_frame(dates):
        # One row per date, every row sharing id 1, carrying values 1..6.
        index = MultiIndex.from_tuples([(date, 1) for date in dates],
                                       names=['date', 'id'])
        return DataFrame(data={'data': [1, 2, 3, 4, 5, 6]}, index=index)

    # First write: the 5th of every month.
    first = build_frame([dt(2016, month, 5) for month in months])
    # Second write: day-of-month equals the month number (Jan 1 ... Jun 6).
    second = build_frame([dt(2016, month, month) for month in months])

    chunkstore_lib.write('test_df', first, 'M')
    chunkstore_lib.write('test_df', second, 'M')

    stored = chunkstore_lib.read('test_df')
    assert_frame_equal(second, stored)


def test_overwrite_series(chunkstore_lib):
    """Writing a Series twice with daily chunks returns the second write."""
    # Single-point index: date_range with identical start and end.
    index = pd.date_range('2016-01-01', '2016-01-01', name='date')
    original = pd.Series([1], index=index, name='vals')
    replacement = original + 1

    chunkstore_lib.write('test', original, 'D')
    chunkstore_lib.write('test', replacement, 'D')

    assert_series_equal(chunkstore_lib.read('test'), replacement)


def test_overwrite_series_monthly(chunkstore_lib):
    """Writing a Series twice with monthly chunks returns the second write."""
    # Two points that fall in different months (January and February 2016).
    index = pd.Index(data=[dt(2016, 1, 1), dt(2016, 2, 1)], name='date')
    original = pd.Series([1, 2], index=index, name='vals')
    replacement = original + 1

    chunkstore_lib.write('test', original, 'M')
    chunkstore_lib.write('test', replacement, 'M')

    assert_series_equal(chunkstore_lib.read('test'), replacement)


def test_write_read_with_daterange(chunkstore_lib):
df = DataFrame(data={'data': [1, 2, 3]},
index=MultiIndex.from_tuples([(dt(2016, 1, 1), 1),
Expand Down Expand Up @@ -686,3 +732,41 @@ def test_read_chunk_range(chunkstore_lib):

df2 = chunkstore_lib.read('test', chunk_range=DateRange(None, None))
assert_frame_equal(df, df2)


def test_read_data_doesnt_exist(chunkstore_lib):
    """Reading a symbol that was never written raises NoDataFoundException."""
    with pytest.raises(NoDataFoundException) as e:
        chunkstore_lib.read('some_data')
    # Check the message on the raised exception (e.value), not on the
    # ExceptionInfo wrapper: the wrapper's str() form is version-dependent
    # (it became the same as its repr in pytest 5.0).
    assert 'No data found' in str(e.value)


def test_invalid_type(chunkstore_lib):
    """Writing something other than a Series or DataFrame is rejected."""
    with pytest.raises(Exception) as e:
        chunkstore_lib.write('some_data', str("Cannot write a string"), 'D')
    # Check the message on the raised exception (e.value), not on the
    # ExceptionInfo wrapper: the wrapper's str() form is version-dependent
    # (it became the same as its repr in pytest 5.0).
    assert 'Can only chunk Series and DataFrames' in str(e.value)


def test_append_no_data(chunkstore_lib):
    """Appending to a symbol that does not exist raises NoDataFoundException."""
    with pytest.raises(NoDataFoundException) as e:
        chunkstore_lib.append('some_data', "")
    # Check the message on the raised exception (e.value), not on the
    # ExceptionInfo wrapper: the wrapper's str() form is version-dependent
    # (it became the same as its repr in pytest 5.0).
    assert 'Symbol does not exist.' in str(e.value)


def test_append_no_new_data(chunkstore_lib):
    """Appending a frame identical to the stored one leaves the data as-is."""
    # The first three days of January, February and March 2016, month-major,
    # every row sharing id 1.
    rows = [(dt(2016, month, day), 1)
            for month in (1, 2, 3)
            for day in (1, 2, 3)]
    frame = DataFrame(data={'data': list(range(1, 10))},
                      index=MultiIndex.from_tuples(rows, names=['date', 'id']))

    chunkstore_lib.write('test', frame, 'D')
    chunkstore_lib.append('test', frame)

    stored = chunkstore_lib.read('test')
    assert_frame_equal(frame, stored)

0 comments on commit d9d2485

Please sign in to comment.