Skip to content

Commit

Permalink
Merge pull request pandas-dev#326 from manahl/speed_up_low_freq_reads
Browse files Browse the repository at this point in the history
MDP-1754 Improve the memory and cpu performance of reading a subset of columns
  • Loading branch information
jamesblackburn authored Feb 22, 2017
2 parents 712288f + 1d52fef commit 981bc7a
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 11 deletions.
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
### 1.38
* Bugfix: #324 Datetime indexes must be sorted in chunkstore

* Feature: #290 improve performance of tickstore column reads

### 1.37 (2017-1-31)
* Bugfix: #300 to_datetime deprecated in pandas, use to_pydatetime instead
* Bugfix: #309 formatting change for DateRange ```__str__```
Expand Down
20 changes: 18 additions & 2 deletions arctic/tickstore/tickstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -414,10 +414,25 @@ def _read_bucket(self, doc, column_set, column_dtypes, include_symbol, include_i
raise ArcticException("Unhandled document version: %s" % doc[VERSION])
rtn[INDEX] = np.cumsum(np.fromstring(lz4.decompress(doc[INDEX]), dtype='uint64'))
doc_length = len(rtn[INDEX])
rtn_length = len(rtn[INDEX])
column_set.update(doc[COLUMNS].keys())

# get the mask for the columns we're about to load
union_mask = np.zeros((doc_length + 7) // 8, dtype='uint8')
for c in column_set:
try:
coldata = doc[COLUMNS][c]
mask = np.fromstring(lz4.decompress(coldata[ROWMASK]), dtype='uint8')
union_mask = union_mask | mask
except KeyError:
rtn[c] = None
union_mask = np.unpackbits(union_mask)[:doc_length].astype('bool')
rtn_length = np.sum(union_mask)

rtn[INDEX] = rtn[INDEX][union_mask]
if include_symbol:
rtn['SYMBOL'] = [doc[SYMBOL], ] * rtn_length
column_set.update(doc[COLUMNS].keys())

# Unpack each requested column in turn
for c in column_set:
try:
coldata = doc[COLUMNS][c]
Expand All @@ -427,6 +442,7 @@ def _read_bucket(self, doc, column_set, column_dtypes, include_symbol, include_i
rtn[c] = self._empty(rtn_length, dtype=column_dtypes[c])
rowmask = np.unpackbits(np.fromstring(lz4.decompress(coldata[ROWMASK]),
dtype='uint8'))[:doc_length].astype('bool')
rowmask = rowmask[union_mask]
rtn[c][rowmask] = values
except KeyError:
rtn[c] = None
Expand Down
15 changes: 6 additions & 9 deletions tests/integration/tickstore/test_ts_read.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,12 +85,12 @@ def test_read_allow_secondary(tickstore_lib):

def test_read_symbol_as_column(tickstore_lib):
data = [{'ASK': 1545.25,
'index': 1185076787070},
{'CUMVOL': 354.0,
'index': 1185141600600}]
'index': 1185076787070},
{'CUMVOL': 354.0,
'index': 1185141600600}]
tickstore_lib.write('FEED::SYMBOL', data)

df = tickstore_lib.read('FEED::SYMBOL', columns=['SYMBOL'])
df = tickstore_lib.read('FEED::SYMBOL', columns=['SYMBOL', 'CUMVOL'])
assert all(df['SYMBOL'].values == ['FEED::SYMBOL'])


Expand Down Expand Up @@ -596,15 +596,12 @@ def test_read_with_image(tickstore_lib):
# Read one column from the updates
df = tickstore_lib.read('SYM', columns=('a',), date_range=dr, include_images=True)
assert set(df.columns) == set(('a',))
assert_array_equal(df['a'].values, np.array([37, 1, np.nan]))
assert_array_equal(df['a'].values, np.array([37, 1]))
assert df.index[0] == dt(2013, 1, 1, 10, tzinfo=mktz('Europe/London'))
assert df.index[1] == dt(2013, 1, 1, 11, tzinfo=mktz('Europe/London'))
assert df.index[2] == dt(2013, 1, 1, 12, tzinfo=mktz('Europe/London'))

# Read just the image column
df = tickstore_lib.read('SYM', columns=['c'], date_range=dr, include_images=True)
assert set(df.columns) == set(['c'])
assert_array_equal(df['c'].values, np.array([2, np.nan, np.nan]))
assert_array_equal(df['c'].values, np.array([2]))
assert df.index[0] == dt(2013, 1, 1, 10, tzinfo=mktz('Europe/London'))
assert df.index[1] == dt(2013, 1, 1, 11, tzinfo=mktz('Europe/London'))
assert df.index[2] == dt(2013, 1, 1, 12, tzinfo=mktz('Europe/London'))

0 comments on commit 981bc7a

Please sign in to comment.