Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding reversed scanner, and new compat mode 0.98 #155

Merged
merged 2 commits into from
Jan 31, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions happybase/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

STRING_OR_BINARY = (six.binary_type, six.text_type)

COMPAT_MODES = ('0.90', '0.92', '0.94', '0.96')
COMPAT_MODES = ('0.90', '0.92', '0.94', '0.96', '0.98')
THRIFT_TRANSPORTS = dict(
buffered=TBufferedTransport,
framed=TFramedTransport,
Expand All @@ -33,7 +33,7 @@
DEFAULT_HOST = 'localhost'
DEFAULT_PORT = 9090
DEFAULT_TRANSPORT = 'buffered'
DEFAULT_COMPAT = '0.96'
DEFAULT_COMPAT = '0.98'
DEFAULT_PROTOCOL = 'binary'


Expand Down
27 changes: 24 additions & 3 deletions happybase/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ def cells(self, row, column, versions=None, timestamp=None,
def scan(self, row_start=None, row_stop=None, row_prefix=None,
columns=None, filter=None, timestamp=None,
include_timestamp=False, batch_size=1000, scan_batching=None,
limit=None, sorted_columns=False):
limit=None, sorted_columns=False, reverse=False):
"""Create a scanner for data in the table.

This method returns an iterable that can be used for looping over the
Expand Down Expand Up @@ -268,6 +268,11 @@ def scan(self, row_start=None, row_stop=None, row_prefix=None,
by this scanner will be retrieved in sorted order, and the data
will be stored in `OrderedDict` instances.

If `reverse` is `True`, the scanner will perform the scan in reverse.
This means that `row_start` must be lexicographically after `row_stop`.
Note that the start of the range is inclusive, while the end is
exclusive just as in the forward scan.

**Compatibility notes:**

* The `filter` argument is only available when using HBase 0.92
Expand All @@ -277,6 +282,12 @@ def scan(self, row_start=None, row_stop=None, row_prefix=None,
* The `sorted_columns` argument is only available when using
HBase 0.96 (or up).

* The `reverse` argument is only available when using HBase 0.98
(or up).

.. versionadded:: TODO
`reverse` argument

.. versionadded:: 0.8
`sorted_columns` argument

Expand All @@ -294,6 +305,7 @@ def scan(self, row_start=None, row_stop=None, row_prefix=None,
:param bool scan_batching: server-side scan batching (optional)
:param int limit: max number of rows to return
:param bool sorted_columns: whether to return sorted columns
:param bool reverse: whether to perform scan in reverse

:return: generator yielding the rows matching the scan
:rtype: iterable of `(row_key, row_data)` tuples
Expand All @@ -311,14 +323,22 @@ def scan(self, row_start=None, row_stop=None, row_prefix=None,
raise NotImplementedError(
"'sorted_columns' is only supported in HBase >= 0.96")

if reverse and self.connection.compat < '0.98':
raise NotImplementedError(
"'reverse' is only supported in HBase >= 0.98")

if row_prefix is not None:
if row_start is not None or row_stop is not None:
raise TypeError(
"'row_prefix' cannot be combined with 'row_start' "
"or 'row_stop'")

row_start = row_prefix
row_stop = bytes_increment(row_prefix)
if reverse:
row_start = bytes_increment(row_prefix)
row_stop = row_prefix
else:
row_start = row_prefix
row_stop = bytes_increment(row_prefix)

if row_start is None:
row_start = ''
Expand Down Expand Up @@ -376,6 +396,7 @@ def scan(self, row_start=None, row_stop=None, row_prefix=None,
filterString=filter,
batchSize=scan_batching,
sortColumns=sorted_columns,
reversed=reverse,
)
scan_id = self.connection.client.scannerOpenWithScan(
self.name, scan, {})
Expand Down
30 changes: 29 additions & 1 deletion tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

HAPPYBASE_HOST = os.environ.get('HAPPYBASE_HOST')
HAPPYBASE_PORT = os.environ.get('HAPPYBASE_PORT')
HAPPYBASE_COMPAT = os.environ.get('HAPPYBASE_COMPAT', '0.96')
HAPPYBASE_COMPAT = os.environ.get('HAPPYBASE_COMPAT', '0.98')
HAPPYBASE_TRANSPORT = os.environ.get('HAPPYBASE_TRANSPORT', 'buffered')
KEEP_TABLE = ('HAPPYBASE_NO_CLEANUP' in os.environ)

Expand Down Expand Up @@ -446,6 +446,34 @@ def test_scan_sorting():
list(row.items()))


def test_scan_reverse():
    """Check reversed scans against the shared test table.

    On compat modes older than 0.98 the only expectation is that asking
    for a reversed scan raises NotImplementedError. Otherwise 2000 rows
    are written and then scanned in reverse by prefix, with a limit, and
    with an explicit start/stop range.
    """
    if connection.compat < '0.98':
        # Older compat modes must refuse the `reverse` argument.
        with assert_raises(NotImplementedError):
            list(table.scan(reverse=True))
        return

    # Populate rows 'row-scan-reverse-0000' .. 'row-scan-reverse-1999'.
    with table.batch() as writer:
        for index in range(2000):
            row_key = ('row-scan-reverse-%04d' % index).encode('ascii')
            writer.put(row_key, {b'cf1:col1': b'v1', b'cf1:col2': b'v2'})

    # A reversed prefix scan is expected to return every written row.
    prefix_rows = list(
        table.scan(row_prefix=b'row-scan-reverse', reverse=True))
    assert_equal(2000, len(prefix_rows))

    # `limit` is expected to cap the number of rows in reverse as well.
    limited_rows = list(table.scan(limit=10, reverse=True))
    assert_equal(10, len(limited_rows))

    # Explicit range: the first row yielded should be the start key, and
    # the last one the key just before the stop key.
    ranged = table.scan(
        row_start=b'row-scan-reverse-1999',
        row_stop=b'row-scan-reverse-0000',
        reverse=True)
    first_key, _ = next(ranged)
    assert_equal(b'row-scan-reverse-1999', first_key)

    remaining = list(ranged)
    last_key, _ = remaining[-1]
    assert_equal(b'row-scan-reverse-0001', last_key)


def test_scan_filter_and_batch_size():
# See issue #54 and #56
filter = b"SingleColumnValueFilter ('cf1', 'qual1', =, 'binary:val1')"
Expand Down