Merge branch 'master' into stable

sripathikrishnan · Feb 26, 2017 · 49de5ee · 49de5ee
2 parents 28ccd5b + 1433f8f
commit 49de5ee
Show file tree

Hide file tree

Showing 21 changed files with 1,038 additions and 457 deletions.
diff --git a/.gitignore b/.gitignore
@@ -9,3 +9,4 @@ tests/dumps/dump_dealers_vins.rdb
 tests/dumps/dump_random_lists.rdb
 tests/dumps/dump_sorted_sets.rdb
 
+.idea/*
diff --git a/.travis.yml b/.travis.yml
@@ -2,6 +2,8 @@ language: python
 python:
   - "2.6"
   - "2.7"
+  - "3.4"
+  - "3.5"
 
 script: python run_tests
 
diff --git a/CHANGES b/CHANGES
@@ -1,11 +1,15 @@
-* 0.1.1
-    * Fixed lzf decompression
-    * Standard python project layout
-    * Python script to automatically create test RDB files
-    * Adds test cases
-    * Adds setup.py to easily install this library
-    * Adds MIT license
+* 0.1.9
+    * python 3 support
+    * rdb v8 (redis 4.0) support
+    * binary to string conversion fixes
+    * use ujson/cStringIO/python-lzf if they're available
+    * filter keys by size
+    * bugfixes parsing sorted sets
+    * fix setup.py dependencies and remove requirements.txt file
+
+* 0.1.8
+    * fix a crash in the memory profiler recently introduced.
+
+* 0.1.7
+    * rdb v7 (redis 3.2) support
 
-* 0.1.0
-    * Initial version
-    * Specification for RDB file format
diff --git a/README.md b/README.md
@@ -14,8 +14,7 @@ Rdbtools is written in Python, though there are similar projects in other langua
 
 Pre-Requisites : 
 
-1. python 2.x and pip.
-2. redis-py is optional and only needed to run test cases.
+1. redis-py is optional and only needed to run test cases.
 
 To install from PyPI (recommended) :
 
@@ -27,29 +26,83 @@ To install from source :
     cd redis-rdb-tools
     sudo python setup.py install
 
-## Converting dump files to JSON ##
+# Command line usage examples
+
+Every run of RDB Tool requires a command that indicates what should be done with the parsed RDB data.
+Valid commands are: json, diff, justkeys, justkeyvals and protocol.
+
+JSON from a two database dump:
+
+    > rdb --command json /var/redis/6379/dump.rdb
 
-Parse the dump file and print the JSON on standard output
+    [{
+    "user003":{"fname":"Ron","sname":"Bumquist"},
+    "lizards":["Bush anole","Jackson's chameleon","Komodo dragon","Ground agama","Bearded dragon"],
+    "user001":{"fname":"Raoul","sname":"Duke"},
+    "user002":{"fname":"Gonzo","sname":"Dr"},
+    "user_list":["user003","user002","user001"]},{
+    "baloon":{"helium":"birthdays","medical":"angioplasty","weather":"meteorology"},
+    "armadillo":["chacoan naked-tailed","giant","Andean hairy","nine-banded","pink fairy"],
+    "aroma":{"pungent":"vinegar","putrid":"rotten eggs","floral":"roses"}}]
 
-    rdb --command json /var/redis/6379/dump.rdb
+## Filter parsed output
+
+Only process keys that match the regex, and only print key and values:
+
+    > rdb --command justkeyvals --key "user.*" /var/redis/6379/dump.rdb
+
+    user003 fname Ron,sname Bumquist,
+    user001 fname Raoul,sname Duke,
+    user002 fname Gonzo,sname Dr,
+    user_list user003,user002,user001
 
-Only process keys that match the regex
+Only process hashes starting with "a", in database 2:
 
-    rdb --command json --key "user.*" /var/redis/6379/dump.rdb
+    > rdb -c json --db 2 --type hash --key "a.*" /var/redis/6379/dump.rdb
 
-Only process hashes starting with "a", in database 2 
+    [{},{
+    "aroma":{"pungent":"vinegar","putrid":"rotten eggs","floral":"roses"}}]
+
+## Converting dump files to JSON ##
+
+The `json` command output is UTF-8 encoded JSON.
+By default, the callback tries to parse RDB data as UTF-8 and escapes non 'ASCII printable' characters with the `\U` notation, or non UTF-8 parsable bytes with `\x`.
+Attempting to decode RDB data can lead to binary data corruption; this can be avoided by using the `--escape raw` option.
+Another option is to use `-e base64` for Base64 encoding of binary data.
+
+
+Parse the dump file and print the JSON on standard output:
+
+    > rdb -c json /var/redis/6379/dump.rdb
+
+    [{
+    "Citat":["B\u00e4ttre sent \u00e4n aldrig","Bra karl reder sig sj\u00e4lv","Man ska inte k\u00f6pa grisen i s\u00e4cken"],
+    "bin_data":"\\xFE\u0000\u00e2\\xF2"}]
+
+Parse the dump file to raw bytes and print the JSON on standard output:
 
-    rdb --command json --db 2 --type hash --key "a.*" /var/redis/6379/dump.rdb
+    > rdb -c json /var/redis/6379/dump.rdb --escape raw
 
+    [{
+    "Citat":["B\u00c3\u00a4ttre sent \u00c3\u00a4n aldrig","Bra karl reder sig sj\u00c3\u00a4lv","Man ska inte k\u00c3\u00b6pa grisen i s\u00c3\u00a4cken"],
+    "bin_data":"\u00fe\u0000\u00c3\u00a2\u00f2"}]
 
 ## Generate Memory Report ##
 
-Running with the  `-c memory` generates a CSV report with the approximate memory used by that key.
+Running with the `-c memory` option generates a CSV report with the approximate memory used by each key. `--bytes C` and `--largest N` can be used to limit output to keys larger than C bytes, or the N largest keys.
 
-    rdb -c memory /var/redis/6379/dump.rdb > memory.csv
+    > rdb -c memory /var/redis/6379/dump.rdb --bytes 128 -f memory.csv
+	> cat memory.csv
 
+	database,type,key,size_in_bytes,encoding,num_elements,len_largest_element
+	0,list,lizards,241,quicklist,5,19
+	0,list,user_list,190,quicklist,3,7
+	2,hash,baloon,138,ziplist,3,11
+	2,list,armadillo,231,quicklist,5,20
+	2,hash,aroma,129,ziplist,3,11
 
-The generated CSV has the following columns - Database Number, Data Type, Key, Memory Used in bytes and Encoding. 
+
+The generated CSV has the following columns - Database Number, Data Type, Key, Memory Used in bytes, RDB Encoding type, Number of Elements and Length of the Largest Element.
 Memory usage includes the key, the value and any other overheads.
 
 Note that the memory usage is approximate. In general, the actual memory used will be slightly higher than what is reported.
@@ -62,17 +115,13 @@ The memory report should help you detect memory leaks caused by your application
 
 Sometimes you just want to find the memory used by a particular key, and running the entire memory report on the dump file is time consuming.
 
-In such cases, you can use the `redis-memory-for-key` command
-
-Example :
+In such cases, you can use the `redis-memory-for-key` command:
 
-    redis-memory-for-key person:1
+    > redis-memory-for-key person:1
 
-    redis-memory-for-key -s localhost -p 6379 -a mypassword person:1
-
-Output :
+    > redis-memory-for-key -s localhost -p 6379 -a mypassword person:1
 
-    Key 			"person:1"
+    Key 			person:1
     Bytes				111
     Type				hash
     Encoding			ziplist
@@ -88,20 +137,20 @@ NOTE :
 
 First, use the --command diff option, and pipe the output to standard sort utility
 
-    rdb --command diff /var/redis/6379/dump1.rdb | sort > dump1.txt
-    rdb --command diff /var/redis/6379/dump2.rdb | sort > dump2.txt
+    > rdb --command diff /var/redis/6379/dump1.rdb | sort > dump1.txt
+    > rdb --command diff /var/redis/6379/dump2.rdb | sort > dump2.txt
 
 Then, run your favourite diff program
 
-    kdiff3 dump1.txt dump2.txt
+    > kdiff3 dump1.txt dump2.txt
 
-To limit the size of the files, you can filter on keys using the --key=regex option
+To limit the size of the files, you can filter on keys using the `--key` option
 
 ## Emitting Redis Protocol ##
 
-You can convert RDB file into a stream of [redis protocol](http://redis.io/topics/protocol) using the "protocol" command.
+You can convert RDB file into a stream of [redis protocol](http://redis.io/topics/protocol) using the `protocol` command.
 
-    rdb --command protocol /var/redis/6379/dump.rdb
+    > rdb -c protocol /var/redis/6379/dump.rdb
 
     *4
     $4
@@ -113,37 +162,49 @@ You can convert RDB file into a stream of [redis protocol](http://redis.io/topic
     $8
     Sripathi
 
-You can pipe the output to netcat and re-import a subset of the data. 
-For example, if you want to shard your data into two redis instances, you can use the --key flag to select a subset of data, 
+You can pipe the output to netcat and re-import a subset of the data.
+For example, if you want to shard your data into two redis instances, you can use the --key flag to select a subset of data,
 and then pipe the output to a running redis instance to load that data.
-
 Read [Redis Mass Insert](http://redis.io/topics/mass-insert) for more information on this.
 
-## Using the Parser ##
+When printing protocol output, the `--escape` option can be used with `printable` or `utf8` to avoid non printable/control characters.
+
+## Using the Parser ##
 
-    import sys
     from rdbtools import RdbParser, RdbCallback
+    from rdbtools.encodehelpers import bytes_to_unicode
 
-    class MyCallback(RdbCallback) :
-        ''' Simple example to show how callback works. 
+    class MyCallback(RdbCallback):
+        ''' Simple example to show how callback works.
             See RdbCallback for all available callback methods.
             See JsonCallback for a concrete example
-        ''' 
-        def set(self, key, value, expiry):
-            print('%s = %s' % (str(key), str(value)))
-        
+        '''
+
+        def __init__(self):
+            super(MyCallback, self).__init__(string_escape=None)
+
+        def encode_key(self, key):
+            return bytes_to_unicode(key, self._escape, skip_printable=True)
+
+        def encode_value(self, val):
+            return bytes_to_unicode(val, self._escape)
+
+        def set(self, key, value, expiry, info):
+            print('%s = %s' % (self.encode_key(key), self.encode_value(value)))
+
         def hset(self, key, field, value):
-            print('%s.%s = %s' % (str(key), str(field), str(value)))
-        
+            print('%s.%s = %s' % (self.encode_key(key), self.encode_key(field), self.encode_value(value)))
+
         def sadd(self, key, member):
-            print('%s has {%s}' % (str(key), str(member)))
-        
-        def rpush(self, key, value) :
-            print('%s has [%s]' % (str(key), str(value)))
-        
+            print('%s has {%s}' % (self.encode_key(key), self.encode_value(member)))
+
+        def rpush(self, key, value):
+            print('%s has [%s]' % (self.encode_key(key), self.encode_value(value)))
+
         def zadd(self, key, score, member):
             print('%s has {%s : %s}' % (str(key), str(member), str(score)))
 
+
     callback = MyCallback()
     parser = RdbParser(callback)
     parser.parse('/var/redis/6379/dump.rdb')

diff --git a/rdbtools/__init__.py b/rdbtools/__init__.py
@@ -1,10 +1,10 @@
 from rdbtools.parser import RdbCallback, RdbParser, DebugCallback
-from rdbtools.callbacks import JSONCallback, DiffCallback, ProtocolCallback
-from rdbtools.memprofiler import MemoryCallback, PrintAllKeys, StatsAggregator
+from rdbtools.callbacks import JSONCallback, DiffCallback, ProtocolCallback, KeyValsOnlyCallback, KeysOnlyCallback
+from rdbtools.memprofiler import MemoryCallback, PrintAllKeys, StatsAggregator, PrintJustKeys
 
-__version__ = '0.1.8'
+__version__ = '0.1.9'
 VERSION = tuple(map(int, __version__.split('.')))
 
 __all__ = [
-    'RdbParser', 'RdbCallback', 'JSONCallback', 'DiffCallback', 'MemoryCallback', 'ProtocolCallback', 'PrintAllKeys']
+    'RdbParser', 'RdbCallback', 'JSONCallback', 'DiffCallback', 'MemoryCallback', 'ProtocolCallback', 'KeyValsOnlyCallback', 'KeysOnlyCallback', 'PrintJustKeys']