
Commit

Merge pull request #78 from mli/master
update kvstore doc
mli committed Sep 15, 2015
2 parents c926bb1 + b42da79 commit 41f8b07
Showing 7 changed files with 317 additions and 85 deletions.
9 changes: 8 additions & 1 deletion doc/python/python_api.md
@@ -14,13 +14,20 @@
:members:
```


## Executor API

```eval_rst
.. automodule:: mxnet.executor
:members:
```

## KVStore API

```eval_rst
.. automodule:: mxnet.kvstore
:members:
```

## IO API

```eval_rst
148 changes: 134 additions & 14 deletions doc/python/python_guide.md
@@ -1,19 +1,20 @@
# MXNet Python Guide

This page gives a general overview of MXNet's python package. MXNet contains a
mixed flavor of elements for building flexible and efficient applications. There
are mainly three concepts:

* Numpy style [NDArray](#ndarray-numpy-style-tensor-computations-on-cpus-and-gpus)
offers matrix and tensor computations on both CPU and GPU, with automatic
parallelization.

* [Symbol](#symbolic-and-automatic-differentiation) makes defining a neural
network extremely easy, and provides automatic differentiation.

* [KVStore](#distributed-key-value-store) eases data synchronization between
multiple GPUs and machines.

## NDArray: Numpy style tensor computations on CPUs and GPUs

`NDArray` is the basic operation unit in MXNet for matrix and tensor
computations. It is similar to `numpy.ndarray`, but with two additional
@@ -375,14 +376,133 @@ greater flexiblity.

## Distributed Key-value Store

`KVStore` is a place for data sharing. We can think of it as a single object shared
across different devices (GPUs and machines), where each device can push data in
and pull data out.

### Initialization

Let's first consider a simple example: initialize an (`int`, `NDArray`) pair in
the store, and then pull the value out.

```python
>>> mx.kv.start() # start the kvstore
>>> shape = (2,3)
>>> mx.kv.init(3, mx.nd.ones(shape)*2)
>>> a = mx.nd.zeros(shape)
>>> mx.kv.pull(3, out = a)
>>> print a.asnumpy()
[[ 2. 2. 2.]
[ 2. 2. 2.]]
```

### Push, Aggregation, and Updater

For any key that has been initialized, we can push a new value with the same shape to it.

```python
>>> mx.kv.push(3, mx.nd.ones(shape)*8)
>>> mx.kv.pull(3, out = a) # pull out the value
>>> print a.asnumpy()
[[ 8. 8. 8.]
[ 8. 8. 8.]]
```

The data for pushing can be on any device. Furthermore, we can push multiple
values to the same key, in which case `kvstore` first sums all of them and then
pushes the aggregated value.

```python
>>> gpus = [mx.gpu(i) for i in range(4)]
>>> b = [mx.nd.ones(shape, gpu) for gpu in gpus]
>>> mx.kv.push(3, b)
>>> mx.kv.pull(3, out = a)
>>> print a.asnumpy()
[[ 4. 4. 4.]
[ 4. 4. 4.]]
```

For each push, `kvstore` merges the pushed value into the stored value using an
`updater`. The default updater is `ASSIGN`; we can replace it to control how data
is merged.

```python
>>> def update(key, input, stored):
>>>     print "update on key: %d" % key
>>>     stored += input * 2
>>> mx.kv.set_updater(update)
>>> mx.kv.pull(3, out=a)
>>> print a.asnumpy()
[[ 4. 4. 4.]
[ 4. 4. 4.]]
>>> mx.kv.push(3, mx.nd.ones(shape))
update on key: 3
>>> mx.kv.pull(3, out=a)
>>> print a.asnumpy()
[[ 6. 6. 6.]
[ 6. 6. 6.]]
```

### Pull

We have already seen how to pull a single key-value pair. Similarly to push, we
can also pull the value onto several devices with a single call.

```python
>>> b = [mx.nd.ones(shape, gpu) for gpu in gpus]
>>> mx.kv.pull(3, out = b)
>>> print b[1].asnumpy()
[[ 6. 6. 6.]
[ 6. 6. 6.]]
```

### Handle a list of key-value pairs

All operations introduced so far involve a single key. `KVStore` also provides
an interface for a list of key-value pairs. For a single device:

```python
>>> keys = [5, 7, 9]
>>> mx.kv.init(keys, [mx.nd.ones(shape)]*len(keys))
>>> mx.kv.push(keys, [mx.nd.ones(shape)]*len(keys))
update on key: 5
update on key: 7
update on key: 9
>>> b = [mx.nd.zeros(shape)]*len(keys)
>>> mx.kv.pull(keys, out = b)
>>> print b[1].asnumpy()
[[ 3. 3. 3.]
[ 3. 3. 3.]]
```

For multiple devices:

```python
>>> b = [[mx.nd.ones(shape, gpu) for gpu in gpus]] * len(keys)
>>> mx.kv.push(keys, b)
update on key: 5
update on key: 7
update on key: 9
>>> mx.kv.pull(keys, out = b)
>>> print b[1][1].asnumpy()
[[ 11. 11. 11.]
[ 11. 11. 11.]]
```

### Multiple machines

The multi-machine mode is based on the parameter server: the `updater` runs on
the server nodes. MORE...
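
To make the flow concrete, here is a minimal plain-Python sketch of the
parameter-server idea. It is not the MXNet API, and every name in it is
illustrative: workers push values for a key, the server sums them and applies
the registered updater to its stored copy, and workers then pull the result.

```python
# Illustrative sketch only -- plain Python, not the MXNet KVStore API.
# The "server" owns the stored values and applies the updater; the
# "workers" only push and pull.
import numpy as np

store = {3: np.ones((2, 3)) * 2}             # server-side storage: key -> value

def update(key, input, stored):              # same signature as the updater above
    stored += input * 2                      # merge the pushed value in place

def push(key, values):                       # values pushed by one or more workers
    update(key, sum(values), store[key])     # aggregate first, then apply updater

def pull(key):
    return store[key]                        # every worker reads the server's copy

push(3, [np.ones((2, 3)), np.ones((2, 3))])  # two workers each push ones
print(pull(3))                               # 2 + (1 + 1) * 2 = 6 everywhere
```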

## How to Choose between APIs

You can mix them all as much as you like. Here are some guidelines:
* Use the symbolic API and coarse-grained operators to create established network structures.
* Use fine-grained operators to extend parts of a more flexible symbolic graph.
* Do some dynamic NDArray tricks, which are even more flexible, between the forward and backward calls of executors (see the sketch below).
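
As a minimal sketch of the last guideline, assuming `executor`, its gradient
arrays `grads`, and `batch_size` were set up by binding a symbol to a device as
in the cifar10 and mnist examples changed in this commit (the binding code is
omitted here):

```python
# Hedged sketch: `executor`, `grads`, and `batch_size` are assumed to come
# from binding a symbol, as in the examples elsewhere in this commit.
executor.forward()        # symbolic part: run the optimized graph
executor.backward()       # symbolic part: compute the gradients
for g in grads:           # dynamic part: plain NDArray arithmetic in between
    g /= batch_size       # e.g. normalize gradients in place before pushing
```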

We believe that these different ways offer you different levels of flexibility
and efficiency. Normally you do not need to be flexible in all parts of the
network, so we allow you to use the fast optimized parts and compose them
flexibly with fine-grained operators or dynamic NDArray operations. We believe
such a mixture allows you to build deep learning architectures that are both
efficient and flexible, as you choose. To mix is to maximize performance and
flexibility.
10 changes: 5 additions & 5 deletions example/cifar10/cifar10_multi_gpus.py
@@ -10,7 +10,7 @@
# use multiple devices
num_devs = 4
devs = [mx.gpu(i) for i in range(num_devs)]
mx.kv.start()

# define the network
conv_cnt = 1
@@ -113,7 +113,7 @@ def momentum_update(key, grad, weight):

updater = momentum(
learning_rate = .05, weight_decay = .0001, momentum = 0.9)
mx.kv.set_updater(updater)

# infer shape
batch_size = 196
@@ -142,7 +142,7 @@ def momentum_update(key, grad, weight):
val[:] = np.random.uniform(-0.1, 0.1, shape)
elif "gamma" in param_names[idx]:
val[:] = 1.0
mx.kv.init(idx, val)

# data reader
get_data.GetCifar10()
@@ -203,7 +203,7 @@ def train():
for data, label in train_dataiter:
tic = time.time()
# pull weight
mx.kv.pull(sync_indices, out = sync_weights)

# forward and backward
data = data.asnumpy()
@@ -221,7 +221,7 @@ def train():
g /= batch_size

# push gradient
mx.kv.push(sync_indices, sync_grads)

# evaluate
for d in range(num_devs):
16 changes: 8 additions & 8 deletions example/mnist/mlp_multi_gpu.py
@@ -3,14 +3,14 @@
import numpy as np
import os, gzip
import sys
sys.path.append("../../tests/python")
sys.path.append("../../tests/python/common")
import get_data
import time

# use multiple devices
num_devs = 4
devs = [mx.Context('cpu', i) for i in range(num_devs)]
mx.kv.start()

# symbol net
data = mx.symbol.Variable('data')
@@ -26,7 +26,7 @@
def updater(key, grad, weight):
weight -= lr * grad / batch_size

mx.kv.set_updater(updater)

# find the params needed to be synchronized between devices
param_names = mlp.list_arguments()
@@ -39,14 +39,14 @@ def updater(key, grad, weight):
input_shape = (batch_size / num_devs, 784)
param_shapes, out_shapes, aux_shapes = mlp.infer_shape(data=input_shape)

# init param in the kv
np.random.seed(0)
for idx in sync_indices:
shape = param_shapes[idx]
val = mx.nd.zeros(shape)
if "weight" in param_names[idx]:
val[:] = np.random.uniform(-0.07, 0.07, shape)
mx.kv.init(idx, val)

# allocate device's memory
params = [[mx.nd.zeros(s, d) for s in param_shapes] for d in devs]
@@ -86,7 +86,7 @@ def run_sgd():
for data, label in train_dataiter:
# pull weight
for idx in sync_indices:
mx.kv.pull(idx, out = [p[idx] for p in params])

# forward and backward
data = data.asnumpy()
@@ -100,7 +100,7 @@ def run_sgd():
executors[d].backward()
# push gradient
for idx in sync_indices:
mx.kv.push(idx, [g[idx] for g in grads])

# eval
for d in range(num_devs):
2 changes: 1 addition & 1 deletion python/mxnet/__init__.py
@@ -12,7 +12,7 @@
from .base import MXNetError
from . import ndarray
from . import symbol
from . import kvstore as kv
from . import io
# use mx.nd as short for mx.ndarray
from . import ndarray as nd