diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000000..85feb30b6a --- /dev/null +++ b/.coveragerc @@ -0,0 +1,6 @@ +[report] +omit = + */python?.?/* + */site-packages/nose/* + *__init__* + *test/* diff --git a/.gitignore b/.gitignore index e1272e2367..0d47938e13 100644 --- a/.gitignore +++ b/.gitignore @@ -1,19 +1,162 @@ -*.pyc -*~ +.nicesetup + client.cfg -build -dist -luigi.egg-info + +hadoop_test.py +minicluster.py +mrrunner.py + packages.tar + test/data -hadoop_test.py -.nicesetup -.tox + +Vagrantfile + *.pickle *.rej *.orig -.DS_Store -.idea/ + + +# Created by https://www.gitignore.io + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.cache +nosetests.xml +coverage.xml + +# Translations +*.mo +*.pot + +# Django stuff: +*.log + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + + +### Vim ### +[._]*.s[a-w][a-z] +[._]s[a-w][a-z] +*.un~ +Session.vim +.netrwhist +*~ + + +### PyCharm ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm + *.iml -Vagrantfile + +## Directory-based project format: +.idea/ +# if you remove the above rule, at least ignore the following: + +# User-specific stuff: +# .idea/workspace.xml +# .idea/tasks.xml +# .idea/dictionaries + +# Sensitive or high-churn files: +# .idea/dataSources.ids +# .idea/dataSources.xml +# .idea/sqlDataSources.xml +# .idea/dynamic.xml +# .idea/uiDesigner.xml + +# Gradle: +# .idea/gradle.xml +# .idea/libraries + +# Mongo Explorer plugin: +# .idea/mongoSettings.xml + +## File-based project format: +*.ipr +*.iws + +## Plugin-specific files: + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties + + +### Vagrant ### .vagrant/ + + +### OSX ### +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + + +# Thumbnails +._* + +# Files that might appear on external disk +.Spotlight-V100 +.Trashes + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk diff --git a/.travis.yml b/.travis.yml index 12e95c33f5..e0724e2dde 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,8 +10,11 @@ env: global: - PIP_DOWNLOAD_CACHE=$HOME/.pip-cache matrix: + - TOX_ENV=pep8 - TOX_ENV=cdh - TOX_ENV=hdp + - TOX_ENV=nonhdfs + - TOX_ENV=docs sudo: false @@ -21,6 +24,8 @@ cache: install: - pip install tox -script: tox -e $TOX_ENV +script: + - tox -e $TOX_ENV -after_failure: cat /home/travis/build/spotify/luigi/.tox/cdh/log/cdh-1.log +after_failure: + - cat /home/travis/build/spotify/luigi/.tox/cdh/log/cdh-1.log diff --git a/README.rst b/README.rst index dbe6735796..c21bcb785a 100644 --- a/README.rst +++ b/README.rst @@ -2,13 +2,24 @@ :alt: Luigi Logo :align: center - About Luigi ----------- -.. image:: https://travis-ci.org/spotify/luigi.svg?branch=master +.. 
image:: https://img.shields.io/travis/spotify/luigi/master.svg?style=flat :target: https://travis-ci.org/spotify/luigi +.. image:: https://img.shields.io/coveralls/spotify/luigi/master.svg?style=flat + :target: https://coveralls.io/r/spotify/luigi?branch=master + +.. image:: https://landscape.io/github/spotify/luigi/master/landscape.svg?style=flat + :target: https://landscape.io/github/spotify/luigi/master + +.. image:: https://img.shields.io/pypi/dm/luigi.svg?style=flat + :target: https://pypi.python.org/pypi/luigi + +.. image:: https://img.shields.io/pypi/l/luigi.svg?style=flat + :target: https://pypi.python.org/pypi/luigi + Luigi is a Python package that helps you build complex pipelines of batch jobs. It handles dependency resolution, workflow management, visualization, handling failures, command line integration, and much @@ -35,17 +46,11 @@ so that you can focus on the tasks themselves and their dependencies. You can build pretty much any task you want, but Luigi also comes with a *toolbox* of several common task templates that you use. It includes native Python support for running mapreduce jobs in Hadoop, as well as -Pig and Jar jobs. It also comes with filesystem abstractions for HDFS +Hive and Jar jobs. It also comes with filesystem abstractions for HDFS and local files that ensures all file system operations are atomic. This is important because it means your data pipeline will not crash in a state containing partial data. -Luigi was built at `Spotify `_, mainly by -`Erik Bernhardsson `_ and `Elias -Freider `_, but many other people have -contributed. - - Dependency graph example ------------------------ @@ -63,7 +68,7 @@ build up data files. Background ---------- -We use Luigi internally at `Spotify `_ to run +We use Luigi internally at `Spotify `_ to run thousands of tasks every day, organized in complex dependency graphs. Most of these tasks are Hadoop jobs. Luigi provides an infrastructure that powers all kinds of stuff including recommendations, toplists, A/B @@ -75,7 +80,7 @@ can help programmers focus on the most important bits and leave the rest Conceptually, Luigi is similar to `GNU Make `_ where you have certain tasks and these tasks in turn may have dependencies on other tasks. There are -also some similarities to `Oozie `_ +also some similarities to `Oozie `_ and `Azkaban `_. One major difference is that Luigi is not just built specifically for Hadoop, and it's easy to extend it with other kinds of tasks. @@ -100,12 +105,26 @@ if you want to run Hadoop jobs since it makes debugging easier. See Getting Started --------------- -The `Luigi package documentation `_ -contains an overview of how to work with Luigi, including an `Example workflow -`_ and an `API overview +Take a look at the `Example workflow +`_ and the `API overview `_ which explains some of the most important concepts. +Who uses Luigi? +--------------- + +Several companies have written blog posts or presentation about Luigi: + +* `Spotify : NYC Data Science `_ +* `Foursquare `_ +* `Mortar Data `_ +* `Stripe `_ +* `Asana `_ +* `Buffer `_ +* `SeatGeek `_ + +Please let us know if your company wants to be featured on this list! + Getting Help ------------ @@ -113,11 +132,19 @@ Getting Help * Subscribe to the `luigi-user `_ group and ask a question. 
- External links -------------- -* `Documentation `_ (Read the Docs) -* `Mailing List `_ (Google Groups) +* `Documentation `_, including the `Luigi package documentation `_ (Read the Docs) +* `Mailing List `_ (Google Groups) * `Releases `_ (PyPi) * `Source code `_ (Github) + +Authors +------- + +Luigi was built at `Spotify `_, mainly by +`Erik Bernhardsson `_ and `Elias +Freider `_, but many other people have +contributed. + diff --git a/bin/deps.py b/bin/deps.py index fc5445f870..939b55925b 100755 --- a/bin/deps.py +++ b/bin/deps.py @@ -37,12 +37,12 @@ # -from luigi.task import flatten import luigi.interface -from luigi.target import FileSystemTarget -from luigi.postgres import PostgresTarget from luigi.contrib.ssh import RemoteTarget +from luigi.postgres import PostgresTarget from luigi.s3 import S3Target +from luigi.target import FileSystemTarget +from luigi.task import flatten def get_task_requires(task): @@ -61,6 +61,7 @@ def dfs_paths(start_task, goal_task_name, path=None): class UpstreamArg(luigi.Task): + 'Used to provide the global parameter -- upstream' upstream = luigi.Parameter(is_global=True, default=None) @@ -93,14 +94,14 @@ def find_deps_cli(): task_name = d task_output = "n/a" if isinstance(d.output(), RemoteTarget): - task_output="[SSH] {0}:{1}".format(d.output()._fs.remote_context.host, d.output().path) + task_output = "[SSH] {0}:{1}".format(d.output()._fs.remote_context.host, d.output().path) elif isinstance(d.output(), S3Target): - task_output="[S3] {0}".format(d.output().path) - elif isinstance(d.output(),FileSystemTarget): - task_output="[FileSystem] {0}".format(d.output().path) - elif isinstance (d.output(), PostgresTarget): - task_output="[DB] {0}:{1}".format(d.output().host, d.output().table) + task_output = "[S3] {0}".format(d.output().path) + elif isinstance(d.output(), FileSystemTarget): + task_output = "[FileSystem] {0}".format(d.output().path) + elif isinstance(d.output(), PostgresTarget): + task_output = "[DB] {0}:{1}".format(d.output().host, d.output().table) else: - task_output= "to be determined" + task_output = "to be determined" print """ TASK: {0} : {1}""".format(task_name, task_output) diff --git a/bin/luigi-grep.py b/bin/luigi-grep.py index 625697af03..cbdacd8d21 100755 --- a/bin/luigi-grep.py +++ b/bin/luigi-grep.py @@ -1,11 +1,9 @@ #!/usr/bin/env python -from collections import defaultdict - import argparse import json import urllib2 - +from collections import defaultdict parser = argparse.ArgumentParser( "luigi-grep is used to search for workflows using the luigi scheduler's json api") @@ -18,6 +16,7 @@ class LuigiGrep(object): + def __init__(self, host, port): self._host = host self._port = port diff --git a/doc/README.md b/doc/README.md index ce45b4ec02..8d91d9d101 100644 --- a/doc/README.md +++ b/doc/README.md @@ -10,8 +10,7 @@ Sphinx uses ReStructuredText (RST) markup. There's a good describing the syntax. We also use the sphinx [autodoc](http://sphinx- doc.org/ext/autodoc.html) functionality to parse docstrings. For examples of cross-referencing modules/libraries/classes and for documentatingfunction/method -arguments, see docs on [the python domain](http://sphinx-doc.org/domains.html -#the-python-domain). +arguments, see docs on [the python domain](http://sphinx-doc.org/domains.html#the-python-domain). 
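+
+For instance, a docstring using these roles might look like the following
+(the task and parameter shown are purely illustrative):
+
+```python
+class MyTask(luigi.Task):
+    """
+    Example task; cross-references :class:`luigi.Task` and :mod:`luigi.parameter`.
+
+    :param date: the run date for this task.
+    """
+    date = luigi.DateParameter()
+```
+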
API Documentation ----------------- diff --git a/doc/api/luigi.contrib.rst b/doc/api/luigi.contrib.rst index af92020d31..a23500dcde 100644 --- a/doc/api/luigi.contrib.rst +++ b/doc/api/luigi.contrib.rst @@ -20,6 +20,14 @@ luigi.contrib.ftp module :undoc-members: :show-inheritance: +luigi.contrib.sqla module +------------------------- + +.. automodule:: luigi.contrib.sqla + :members: + :undoc-members: + :show-inheritance: + luigi.contrib.mysqldb module ---------------------------- @@ -52,6 +60,14 @@ luigi.contrib.spark module :undoc-members: :show-inheritance: +luigi.contrib.scalding module +----------------------------- + +.. automodule:: luigi.contrib.scalding + :members: + :undoc-members: + :show-inheritance: + luigi.contrib.sparkey module ---------------------------- diff --git a/doc/api/luigi.rst b/doc/api/luigi.rst index ca1cfde128..fbbdadb2a4 100644 --- a/doc/api/luigi.rst +++ b/doc/api/luigi.rst @@ -163,14 +163,6 @@ luigi.s3 module :undoc-members: :show-inheritance: -luigi.scalding module ---------------------- - -.. automodule:: luigi.scalding - :members: - :undoc-members: - :show-inheritance: - luigi.scheduler module ---------------------- diff --git a/doc/api/luigi.tools.rst b/doc/api/luigi.tools.rst new file mode 100644 index 0000000000..b34b10c63c --- /dev/null +++ b/doc/api/luigi.tools.rst @@ -0,0 +1,30 @@ +luigi.tools package +=================== + +Submodules +---------- + +luigi.tools.parse_task module +----------------------------- + +.. automodule:: luigi.tools.parse_task + :members: + :undoc-members: + :show-inheritance: + +luigi.tools.range module +------------------------ + +.. automodule:: luigi.tools.range + :members: + :undoc-members: + :show-inheritance: + + +Module contents +--------------- + +.. automodule:: luigi.tools + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/api_overview.rst b/doc/api_overview.rst index c3daffed1a..5273cd40af 100644 --- a/doc/api_overview.rst +++ b/doc/api_overview.rst @@ -1,43 +1,44 @@ API Overview ------------ -There are two fundamental building blocks of Luigi - the *Task* class -and the *Target* class. Both are abstract classes and expect a few -methods to be implemented. In addition to those two concepts, the -*Parameter* class is an important concept that governs how a Task is -run. +There are two fundamental building blocks of Luigi - +the *Task* class and the *Target* class. +Both are abstract classes and expect a few methods to be implemented. +In addition to those two concepts, +the *Parameter* class is an important concept that governs how a Task is run. Target ~~~~~~ -Broadly speaking, the Target class corresponds to a file on a disk. Or a -file on HDFS. Or some kind of a checkpoint, like an entry in a database. +Broadly speaking, +the Target class corresponds to a file on a disk, +a file on HDFS or some kind of a checkpoint, like an entry in a database. Actually, the only method that Targets have to implement is the *exists* method which returns True if and only if the Target exists. -In practice, implementing Target subclasses is rarely needed. You can -probably get pretty far with the *LocalTarget* and *hdfs.HdfsTarget* -classes that are available out of the box. These directly map to a file -on the local drive, or a file in HDFS, respectively. In addition these -also wrap the underlying operations to make them atomic. They both -implement the *open(flag)* method which returns a stream object that -could be read (flag = 'r') from or written to (flag = 'w'). 
Both -LocalTarget and hdfs.HdfsTarget also optionally take a format parameter. -Luigi comes with Gzip support by providing *format=format.Gzip* . Adding -support for other formats is pretty simple. +In practice, implementing Target subclasses is rarely needed. +You can probably get pretty far with the *LocalTarget* and *hdfs.HdfsTarget* +classes that are available out of the box. +These directly map to a file on the local drive or a file in HDFS, respectively. +In addition these also wrap the underlying operations to make them atomic. +They both implement the *open(flag)* method which returns a stream object that +could be read (flag = 'r') from or written to (flag = 'w'). +Both LocalTarget and hdfs.HdfsTarget also optionally take a format parameter. +Luigi comes with Gzip support by providing *format=format.Gzip*. +Adding support for other formats is pretty simple. Task ~~~~ The *Task* class is a bit more conceptually interesting because this is -where computation is done. There are a few methods that can be -implemented to alter its behavior, most notably *run*, *output* and -*requires*. +where computation is done. +There are a few methods that can be implemented to alter its behavior, +most notably *run*, *output* and *requires*. The Task class corresponds to some type of job that is run, but in -general you want to allow some form of parametrization of it. For -instance, if your Task class runs a Hadoop job to create a report every -night, you probably want to make the date a parameter of the class. +general you want to allow some form of parametrization of it. +For instance, if your Task class runs a Hadoop job to create a report every night, +you probably want to make the date a parameter of the class. Parameter ^^^^^^^^^ @@ -53,12 +54,12 @@ Parameter objects on the class scope: # ... By doing this, Luigi can do take care of all the boilerplate code that -would normally be needed in the constructor. Internally, the DailyReport -object can now be constructed by running -*DailyReport(datetime.date(2012, 5, 10))* or just *DailyReport()*. Luigi -also creates a command line parser that automatically handles the -conversion from strings to Python types. This way you can invoke the job -on the command line eg. by passing *--date 2012-15-10*. +would normally be needed in the constructor. +Internally, the DailyReport object can now be constructed by running +*DailyReport(datetime.date(2012, 5, 10))* or just *DailyReport()*. +Luigi also creates a command line parser that automatically handles the +conversion from strings to Python types. +This way you can invoke the job on the command line eg. by passing *--date 2012-15-10*. The parameters are all set to their values on the Task object instance, i.e. @@ -68,13 +69,13 @@ i.e. d = DailyReport(datetime.date(2012, 5, 10)) print d.date -will return the same date that the object was constructed with. Same -goes if you invoke Luigi on the command line. +will return the same date that the object was constructed with. +Same goes if you invoke Luigi on the command line. Tasks are uniquely identified by their class name and values of their -parameters. In fact, within the same worker, two tasks of the same class -with parameters of the same values are not just equal, but the same -instance: +parameters. +In fact, within the same worker, two tasks of the same class with +parameters of the same values are not just equal, but the same instance: .. 
code:: python @@ -96,10 +97,10 @@ instance: >>> c is d True -However, if a parameter is created with *significant=False*, it is -ignored as far as the Task signature is concerned. Tasks created with -only insignificant parameters differing have the same signature, but are -not the same instance: +However, if a parameter is created with *significant=False*, +it is ignored as far as the Task signature is concerned. +Tasks created with only insignificant parameters differing have the same signature but +are not the same instance: .. code:: python @@ -122,38 +123,86 @@ not the same instance: True Python is not a typed language and you don't have to specify the types -of any of your parameters. You can simply use *luigi.Parameter* if you -don't care. In fact, the reason DateParameter et al exist is just in -order to support command line interaction and make sure to convert the -input to the corresponding type (i.e. datetime.date instead of a -string). +of any of your parameters. +You can simply use *luigi.Parameter* if you don't care. +In fact, the reason DateParameter et al exist is just in order to +support command line interaction and make sure to convert the input to +the corresponding type (i.e. datetime.date instead of a string). + +Setting parameter value for other classes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +All parameters are also exposed on a class level on the command line interface. +For instance, say you have classes TaskA and TaskB: + +.. code:: python + + class TaskA(luigi.Task): + x = luigi.Parameter() + + class TaskB(luigi.Task): + y = luigi.Parameter() + + +You can run *TaskB* on the command line: *python script.py TaskB --y 42*. +But you can also set the class value of *TaskA* by running *python script.py +TaskB --y 42 --TaskA-x 43*. +This sets the value of *TaskA.x* to 43 on a *class* level. +It is still possible to override it inside Python if you instantiate *TaskA(x=44)*. + +Parameters are resolved in the following order of decreasing priority: +1. Any value passed to the constructor, or task level value set on the command line +2. Any class level value set on the command line +3. Any configuration option (if using the *config_path* argument) +4. Any default value provided to the parameter Task.requires ^^^^^^^^^^^^^ -The *requires* method is used to specify dependencies on other Task -object, which might even be of the same class. For instance, an example -implementation could be +The *requires* method is used to specify dependencies on other Task object, +which might even be of the same class. +For instance, an example implementation could be .. code:: python def requires(self): return OtherTask(self.date), DailyReport(self.date - datetime.timedelta(1)) -In this case, the DailyReport task depends on two inputs created -earlier, one of which is the same class. requires can return other Tasks -in any way wrapped up within dicts/lists/tuples/etc. +In this case, the DailyReport task depends on two inputs created earlier, +one of which is the same class. +requires can return other Tasks in any way wrapped up within dicts/lists/tuples/etc. + +Requiring another Task +^^^^^^^^^^^^^^^^^^^^^^ + +Note that requires() can *not* return a Target object. +If you have a simple Target object that is created externally +you can wrap it in a Task class like this: + +.. code:: python + + class LogFiles(luigi.Task): + def output(self): + return luigi.hdfs.HdfsTarget('/log') + +This also makes it easier to add parameters: + +.. 
code:: python + + class LogFiles(luigi.Task): + date = luigi.DateParameter() + def output(self): + return luigi.hdfs.HdfsTarget(self.date.strftime('/log/%Y-%m-%d')) Task.output ^^^^^^^^^^^ -The *output* method returns one or more Target objects. Similarly to -requires, can return wrap them up in any way that's convenient for you. -However we recommend that any Task only return one single Target in -output. If multiple outputs are returned, atomicity will be lost unless -the Task itself can ensure that the Targets are atomically created. (If -atomicity is not of concern, then it is safe to return multiple Target -objects.) +The *output* method returns one or more Target objects. +Similarly to requires, can return wrap them up in any way that's convenient for you. +However we recommend that any Task only return one single Target in output. +If multiple outputs are returned, +atomicity will be lost unless the Task itself can ensure that the Targets are atomically created. +(If atomicity is not of concern, then it is safe to return multiple Target objects.) .. code:: python @@ -166,12 +215,13 @@ objects.) Task.run ^^^^^^^^ -The *run* method now contains the actual code that is run. Note that -Luigi breaks down everything into two stages. First it figures out all -dependencies between tasks, then it runs everything. The *input()* -method is an internal helper method that just replaces all Task objects -in requires with their corresponding output. For instance, in this -example +The *run* method now contains the actual code that is run. +When you are using *requires()* and *run()*, Luigi breaks down everything into two stages. +First it figures out all dependencies between tasks, +then it runs everything. +The *input()* method is an internal helper method that just replaces all Task objects in requires +with their corresponding output. +An example: .. code:: python @@ -193,26 +243,58 @@ example g.write('%s\n', ''.join(reversed(line.strip().split())) g.close() # needed because files are atomic + +Dynamic dependencies +^^^^^^^^^^^^^^^^^^^^ + +Sometimes you might not now exactly what other tasks to depend on until runtime. +In that case, Luigi provides a mechanism to specify dynamic dependencies. +If you yield another Task in the run() method, +the current task will be suspended and the other task will be run. +You can also return a list of tasks. + +.. code:: python + + class MyTask(luigi.Task): + def run(self): + other_target = yield OtherTask() + + # dynamic dependencies resolve into targets + f = other_target.open('r') + + +This mechanism is an alternative to *requires()* in case +you are not able to build up the full dependency graph before running the task. +It does come with some constraints: +the run() method will resume from scratch each time a new task is yielded. +In other words, you should make sure your run() method is idempotent. +(This is good practice for all Tasks in Luigi, but especially so for tasks with dynamic dependencies). + +For an example of a workflow using dynamic dependencies, see +`examples/dynamic_requirements.py `_. + + Events and callbacks ^^^^^^^^^^^^^^^^^^^^ -Luigi has a built-in event system that allows you to register callbacks -to events and trigger them from your own tasks. You can both hook into -some pre-defined events and create your own. Each event handle is tied -to a Task class, and will be triggered only from that class or a -subclass of it. This allows you to effortlessly subscribe to events only -from a specific class (e.g. for hadoop jobs). 
+Luigi has a built-in event system that +allows you to register callbacks to events and trigger them from your own tasks. +You can both hook into some pre-defined events and create your own. +Each event handle is tied to a Task class and +will be triggered only from that class or +a subclass of it. +This allows you to effortlessly subscribe to events only from a specific class (e.g. for hadoop jobs). .. code:: python - @luigi.Task.event_handler(luigi.Event.SUCCESS): + @luigi.Task.event_handler(luigi.Event.SUCCESS) def celebrate_success(task): """Will be called directly after a successful execution of `run` on any Task subclass (i.e. all luigi Tasks) """ ... - @luigi.hadoop.JobTask.event_handler(luigi.Event.FAILURE): + @luigi.hadoop.JobTask.event_handler(luigi.Event.FAILURE) def mourn_failure(task, exception): """Will be called directly after a failed execution of `run` on any JobTask subclass @@ -225,11 +307,11 @@ from a specific class (e.g. for hadoop jobs). But I just want to run a Hadoop job? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The Hadoop code is integrated in the rest of the Luigi code because we -really believe almost all Hadoop jobs benefit from being part of some -sort of workflow. However, in theory, nothing stops you from using the -hadoop.JobTask class (and also hdfs.HdfsTarget) without using the rest -of Luigi. You can simply run it manually using +The Hadoop code is integrated in the rest of the Luigi code because +we really believe almost all Hadoop jobs benefit from being part of some sort of workflow. +However, in theory, nothing stops you from using the hadoop.JobTask class (and also hdfs.HdfsTarget) +without using the rest of Luigi. +You can simply run it manually using .. code:: python @@ -248,17 +330,18 @@ You can use the hdfs.HdfsTarget class anywhere by just instantiating it: Task priority ^^^^^^^^^^^^^ -The scheduler decides which task to run next from the set of all task -that have all their dependencies met. By default, this choice is pretty -arbitrary, which is fine for most workflows and situations. +The scheduler decides which task to run next from +the set of all task that have all their dependencies met. +By default, this choice is pretty arbitrary, +which is fine for most workflows and situations. -If you want to have some control on the order of execution -of available tasks, you can set the *priority* property of a task, +If you want to have some control on the order of execution of available tasks, +you can set the *priority* property of a task, for example as follows: .. code:: python - # A static priority value as a class contant: + # A static priority value as a class constant: class MyTask(luigi.Task): priority = 100 # ... @@ -273,24 +356,26 @@ for example as follows: return 40 # ... -Tasks with a higher priority value will be picked before tasks -with a lower priority value. -There is no predefined range of priorities, you can choose whatever -(int or float) values you want to use. The default value is 0. -Note that it is perfectly valid to choose negative priorities for -tasks that should have less priority than default. - -Warning: task execution order in Luigi is influenced by both dependencies -and priorities, but in Luigi dependencies come first. For example: -if there is a task A with priority 1000 but still with unmet dependencies -and a task B with priority 1 without any pending dependencies, +Tasks with a higher priority value will be picked before tasks with a lower priority value. 
+There is no predefined range of priorities, +you can choose whatever (int or float) values you want to use. +The default value is 0. +Note that it is perfectly valid to choose negative priorities +for tasks that should have less priority than default. + +Warning: task execution order in Luigi is influenced by both dependencies and priorities, but +in Luigi dependencies come first. +For example: +if there is a task A with priority 1000 but still with unmet dependencies and +a task B with priority 1 without any pending dependencies, task B will be picked first. Instance caching ^^^^^^^^^^^^^^^^ -In addition to the stuff mentioned above, Luigi also does some metaclass -logic so that if eg. *DailyReport(datetime.date(2012, 5, 10))* is -instantiated twice in the code, it will in fact result in the same -object. This is needed so that each Task is run only once. \ No newline at end of file +In addition to the stuff mentioned above, +Luigi also does some metaclass logic so that +if e.g. *DailyReport(datetime.date(2012, 5, 10))* is instantiated twice in the code, +it will in fact result in the same object. +This is needed so that each Task is run only once. diff --git a/doc/central_scheduler.rst b/doc/central_scheduler.rst index 0a757fe7d9..a745497f9a 100644 --- a/doc/central_scheduler.rst +++ b/doc/central_scheduler.rst @@ -1,15 +1,89 @@ Using the Central Scheduler -^^^^^^^^^^^^^^^^^^^^^^^^^^^ +--------------------------- -The central scheduler does not execute anything for you, or help you -with job parallelization. The two purposes it serves are to +While the ``--local-scheduler`` flag is useful for development purposes, +it's not recommended for production usage. +The centralized scheduler services two purposes: -- Make sure two instances of the same task are not running - simultaneously +- Make sure two instances of the same task are not running simultaneously - Provide visualization of everything that's going on. -For running tasks periodically, the easiest thing to do is to trigger a -Python script from cron or from a continuously running process. There is -no central process that automatically triggers job. This model may seem -limited, but we believe that it makes things far more intuitive and easy -to understand. \ No newline at end of file +Note that the central scheduler does not execute anything for you or +help you with job parallelization. +For running tasks periodically, +the easiest thing to do is to trigger a Python script from cron or +from a continuously running process. +There is no central process that automatically triggers job. +This model may seem limited, but +we believe that it makes things far more intuitive and easy to understand. + +The luigid server +~~~~~~~~~~~~~~~~~ + +To run the server as a daemon run: + +:: + + PYTHONPATH=. python bin/luigid --background --pidfile --logdir --state-path + +Note that this requires ``python-daemon``. +By default, the server starts on port ``8082`` +(which can be changed with the ``--port`` flag) and listens on all IPs. + +For a full list of configuration options and defaults, +see the :ref:`scheduler configuration section `. +Note that ``luigid`` uses the same configuration files as the luigi client +(i.e. ``client.cfg`` or ``/etc/luigi/client.cfg`` by default). + +Enabling Task History +~~~~~~~~~~~~~~~~~~~~~ + +Task History is an experimental feature in which +additional information about tasks that have been executed are recorded in a relational database +for historical analysis. 
+This information is exposed via the Central Scheduler at ``/history``. + +To enable the task history, +specify ``record_task_history = True`` in the +``[scheduler]`` section of ``client.cfg`` and +specify ``db_connection`` under ``[task_history]``. +The ``db_connection`` string is to used to configure the `SQLAlchemy engine +`_. +When starting up, +``luigid`` will create all the necessary tables using `create_all +`_. + +Example configuration:: + + [scheduler] + record_task_history = True + state-path = /usr/local/var/luigi-state.pickle + + [task_history] + db_connection = sqlite:////usr/local/var/luigi-task-hist.db + +The task history has the following pages: + +* ``/history`` + a reverse-cronological listing of runs from the past 24 hours. + Example screenshot: + + .. figure:: history.png + :alt: Recent history screenshot +* ``/history/by_id/:id`` + detailed information about a run, including: + parameter values, the host on which it ran, and timing information. + Example screenshot: + + .. figure:: history_by_id.png + :alt: By id screenshot +* ``/history/by_name/:name`` + a listing of all runs of a task with the given task name. + Example screenshot: + + .. figure:: history_by_name.png + :alt: By name screenshot +* ``/history/by_params/:name?data=params`` + a listing of all runs of a given task restricted to runs with param values matching the given data. + The data is a json blob describing the parameters, + e.g. ``{"foo": "bar"}`` looks for a task with ``foo=bar``. diff --git a/doc/command_line.rst b/doc/command_line.rst index 3e1ea64973..a640f8a739 100644 --- a/doc/command_line.rst +++ b/doc/command_line.rst @@ -21,17 +21,18 @@ Any task can be instantiated and run from the command line: You can run this task from the command line like this:: - $ python my_task.py MyTask --x 123 --y 456 + $ python my_task.py MyTask --local-scheduler --x 123 --y 456 -You can also pass ``main_task_cls=MyTask`` to ``luigi.run()`` and that way +You can also pass ``main_task_cls=MyTask`` and ``local_scheduler=True`` to ``luigi.run()`` and that way you can invoke it simply using :: $ python my_task.py --x 123 --y 456 -The other way to run a Luigi task is to use the builtin *luigi* task. This will -be default on your path and can be run by providing a module name. The module -will imported dynamically:: +The other way to run a Luigi task is to use the builtin *luigi* task. +This will be default on your path and +can be run by providing a module name. +The module will imported dynamically:: $ luigi --module my_module MyTask --x 123 --y 456 diff --git a/doc/conf.py b/doc/conf.py index 1f30c58a08..9100ab455e 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -74,7 +74,7 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ['_build'] +exclude_patterns = ['_build', 'README.rst'] # The reST default role (used for this markup: `text`) to use for all # documents. @@ -149,7 +149,7 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +#html_static_path = ['_static'] # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. 
These files are copied diff --git a/doc/configuration.rst b/doc/configuration.rst index 2e7e473a35..37999ac2d9 100644 --- a/doc/configuration.rst +++ b/doc/configuration.rst @@ -17,7 +17,32 @@ each controlling a different part of the config. Example default-scheduler-host: luigi-host.mycompany.foo error-email: foo@bar.baz -Below, we describe each section and the parameters available within it. +By default, all parameters will be overridden by matching values in the +configuration file. For instance if you have a Task definition: + +.. code:: python + + class DailyReport(luigi.hadoop.JobTask): + date = luigi.DateParameter(default=datetime.date.today()) + # ... + +Then you can override the default value for date by providing it in the +configuration: + +:: + + [DailyReport] + date: 2012-01-01 + +You can also use ``config_path`` as an argument to the ``Parameter`` if +you want to use a specific section in the config. + + +Configurable options +==================== + +Luigi comes with a lot of configurable options. Below, we describe each +section and the parameters available within it. [core] @@ -71,6 +96,14 @@ max-reschedules reschedule a job if it is found to not be done when attempting to run a dependent job. This defaults to 1. +max-shown-tasks + .. versionadded:: 1.0.20 + + The maximum number of tasks returned in a task_list api call. This + will restrict the number of tasks shown in any section in the + visualiser. Small values can alleviate frozen browsers when there are + too many done tasks. This defaults to 100000 (one hundred thousand). + no_configure_logging If true, logging is not configured. Defaults to false. @@ -120,12 +153,22 @@ worker-count-uniques worker-keep-alive If true, workers will stay alive when they run out of jobs to run, as long as they have some pending job waiting to be run. Defaults to - true. + false. worker-ping-interval Number of seconds to wait between pinging scheduler to let it know that the worker is still alive. Defaults to 1.0. +worker-timeout + .. versionadded:: 1.0.20 + + Number of seconds after which to kill a task which has been running + for too long. This provides a default value for all tasks, which can + be overridden by setting the worker-timeout property in any task. This + only works when using multiple workers, as the timeout is implemented + by killing worker subprocesses. Default value is 0, meaning no + timeout. + worker-wait-interval Number of seconds for the worker to wait before asking the scheduler for another job after the scheduler has said that it does not have any @@ -202,6 +245,9 @@ client client_version Optionally specifies hadoop client version for snakebite. +effective_user + Optionally specifies the effective user for snakebite. + namenode_host The hostname of the namenode. Needed for snakebite if snakebite_autoconfig is not set. @@ -318,6 +364,8 @@ scalding-libjars SCALDING_HOME/libjars or /usr/share/scalding/libjars +.. _scheduler-config: + [scheduler] ----------- diff --git a/doc/contributing.rst b/doc/contributing.rst index 84fec1a2a2..a8650519da 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -7,14 +7,21 @@ where x = luigi and y = spotify. Running Unit Tests ~~~~~~~~~~~~~~~~~~ -1. Install required packages: ``pip install -r test/requirements.txt`` -2. 
From the top directory, run - `Nose `__: ``nosetests`` - - - To run all tests within individual files: - ``nosetests test/parameter_test.py test/fib_test.py ...`` - - To run named tests within individual files: - ``nosetests -m '(testDate.*|testInt)' test/parameter_test.py ...`` +You can see in ``.travis.yml`` how Travis CI runs the tests. Essentially, what +you do is first ``pip install tox``, then you can run any of these examples and +change them to your needs. + + +.. code-block:: bash + + # Run all nonhdfs tests + export TOX_ENV=nonhdfs; export PYTHONPATH=''; tox -e $TOX_ENV test + + # Run specific nonhdfs tests + export TOX_ENV=nonhdfs; export PYTHONPATH=''; tox -e $TOX_ENV test/test_ssh.py + + # Run specific hdp tests with hdp hadoop distrubtion + export TOX_ENV=hdp; export PYTHONPATH=''; JAVA_HOME=/usr/lib/jvm/java-1.7.0-openjdk-amd64 tox -e $TOX_ENV test/snakebite_test.py Future Ideas ~~~~~~~~~~~~ diff --git a/doc/example_top_artists.rst b/doc/example_top_artists.rst index 65420df537..446ba39058 100644 --- a/doc/example_top_artists.rst +++ b/doc/example_top_artists.rst @@ -1,14 +1,15 @@ Example Workflow – Top Artists ------------------------------ -This is a very simplified case of something we do at Spotify a lot. All -user actions are logged to HDFS where we run a bunch of Hadoop jobs to -transform the data. At some point we might end up with a smaller data -set that we can bulk ingest into Cassandra, Postgres, or some other -format. +This is a very simplified case of something we do at Spotify a lot. +All user actions are logged to HDFS where +we run a bunch of Hadoop jobs to transform the data. +At some point we might end up with +a smaller data set that we can bulk ingest into Cassandra, Postgres, or +some other format. -For the purpose of this exercise, we want to aggregate all streams, and -find the top 10 artists. We will then put it into Postgres. +For the purpose of this exercise, we want to aggregate all streams, +find the top 10 artists and then put the results into Postgres. This example is also available in ``examples/top_artists.py`` @@ -40,10 +41,10 @@ Step 1 - Aggregate Artist Streams print >> out_file, artist, count Note that this is just a portion of the file *examples/top\_artists.py*. -In particular, ``Streams`` is defined as a ``luigi.Task``, acting as a -dependency for ``AggregateArtists``. In addition, ``luigi.run()`` is -called if the script is executed directly, allowing it to be run from -the command line. +In particular, ``Streams`` is defined as a ``luigi.Task``, +acting as a dependency for ``AggregateArtists``. +In addition, ``luigi.run()`` is called if the script is executed directly, +allowing it to be run from the command line. There are several pieces of this snippet that deserve more explanation. @@ -96,10 +97,12 @@ overview of the options: AggregateArtists.date_interval Running the command again will do nothing because the output file is -already created. In that sense, any task in Luigi is *idempotent* +already created. +In that sense, any task in Luigi is *idempotent* because running it many times gives the same outcome as running it once. Note that unlike Makefile, the output will not be recreated when any of -the input files is modified. You need to delete the output file +the input files is modified. +You need to delete the output file manually. 
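+
+For example, to force a re-run you can remove the task's output first
+(the path and invocation below are only an illustration; use whatever your
+task's ``output()`` actually points to):
+
+::
+
+    $ rm data/artist_streams_2012-06.tsv
+    $ python examples/top_artists.py AggregateArtists --local-scheduler --date-interval 2012-06
+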
The *--local-scheduler* flag tells Luigi not to connect to a scheduler @@ -137,18 +140,19 @@ Note that ``luigi.hadoop.JobTask`` doesn't require you to implement a Step 2 – Find the Top Artists ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -At this point, we've counted the number of streams for each artists, for -the full time period. We are left with a large file that contains -mappings of artist -> count data, and we want to find the top 10 -artists. Since we only have a few hundred thousand artists, and -calculating artists is nontrivial to parallelize, we choose to do this -not as a Hadoop job, but just as a plain old for-loop in Python. +At this point, we've counted the number of streams for each artists, +for the full time period. +We are left with a large file that contains +mappings of artist -> count data, and we want to find the top 10 artists. +Since we only have a few hundred thousand artists, and +calculating artists is nontrivial to parallelize, +we choose to do this not as a Hadoop job, but just as a plain old for-loop in Python. .. code:: python class Top10Artists(luigi.Task): date_interval = luigi.DateIntervalParameter() - use_hadoop = luigi.BooleanParameter() + use_hadoop = luigi.BoolParameter() def requires(self): if self.use_hadoop: @@ -172,9 +176,9 @@ not as a Hadoop job, but just as a plain old for-loop in Python. yield int(streams), int(artist) The most interesting thing here is that this task (*Top10Artists*) -defines a dependency on the previous task (*AggregateArtists*). This -means that if the output of *AggregateArtists* does not exist, the task -will run before *Top10Artists*. +defines a dependency on the previous task (*AggregateArtists*). +This means that if the output of *AggregateArtists* does not exist, +the task will run before *Top10Artists*. :: @@ -186,15 +190,15 @@ Step 3 - Insert into Postgres ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This mainly serves as an example of a specific subclass *Task* that -doesn't require any code to be written. It's also an example of how you -can define task templates that you can reuse for a lot of different -tasks. +doesn't require any code to be written. +It's also an example of how you can define task templates that +you can reuse for a lot of different tasks. .. code:: python class ArtistToplistToDatabase(luigi.postgres.CopyToTable): date_interval = luigi.DateIntervalParameter() - use_hadoop = luigi.BooleanParameter() + use_hadoop = luigi.BoolParameter() host = "localhost" database = "toplists" @@ -217,17 +221,18 @@ building all its upstream dependencies. Using the Central Planner ~~~~~~~~~~~~~~~~~~~~~~~~~ -The *--local-scheduler* flag tells Luigi not to connect to a central -scheduler. This is recommended in order to get started and or for -development purposes. At the point where you start putting things in -production we strongly recommend running the central scheduler server. -In addition to providing locking so the same task is not run by multiple -processes at the same time, this server also provides a pretty nice -visualization of your current work flow. +The *--local-scheduler* flag tells Luigi not to connect to a central scheduler. +This is recommended in order to get started and or for development purposes. +At the point where you start putting things in production +we strongly recommend running the central scheduler server. +In addition to providing locking +so that the same task is not run by multiple processes at the same time, +this server also provides a pretty nice visualization of your current work flow. 
-If you drop the *--local-scheduler* flag, your script will try to -connect to the central planner, by default at localhost port 8082. If -you run +If you drop the *--local-scheduler* flag, +your script will try to connect to the central planner, +by default at localhost port 8082. +If you run :: @@ -240,31 +245,26 @@ in the background and then run $ python wordcount.py --date 2012-W03 then in fact your script will now do the scheduling through a -centralized server. You need `Tornado `__ -for this to work. +centralized server. +You need `Tornado `__ for this to work. Launching *http://localhost:8082* should show something like this: .. figure:: web_server.png :alt: Web server screenshot - Web server screenshot -Looking at the dependency graph for any of the tasks yields something -like this: +Web server screenshot +Looking at the dependency graph +for any of the tasks yields something like this: .. figure:: aggregate_artists.png :alt: Aggregate artists screenshot - Aggregate artists screenshot -In case your job crashes remotely due to any Python exception, Luigi -will try to fetch the traceback and print it on standard output. You -need `Mechanize `__ for it +Aggregate artists screenshot +In case your job crashes remotely due to any Python exception, +Luigi will try to fetch the traceback and print it on standard output. +You need `Mechanize `__ for it to work and you also need connectivity to your tasktrackers. -To run the server as a daemon run: - -:: - - PYTHONPATH=. python bin/luigid --background --pidfile --logdir --state-path - -Note that this requires python-daemon for this to work. +In production, you'll want to run the centralized scheduler. +See: :doc:`central_scheduler` for more information. diff --git a/doc/execution_model.rst b/doc/execution_model.rst index 542aec3093..11ff9147f9 100644 --- a/doc/execution_model.rst +++ b/doc/execution_model.rst @@ -1,34 +1,38 @@ Execution Model --------------- -Luigi has a quite simple model. The most important aspect is that *no -execution is transferred*. When you run a Luigi workflow, the worker -schedules all tasks, and also executes the tasks within the process. +Luigi has a quite simple model. +The most important aspect is that *no execution is transferred*. +When you run a Luigi workflow, +the worker schedules all tasks, and +also executes the tasks within the process. -The benefit of this scheme is that it's super easy to debug since all -execution takes place in the process. It also makes deployment a -non-event. During development, you typically run the Luigi workflow from -the command line, whereas when you deploy it, you can trigger it using -crontab or any other scheduler. +The benefit of this scheme is that +it's super easy to debug since all execution takes place in the process. +It also makes deployment a non-event. +During development, +you typically run the Luigi workflow from the command line, +whereas when you deploy it, +you can trigger it using crontab or any other scheduler. -The downside is that Luigi doesn't give you scalability for free, but we -think that should really be up to each Task to implement rather than -relying on Luigi as a scalability engine. Another downside is that you -have to rely on an external scheduler such as crontab to actually -trigger the workflows. +The downside is that Luigi doesn't give you scalability for free, but +we think that should really be up to each Task to implement rather than +relying on Luigi as a scalability engine. 
+Another downside is that you have to rely on an external scheduler +such as crontab to actually trigger the workflows. -Isn't the point of Luigi to automate and schedule these workflows? Not -necessarily. Luigi helps you *encode the dependencies* of tasks and -build up chains. Furthermore, Luigi's scheduler makes sure that there's -centralized view of the dependency graph and that the same job will not -be executed by multiple workers simultaneously. +Isn't the point of Luigi to automate and schedule these workflows? +Not necessarily. +Luigi helps you *encode the dependencies* of tasks and +build up chains. +Furthermore, Luigi's scheduler makes sure that there's centralized view of the dependency graph and +that the same job will not be executed by multiple workers simultaneously. -This means that scheduling a complex workflow is fairly trivial using -eg. crontab. If you have an external data dump that arrives every day -and that your workflow depends on it, you write a workflow that depends -on this data dump. Crontab can then trigger this workflow *every minute* -to check if the data has arrived. If it has, it will run the full -dependency graph. +This means that scheduling a complex workflow is fairly trivial using eg. crontab. +If you have an external data dump that arrives every day and that your workflow depends on it, +you write a workflow that depends on this data dump. +Crontab can then trigger this workflow *every minute* to check if the data has arrived. +If it has, it will run the full dependency graph. .. code:: python @@ -53,13 +57,14 @@ dependency graph. if __name__ == '__main__': luigi.run(main_task_cls=RunAll) -You can trigger this as much as you want from crontab, and even across -multiple machines, because the central scheduler will make sure at most -one of each ``AggregationTask`` task is run simultaneously. Note that -this might actually mean multiple tasks can be run because there are -instances with different parameters, and this can gives you some form of -parallelization (eg. ``AggregationTask(2013-01-09)`` might run in -parallel with ``AggregationTask(2013-01-08)``). +You can trigger this as much as you want from crontab, and +even across multiple machines, because +the central scheduler will make sure at most one of each ``AggregationTask`` task is run simultaneously. +Note that this might actually mean multiple tasks can be run because +there are instances with different parameters, and +this can gives you some form of parallelization +(eg. ``AggregationTask(2013-01-09)`` might run in parallel with ``AggregationTask(2013-01-08)``). -Of course, some Task types (eg. ``HadoopJobTask``) can transfer -execution to other places, but this is up to each Task to define. +Of course, +some Task types (eg. ``HadoopJobTask``) can transfer execution to other places, but +this is up to each Task to define. 
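+
+A minimal crontab sketch of this pattern, assuming the example above is saved
+as ``run_all.py`` and a central ``luigid`` is running (otherwise add
+``--local-scheduler``); the paths are placeholders:
+
+::
+
+    # try the workflow every minute; idempotent tasks and the central
+    # scheduler's locking make the frequent retries harmless
+    * * * * * cd /path/to/workflows && python run_all.py >> /var/log/luigi/run_all.log 2>&1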
diff --git a/doc/history.png b/doc/history.png new file mode 100644 index 0000000000..e8173fcf85 Binary files /dev/null and b/doc/history.png differ diff --git a/doc/history_by_id.png b/doc/history_by_id.png new file mode 100644 index 0000000000..97a90f1cc5 Binary files /dev/null and b/doc/history_by_id.png differ diff --git a/doc/history_by_name.png b/doc/history_by_name.png new file mode 100644 index 0000000000..5bef1291a4 Binary files /dev/null and b/doc/history_by_name.png differ diff --git a/doc/index.rst b/doc/index.rst index a540eb5d31..aa4aa95ecc 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -3,7 +3,7 @@ You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. -.. include:: ../README.rst +.. include:: README.rst Table of Contents ----------------- @@ -30,6 +30,7 @@ API Reference luigi luigi.contrib + luigi.tools Indices and tables diff --git a/doc/luigi_patterns.rst b/doc/luigi_patterns.rst index d547e753bc..04ec0770fd 100644 --- a/doc/luigi_patterns.rst +++ b/doc/luigi_patterns.rst @@ -4,27 +4,26 @@ Luigi Patterns Code Reuse ~~~~~~~~~~ -One nice thing about Luigi is that it's super easy to depend on tasks -defined in other repos. It's also trivial to have "forks" in the -execution path, where the output of one task may become the input of -many other tasks. - -Currently no semantics for "intermediate" output is supported, meaning -that all output will be persisted indefinitely. The upside of that is -that if you try to run X -> Y, and Y crashes, you can resume with the -previously built X. The downside is that you will have a lot of -intermediate results on your file system. A useful pattern is to put -these files in a special directory and have some kind of periodical -garbage collection clean it up. +One nice thing about Luigi is that it's super easy to depend on tasks defined in other repos. +It's also trivial to have "forks" in the execution path, +where the output of one task may become the input of many other tasks. + +Currently no semantics for "intermediate" output is supported, +meaning that all output will be persisted indefinitely. +The upside of that is that if you try to run X -> Y, and Y crashes, +you can resume with the previously built X. +The downside is that you will have a lot of intermediate results on your file system. +A useful pattern is to put these files in a special directory and +have some kind of periodical garbage collection clean it up. Triggering Many Tasks ~~~~~~~~~~~~~~~~~~~~~ -A common use case is to make sure some daily Hadoop job (or something -else) is run every night. Sometimes for various reasons things will -crash for more than a day though. A useful pattern is to have a dummy -Task at the end just declaring dependencies on the past few days of -tasks you want to run. +A common use case is to make sure some daily Hadoop job +(or something else) is run every night. +Sometimes for various reasons things will crash for more than a day though. +A useful pattern is to have a dummy Task at the end +just declaring dependencies on the past few days of tasks you want to run. .. code:: python @@ -36,5 +35,5 @@ tasks you want to run. date = self.date - datetime.timedelta(i + 1) yield SomeReport(date), SomeOtherReport(date), CropReport(date), TPSReport(date), FooBarBazReport(date) -This simple task will not do anything itself, but will invoke a bunch of -other tasks. \ No newline at end of file +This simple task will not do anything itself, but +will invoke a bunch of other tasks. 
\ No newline at end of file diff --git a/doc/more_info.rst b/doc/more_info.rst index 84341776ca..fad28a063a 100644 --- a/doc/more_info.rst +++ b/doc/more_info.rst @@ -2,49 +2,41 @@ More Info --------- -Luigi is the successor to a couple of attempts that we weren't fully -happy with. We learned a lot from our mistakes and some design decisions -include: +Luigi is the successor to a couple of attempts that we weren't fully happy with. +We learned a lot from our mistakes and some design decisions include: - Straightforward command line integration. - As little boilerplate as possible. -- Focus on job scheduling and dependency resolution, not a particular - platform. In particular this means no limitation to Hadoop. Though - Hadoop/HDFS support is built-in and is easy to use, this is just one - of many types of things you can run. -- A file system abstraction where code doesn't have to care about where - files are located. -- Atomic file system operations through this abstraction. If a task - crashes it won't lead to a broken state. -- The dependencies are decentralized. No big config file in XML. Each - task just specifies which inputs it needs and cross-module - dependencies are trivial. -- A web server that renders the dependency graph and does locking etc - for free. +- Focus on job scheduling and dependency resolution, not a particular platform. + In particular this means no limitation to Hadoop. + Though Hadoop/HDFS support is built-in and is easy to use, + this is just one of many types of things you can run. +- A file system abstraction where code doesn't have to care about where files are located. +- Atomic file system operations through this abstraction. + If a task crashes it won't lead to a broken state. +- The dependencies are decentralized. + No big config file in XML. + Each task just specifies which inputs it needs and cross-module dependencies are trivial. +- A web server that renders the dependency graph and does locking etc for free. - Trivial to extend with new file systems, file formats and job types. - You can easily write jobs that inserts a Tokyo Cabinet into - Cassandra. Adding broad support S3, MySQL or Hive should be a stroll - in the park. (Feel free to send us a patch when you're done!) + You can easily write jobs that inserts a Tokyo Cabinet into Cassandra. + Adding broad support S3, MySQL or Hive should be a stroll in the park. + (Feel free to send us a patch when you're done!) - Date algebra included. - Lots of unit tests of the most basic stuff -It wouldn't be fair not to mention some limitations with the current -design: +It wouldn't be fair not to mention some limitations with the current design: -- Its focus is on batch processing so it's probably less useful for - near real-time pipelines or continuously running processes. -- The assumption is that a each task is a sizable chunk of work. While - you can probably schedule a few thousand jobs, it's not meant to - scale beyond tens of thousands. -- Luigi maintains a strict separation between scheduling tasks and - running them. Dynamic for-loops and branches are non-trivial to - implement. For instance, it's tricky to iterate a numerical - computation task until it converges. +- Its focus is on batch processing so + it's probably less useful for near real-time pipelines or continuously running processes. +- The assumption is that a each task is a sizable chunk of work. + While you can probably schedule a few thousand jobs, + it's not meant to scale beyond tens of thousands. 
+- Luigi maintains a strict separation between scheduling tasks and running them. + Dynamic for-loops and branches are non-trivial to implement. + For instance, it's tricky to iterate a numerical computation task until it converges. -It should actually be noted that all these limitations are not -fundamental in any way. However, it would take some major refactoring -work. - -Also it should be mentioned that Luigi is named after the world's second -most famous plumber. +It should actually be noted that all these limitations are not fundamental in any way. +However, it would take some major refactoring work. +Also it should be mentioned that Luigi is named after the world's second most famous plumber. diff --git a/doc/programmatic_execution.rst b/doc/programmatic_execution.rst index 5c72e06b12..86894e9720 100644 --- a/doc/programmatic_execution.rst +++ b/doc/programmatic_execution.rst @@ -11,9 +11,9 @@ As seen above, command line integration is achieved by simply adding This will read the args from the command line (using argparse) and invoke everything. -In case you just want to run a Luigi chain from a Python script, you can -do that internally without the command line integration. The code will -look something like +In case you just want to run a Luigi chain from a Python script, +you can do that internally without the command line integration. +The code will look something like .. code:: python diff --git a/examples/dynamic_requirements.py b/examples/dynamic_requirements.py index f043910a68..6b56b74c17 100644 --- a/examples/dynamic_requirements.py +++ b/examples/dynamic_requirements.py @@ -1,7 +1,8 @@ -import luigi import random as rnd import time +import luigi + class Config(luigi.Task): seed = luigi.IntParameter() diff --git a/examples/elasticsearch_index.py b/examples/elasticsearch_index.py index 5bbcb21be1..264f26a155 100644 --- a/examples/elasticsearch_index.py +++ b/examples/elasticsearch_index.py @@ -1,11 +1,14 @@ # coding: utf-8 -from luigi.contrib.esindex import CopyToIndex import datetime import json + import luigi +from luigi.contrib.esindex import CopyToIndex + class FakeDocuments(luigi.Task): + """ Generate some documents to index. """ date = luigi.DateParameter(default=datetime.date.today()) @@ -23,7 +26,9 @@ def run(self): def output(self): return luigi.LocalTarget(path='/tmp/_docs-%s.ldj' % self.date) + class IndexDocuments(CopyToIndex): + """ Run diff --git a/examples/foo.py b/examples/foo.py index 2ddd5d49a1..fb51670727 100644 --- a/examples/foo.py +++ b/examples/foo.py @@ -1,20 +1,23 @@ -import luigi -import time import os import shutil +import time + +import luigi class MyExternal(luigi.ExternalTask): + def complete(self): return False class Foo(luigi.Task): + def run(self): print "Running Foo" def requires(self): -# yield MyExternal() + # yield MyExternal() for i in xrange(10): yield Bar(i) diff --git a/examples/ftp_experiment_outputs.py b/examples/ftp_experiment_outputs.py index c485cd9041..b80199dc10 100644 --- a/examples/ftp_experiment_outputs.py +++ b/examples/ftp_experiment_outputs.py @@ -8,9 +8,11 @@ class ExperimentTask(luigi.ExternalTask): + ''' This class represents something that was created elsewhere by an external process, so all we want to do is to implement the output method. 
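The programmatic-execution text above says the code will "look something like" a call into Luigi; as a hedged, self-contained sketch (``MyTask`` is a hypothetical stand-in for the root of a real pipeline), one way to drive a chain without command line integration is to call ``luigi.build`` directly:

.. code:: python

    import luigi


    class MyTask(luigi.Task):
        # hypothetical task with no output(); complete() is overridden for the demo
        x = luigi.IntParameter(default=42)

        def complete(self):
            return False

        def run(self):
            print 'running MyTask with x=%d' % self.x


    if __name__ == '__main__':
        # local_scheduler=True avoids the need for a central scheduler daemon
        luigi.build([MyTask(x=7)], local_scheduler=True)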
''' + def output(self): return RemoteTarget('/experiment/output1.txt', HOST, username=USER, password=PWD) @@ -23,9 +25,11 @@ def run(self): class ProcessingTask(luigi.Task): + ''' This class represents something that was created elsewhere by an external process, so all we want to do is to implement the output method. ''' + def requires(self): return ExperimentTask() diff --git a/examples/spark_als.py b/examples/spark_als.py index 1e66ba0a00..ca4c942a7e 100644 --- a/examples/spark_als.py +++ b/examples/spark_als.py @@ -48,7 +48,6 @@ def output(self): '%s/als-output/*' % self.item_type, format=luigi.format.Gzip) - ''' // Corresponding example Spark Job, a wrapper around the MLLib ALS job. // This class would have to be jarred into my-spark-assembly.jar @@ -85,4 +84,4 @@ def main(args: Array[String]) { sc.stop() } } -''' \ No newline at end of file +''' diff --git a/examples/ssh_remote_execution.py b/examples/ssh_remote_execution.py index 5db3c5ca95..4dd5011a3f 100644 --- a/examples/ssh_remote_execution.py +++ b/examples/ssh_remote_execution.py @@ -1,4 +1,5 @@ from collections import defaultdict + import luigi from luigi.contrib.ssh import RemoteContext, RemoteTarget from luigi.mock import MockFile @@ -7,9 +8,11 @@ class CreateRemoteData(luigi.Task): + """ Dump info on running processes on remote host. Data is still stored on the remote host """ + def output(self): return RemoteTarget( "/tmp/stuff", @@ -24,11 +27,13 @@ def run(self): class ProcessRemoteData(luigi.Task): + """ Create a toplist of users based on how many running processes they have on a remote machine In this example the processed data is stored in a MockFile """ + def requires(self): return CreateRemoteData() diff --git a/examples/terasort.py b/examples/terasort.py index e274820bbd..12a7c5870a 100644 --- a/examples/terasort.py +++ b/examples/terasort.py @@ -25,11 +25,12 @@ def hadoop_examples_jar(): class TeraGen(luigi.hadoop_jar.HadoopJarJobTask): + """Runs TeraGen, by default with 1TB of data (10B records)""" records = luigi.Parameter(default="10000000000", - description="Number of records, each record is 100 Bytes") + description="Number of records, each record is 100 Bytes") terasort_in = luigi.Parameter(default=DEFAULT_TERASORT_IN, - description="directory to store terasort input into.") + description="directory to store terasort input into.") def output(self): return luigi.hdfs.HdfsTarget(self.terasort_in) @@ -46,12 +47,13 @@ def args(self): class TeraSort(luigi.hadoop_jar.HadoopJarJobTask): + """Runs TeraGent, by default using """ terasort_in = luigi.Parameter(default=DEFAULT_TERASORT_IN, - description="directory to store terasort input into.") + description="directory to store terasort input into.") terasort_out = luigi.Parameter(default=DEFAULT_TERASORT_OUT, - description="directory to store terasort output into.") + description="directory to store terasort output into.") def requires(self): return TeraGen(terasort_in=self.terasort_in) diff --git a/examples/top_artists.py b/examples/top_artists.py index a9c4565d26..22c39e365e 100755 --- a/examples/top_artists.py +++ b/examples/top_artists.py @@ -1,10 +1,15 @@ import random -import luigi, luigi.hdfs, luigi.hadoop -import luigi.postgres -from heapq import nlargest from collections import defaultdict +from heapq import nlargest + +import luigi +import luigi.hadoop +import luigi.hdfs +import luigi.postgres + class ExternalStreams(luigi.ExternalTask): + ''' Example of a possible external data dump To depend on external targets (typically at the top of your dependency 
graph), you can define @@ -16,14 +21,16 @@ def output(self): return luigi.hdfs.HdfsTarget(self.date.strftime( 'data/streams_%Y-%m-%d.tsv')) + class Streams(luigi.Task): + ''' Faked version right now, just generates bogus data. ''' date = luigi.DateParameter() def run(self): with self.output().open('w') as output: - for i in xrange(1000): + for _ in xrange(1000): output.write('{} {} {}\n'.format( random.randint(0, 999), random.randint(0, 999), @@ -33,10 +40,13 @@ def output(self): return luigi.LocalTarget(self.date.strftime( 'data/streams_%Y_%m_%d_faked.tsv')) + class StreamsHdfs(Streams): + def output(self): return luigi.hdfs.HdfsTarget(self.date.strftime('data/streams_%Y_%m_%d_faked.tsv')) + class AggregateArtists(luigi.Task): date_interval = luigi.DateIntervalParameter() @@ -50,16 +60,17 @@ def requires(self): def run(self): artist_count = defaultdict(int) - for input in self.input(): - with input.open('r') as in_file: + for t in self.input(): + with t.open('r') as in_file: for line in in_file: - timestamp, artist, track = line.strip().split() + _, artist, track = line.strip().split() artist_count[artist] += 1 with self.output().open('w') as out_file: for artist, count in artist_count.iteritems(): out_file.write('{}\t{}\n'.format(artist, count)) + class AggregateArtistsHadoop(luigi.hadoop.JobTask): date_interval = luigi.DateIntervalParameter() @@ -79,9 +90,10 @@ def mapper(self, line): def reducer(self, key, values): yield key, sum(values) + class Top10Artists(luigi.Task): date_interval = luigi.DateIntervalParameter() - use_hadoop = luigi.BooleanParameter() + use_hadoop = luigi.BoolParameter() def requires(self): if self.use_hadoop: @@ -110,9 +122,10 @@ def _input_iterator(self): artist, streams = line.strip().split() yield int(streams), artist + class ArtistToplistToDatabase(luigi.postgres.CopyToTable): date_interval = luigi.DateIntervalParameter() - use_hadoop = luigi.BooleanParameter() + use_hadoop = luigi.BoolParameter() host = "localhost" database = "toplists" diff --git a/examples/wordcount.py b/examples/wordcount.py index 92472d6aa8..a0078203f3 100644 --- a/examples/wordcount.py +++ b/examples/wordcount.py @@ -2,13 +2,16 @@ class InputText(luigi.ExternalTask): + ''' This class represents something that was created elsewhere by an external process, so all we want to do is to implement the output method. 
''' date = luigi.DateParameter() + def output(self): return luigi.LocalTarget(self.date.strftime('/var/tmp/text/%Y-%m-%d.txt')) + class WordCount(luigi.Task): date_interval = luigi.DateIntervalParameter() @@ -20,8 +23,8 @@ def output(self): def run(self): count = {} - for file in self.input(): # The input() method is a wrapper around requires() that returns Target objects - for line in file.open('r'): # Target objects are a file system/format abstraction and this will return a file stream object + for f in self.input(): # The input() method is a wrapper around requires() that returns Target objects + for line in f.open('r'): # Target objects are a file system/format abstraction and this will return a file stream object for word in line.strip().split(): count[word] = count.get(word, 0) + 1 @@ -29,7 +32,7 @@ def run(self): f = self.output().open('w') for word, count in count.iteritems(): f.write("%s\t%d\n" % (word, count)) - f.close() # Note that this is essential because file system operations are atomic + f.close() # Note that this is essential because file system operations are atomic if __name__ == '__main__': luigi.run(main_task_cls=WordCount) diff --git a/examples/wordcount_hadoop.py b/examples/wordcount_hadoop.py index b11e5a093c..7a89b3e7f4 100644 --- a/examples/wordcount_hadoop.py +++ b/examples/wordcount_hadoop.py @@ -1,15 +1,21 @@ -import luigi, luigi.hadoop, luigi.hdfs +import luigi +import luigi.hadoop +import luigi.hdfs + # To make this run, you probably want to edit /etc/luigi/client.cfg and add something like: # # [hadoop] # jar: /usr/lib/hadoop-xyz/hadoop-streaming-xyz-123.jar + class InputText(luigi.ExternalTask): date = luigi.DateParameter() + def output(self): return luigi.hdfs.HdfsTarget(self.date.strftime('/tmp/text/%Y-%m-%d.txt')) + class WordCount(luigi.hadoop.JobTask): date_interval = luigi.DateIntervalParameter() diff --git a/luigi/__init__.py b/luigi/__init__.py index ced49b85e9..efa5442ddb 100644 --- a/luigi/__init__.py +++ b/luigi/__init__.py @@ -1,4 +1,4 @@ -"""Package containing core luigi functionality""" +""" Package containing core luigi functionality.""" # Copyright (c) 2012 Spotify AB # # Licensed under the Apache License, Version 2.0 (the "License"); you may not @@ -25,6 +25,8 @@ Event = event.Event Task = task.Task +Config = task.Config +ConfigWithoutSection = task.ConfigWithoutSection ExternalTask = task.ExternalTask WrapperTask = task.WrapperTask Target = target.Target @@ -40,10 +42,12 @@ # TODO: how can we get rid of these? DateHourParameter = parameter.DateHourParameter +DateMinuteParameter = parameter.DateMinuteParameter DateParameter = parameter.DateParameter IntParameter = parameter.IntParameter FloatParameter = parameter.FloatParameter -BooleanParameter = parameter.BooleanParameter +BooleanParameter = parameter.BooleanParameter # backward compatibility +BoolParameter = parameter.BoolParameter DateIntervalParameter = parameter.DateIntervalParameter TimeDeltaParameter = parameter.TimeDeltaParameter diff --git a/luigi/configuration.py b/luigi/configuration.py index c3859230b4..0b4171c063 100644 --- a/luigi/configuration.py +++ b/luigi/configuration.py @@ -1,6 +1,19 @@ +# Copyright (c) 2015 Spotify AB +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. 
You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. -import os import logging +import os from ConfigParser import ConfigParser, NoOptionError, NoSectionError @@ -31,9 +44,12 @@ def reload(cls): return cls.instance().read(cls._config_paths) def _get_with_default(self, method, section, option, default, expected_type=None): - """ Gets the value of the section/option using method. Returns default if value - is not found. Raises an exception if the default value is not None and doesn't match - the expected_type. + """ + Gets the value of the section/option using method. + + Returns default if value is not found. + + Raises an exception if the default value is not None and doesn't match the expected_type. """ try: return method(self, section, option) @@ -63,12 +79,15 @@ def getintdict(self, section): except NoSectionError: return {} - def set(self, section, option, value): + def set(self, section, option, value=None): if not ConfigParser.has_section(self, section): ConfigParser.add_section(self, section) return ConfigParser.set(self, section, option, value) + def get_config(): - """ Convenience method (for backwards compatibility) for accessing config singleton """ + """ + Convenience method (for backwards compatibility) for accessing config singleton. + """ return LuigiConfigParser.instance() diff --git a/luigi/contrib/__init__.py b/luigi/contrib/__init__.py index 2bbd518b54..44e46acef7 100644 --- a/luigi/contrib/__init__.py +++ b/luigi/contrib/__init__.py @@ -1 +1,3 @@ -"""Package containing optional and-on functionality.""" \ No newline at end of file +""" +Package containing optional and-on functionality. +""" diff --git a/luigi/contrib/esindex.py b/luigi/contrib/esindex.py index 4585e50fcb..c5bd164c99 100644 --- a/luigi/contrib/esindex.py +++ b/luigi/contrib/esindex.py @@ -6,10 +6,11 @@ Provides an `ElasticsearchTarget` and a `CopyToIndex` template task. Modeled after `luigi.contrib.rdbms.CopyToTable`. ----- A minimal example (assuming elasticsearch is running on localhost:9200): +.. code-block:: python + class ExampleIndex(CopyToIndex): index = 'example' @@ -20,10 +21,10 @@ def docs(self): task = ExampleIndex() luigi.build([task], local_scheduler=True) ----- - All options: +.. code-block:: python + class ExampleIndex(CopyToIndex): host = 'localhost' port = 9200 @@ -39,22 +40,25 @@ def docs(self): task = ExampleIndex() luigi.build([task], local_scheduler=True) ----- - `Host`, `port`, `index`, `doc_type` parameters are standard elasticsearch. `purge_existing_index` will delete the index, whenever an update is required. -This is useful, when one deals with "dumps" that represent the whole data, -not just updates. +This is useful, when one deals with "dumps" that represent the whole data, not just updates. `marker_index_hist_size` sets the maximum number of entries in the 'marker' -index. Keep all updates by default (0). Use 1 to only remember the most recent -update to the index. This can be useful, if an index needs to recreated, even -though the corresponding indexing task has been run sometime in the past - but +index: + +* 0 (default) keeps all updates, +* 1 to only remember the most recent update to the index. 
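To illustrate the configuration helpers touched above, here is a small sketch of reading and writing values through the singleton parser; the section and option names other than ``pig``/``home`` are made up for the example.

.. code-block:: python

    from luigi import configuration

    config = configuration.get_config()

    # read a value, falling back to the supplied default if the
    # section or option is missing
    pig_home = config.get('pig', 'home', '/usr/share/pig')

    # set() now takes an optional value and creates the section when needed
    config.set('my-section', 'enabled', 'true')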
+ +This can be useful, if an index needs to recreated, even though +the corresponding indexing task has been run sometime in the past - but a later indexing task might have altered the index in the meantime. There are a two luigi `client.cfg` configuration options: +.. code-block:: ini + [elasticsearch] marker-index = update_log @@ -68,6 +72,7 @@ def docs(self): import hashlib import json import logging + import luigi logger = logging.getLogger('luigi-interface') @@ -85,23 +90,28 @@ def docs(self): class ElasticsearchTarget(luigi.Target): - """ Target for a resource in Elasticsearch. """ + """ Target for a resource in Elasticsearch.""" marker_index = luigi.configuration.get_config().get('elasticsearch', - 'marker-index', 'update_log') + 'marker-index', 'update_log') marker_doc_type = luigi.configuration.get_config().get('elasticsearch', - 'marker-doc-type', 'entry') + 'marker-doc-type', 'entry') def __init__(self, host, port, index, doc_type, update_id, marker_index_hist_size=0, http_auth=None): """ - Args: - host (str): Elasticsearch server host - port (int): Elasticsearch server port - index (str): Index name - doc_type (str): Doctype name - update_id (str): An identifier for this data set - marker_index_hist_size (int): List of changes to the index to remember + :param host: Elasticsearch server host + :type host: str + :param port: Elasticsearch server port + :type port: int + :param index: index name + :type index: str + :param doc_type: doctype name + :type doc_type: str + :param update_id: an identifier for this data set + :type update_id: str + :param marker_index_hist_size: list of changes to the index to remember + :type marker_index_hist_size: int """ self.host = host self.port = port @@ -119,28 +129,34 @@ def __init__(self, host, port, index, doc_type, update_id, ) def marker_index_document_id(self): - """ Generate an id for the indicator document. """ + """ + Generate an id for the indicator document. + """ params = '%s:%s:%s' % (self.index, self.doc_type, self.update_id) return hashlib.sha1(params).hexdigest() def touch(self): - """ Mark this update as complete. The document id would be sufficent, - but we index the parameters - - (update_id, target_index, target_doc_type, date) + """ + Mark this update as complete. - as well for documentation. """ + The document id would be sufficent but, + for documentation, + we index the parameters `update_id`, `target_index`, `target_doc_type` and `date` as well. + """ self.create_marker_index() self.es.index(index=self.marker_index, doc_type=self.marker_doc_type, id=self.marker_index_document_id(), body={ - 'update_id': self.update_id, 'target_index': self.index, - 'target_doc_type': self.doc_type, - 'date': datetime.datetime.now()}) + 'update_id': self.update_id, + 'target_index': self.index, + 'target_doc_type': self.doc_type, + 'date': datetime.datetime.now()}) self.es.indices.flush(index=self.marker_index) self.ensure_hist_size() def exists(self): - """ Test, if this task has been run. """ + """ + Test, if this task has been run. + """ try: _ = self.es.get(index=self.marker_index, doc_type=self.marker_doc_type, @@ -153,13 +169,17 @@ def exists(self): return False def create_marker_index(self): - """ Create the index that will keep track of the tasks if necessary. """ + """ + Create the index that will keep track of the tasks if necessary. 
+ """ if not self.es.indices.exists(index=self.marker_index): self.es.indices.create(index=self.marker_index) def ensure_hist_size(self): - """ Shrink the history of updates for a `index/doc_type` combination - down to `self.marker_index_hist_size`. """ + """ + Shrink the history of updates for + a `index/doc_type` combination down to `self.marker_index_hist_size`. + """ if self.marker_index_hist_size == 0: return result = self.es.search(index=self.marker_index, @@ -184,10 +204,10 @@ class CopyToIndex(luigi.Task): 1. Subclass and override the required `index` attribute. - 2. Implement a custom `docs` method, that returns an iterable over - the documents. A document can be a JSON string, e.g. from - a newline-delimited JSON (ldj) file (default implementation) or some - dictionary. + 2. Implement a custom `docs` method, that returns an iterable over the documents. + A document can be a JSON string, + e.g. from a newline-delimited JSON (ldj) file (default implementation) + or some dictionary. Optional attributes: @@ -206,71 +226,97 @@ class CopyToIndex(luigi.Task): @property def host(self): - """ ES hostname """ + """ + ES hostname. + """ return 'localhost' @property def port(self): - """ ES port """ + """ + ES port. + """ return 9200 @property def http_auth(self): """ - ES optional http auth information - as either ‘:’ separated string or a tuple. - eg: ` ('user', 'pass') ` or ` "user:pass" ` + ES optional http auth information as either ‘:’ separated string or a tuple, + e.g. `('user', 'pass')` or `"user:pass"`. """ return None @abc.abstractproperty def index(self): - """ The target index. May exists or not. """ + """ + The target index. + + May exist or not. + """ return None @property def doc_type(self): - """ The target doc_type. """ + """ + The target doc_type. + """ return 'default' @property def mapping(self): - """ Dictionary with custom mapping or `None`. """ + """ + Dictionary with custom mapping or `None`. + """ return None @property def settings(self): - """ Settings to be used at index creation time. """ + """ + Settings to be used at index creation time. + """ return {'settings': {}} @property def chunk_size(self): - """ Single API call for this number of docs. """ + """ + Single API call for this number of docs. + """ return 2000 @property def raise_on_error(self): - """ Whether to fail fast. """ + """ + Whether to fail fast. + """ return True @property def purge_existing_index(self): - """ Whether to delete the `index` completely before any indexing. """ + """ + Whether to delete the `index` completely before any indexing. + """ return False @property def marker_index_hist_size(self): - """ Number of event log entries in the marker index. 0: unlimited. """ + """ + Number of event log entries in the marker index. 0: unlimited. + """ return 0 @property def timeout(self): - """ Timeout. """ + """ + Timeout. + """ return 10 def docs(self): - """ Return the documents to be indexed. Beside the user defined - fields, the document may contain an `_index`, `_type` and `_id`. """ + """ + Return the documents to be indexed. + + Beside the user defined fields, the document may contain an `_index`, `_type` and `_id`. + """ with self.input().open('r') as fobj: for line in fobj: yield line @@ -278,8 +324,9 @@ def docs(self): # everything below will rarely have to be overridden def _docs(self): - """ Since `self.docs` may yield documents that do not explicitly - contain `_index` or `_type`, add those attributes here, if necessary. 
+ """ + Since `self.docs` may yield documents that do not explicitly contain `_index` or `_type`, + add those attributes here, if necessary. """ first = iter(self.docs()).next() needs_parsing = False @@ -308,7 +355,8 @@ def _init_connection(self): ) def create_index(self): - """ Override to provide code for creating the target index. + """ + Override to provide code for creating the target index. By default it will be created without any special settings or mappings. """ @@ -317,17 +365,22 @@ def create_index(self): es.indices.create(index=self.index, body=self.settings) def delete_index(self): - """ Delete the index, if it exists. """ + """ + Delete the index, if it exists. + """ es = self._init_connection() if es.indices.exists(index=self.index): es.indices.delete(index=self.index) def update_id(self): - """ This id will be a unique identifier for this indexing task.""" + """ + This id will be a unique identifier for this indexing task. + """ return self.task_id def output(self): - """ Returns a ElasticsearchTarget representing the inserted dataset. + """ + Returns a ElasticsearchTarget representing the inserted dataset. Normally you don't override this. """ @@ -339,15 +392,20 @@ def output(self): doc_type=self.doc_type, update_id=self.update_id(), marker_index_hist_size=self.marker_index_hist_size - ) + ) def run(self): - """ Purge existing index, if requested (`purge_existing_index`). - Create the index, if missing. Apply mappings, if given. - Set refresh interval to -1 (disable) for performance reasons. - Bulk index in batches of size `chunk_size` (2000). - Set refresh interval to 1s. Refresh Elasticsearch. - Create entry in marker index. + """ + Run task, namely: + + * purge existing index, if requested (`purge_existing_index`), + * create the index, if missing, + * apply mappings, if given, + * set refresh interval to -1 (disable) for performance reasons, + * bulk index in batches of size `chunk_size` (2000), + * set refresh interval to 1s, + * refresh Elasticsearch, + * create entry in marker index. """ if self.purge_existing_index: self.delete_index() diff --git a/luigi/contrib/ftp.py b/luigi/contrib/ftp.py index ff9f68da2e..53bc6c9e62 100644 --- a/luigi/contrib/ftp.py +++ b/luigi/contrib/ftp.py @@ -1,5 +1,6 @@ """ -This library is a wrapper of ftplib. It is convenient to move data from/to FTP. +This library is a wrapper of ftplib. +It is convenient to move data from/to FTP. There is an example on how to use it (example/ftp_experiment_outputs.py) @@ -7,37 +8,62 @@ Be aware that normal ftp do not provide secure communication. """ +import datetime +import ftplib import os import random -import ftplib + import luigi -import luigi.target import luigi.format +import luigi.target from luigi.format import FileWrapper class RemoteFileSystem(luigi.target.FileSystem): - def __init__(self, host, username=None, password=None): + + def __init__(self, host, username=None, password=None, port=21, tls=False): self.host = host self.username = username self.password = password + self.port = port + self.tls = tls def _connect(self): - """ Log in to ftp """ - self.ftpcon = ftplib.FTP(self.host, self.username, self.password) + """ + Log in to ftp. + """ + if self.tls: + self.ftpcon = ftplib.FTP_TLS() + else: + self.ftpcon = ftplib.FTP() + self.ftpcon.connect(self.host, self.port) + self.ftpcon.login(self.username, self.password) + if self.tls: + self.ftpcon.prot_p() + + def exists(self, path, mtime=None): + """ + Return `True` if file or directory at `path` exist, False otherwise. 
+ + Additional check on modified time when mtime is passed in. - def exists(self, path): - """ Return `True` if file or directory at `path` exist, False otherwise """ + Return False if the file's modified time is older mtime. + """ self._connect() files = self.ftpcon.nlst(path) - # empty list, means do not exists - if not files: - return False + result = False + if files: + if mtime: + mdtm = self.ftpcon.sendcmd('MDTM ' + path) + modified = datetime.datetime.strptime(mdtm[4:], "%Y%m%d%H%M%S") + result = modified > mtime + else: + result = True self.ftpcon.quit() - return True + return result def _rm_recursive(self, ftp, path): """ @@ -70,11 +96,14 @@ def _rm_recursive(self, ftp, path): print('_rm_recursive: Could not remove {0}: {1}'.format(path, e)) def remove(self, path, recursive=True): - """ Remove file or directory at location ``path`` + """ + Remove file or directory at location ``path``. - :param str path: a path within the FileSystem to remove. - :param bool recursive: if the path is a directory, recursively remove the directory and all - of its descendants. Defaults to ``True``. + :param path: a path within the FileSystem to remove. + :type path: str + :param recursive: if the path is a directory, recursively remove the directory and + all of its descendants. Defaults to ``True``. + :type recursive: bool """ self._connect() @@ -82,7 +111,7 @@ def remove(self, path, recursive=True): self._rm_recursive(self.ftpcon, path) else: try: - #try delete file + # try delete file self.ftpcon.delete(path) except ftplib.all_errors: # it is a folder, delete it @@ -125,18 +154,28 @@ def get(self, path, local_path): tmp_local_path = local_path + '-luigi-tmp-%09d' % random.randrange(0, 1e10) # download file self._connect() - self.ftpcon.retrbinary('RETR %s' % path, open(tmp_local_path, 'wb').write) + self.ftpcon.retrbinary('RETR %s' % path, open(tmp_local_path, 'wb').write) self.ftpcon.quit() os.rename(tmp_local_path, local_path) class AtomicFtpfile(file): - """ Simple class that writes to a temp file and upload to ftp on close(). - Also cleans up the temp file if close is not invoked. """ + Simple class that writes to a temp file and upload to ftp on close(). + + Also cleans up the temp file if close is not invoked. + """ + def __init__(self, fs, path): - self.__tmp_path = self.path + '-luigi-tmp-%09d' % random.randrange(0, 1e10) + """ + Initializes an AtomicFtpfile instance. + + :param fs: + :param path: + :type path: str + """ + self.__tmp_path = '%s-luigi-tmp-%09d' % (path, random.randrange(0, 1e10)) self._fs = fs self.path = path super(AtomicFtpfile, self).__init__(self.__tmp_path, 'w') @@ -162,6 +201,7 @@ def fs(self): def __exit__(self, exc_type, exc, traceback): """ Close/commit the file if there are no exception + Upload file to ftp """ if exc_type: @@ -171,27 +211,33 @@ def __exit__(self, exc_type, exc, traceback): class RemoteTarget(luigi.target.FileSystemTarget): """ - Target used for reading from remote files. The target is implemented using - ssh commands streaming data over the network. + Target used for reading from remote files. + + The target is implemented using ssh commands streaming data over the network. 
""" - def __init__(self, path, host, format=None, username=None, password=None): + + def __init__(self, path, host, format=None, username=None, password=None, port=21, mtime=None, tls=False): self.path = path + self.mtime = mtime self.format = format - self._fs = RemoteFileSystem(host, username, password) + self.tls = tls + self._fs = RemoteFileSystem(host, username, password, port, tls) @property def fs(self): return self._fs def open(self, mode): - """Open the FileSystem target. + """ + Open the FileSystem target. This method returns a file-like object which can either be read from or written to depending on the specified mode. - :param str mode: the mode `r` opens the FileSystemTarget in read-only mode, whereas `w` will - open the FileSystemTarget in write mode. Subclasses can implement - additional options. + :param mode: the mode `r` opens the FileSystemTarget in read-only mode, whereas `w` will + open the FileSystemTarget in write mode. Subclasses can implement + additional options. + :type mode: str """ if mode == 'w': if self.format: @@ -212,6 +258,9 @@ def open(self, mode): else: raise Exception('mode must be r/w') + def exists(self): + return self.fs.exists(self.path, self.mtime) + def put(self, local_path): self.fs.put(local_path, self.path) diff --git a/luigi/contrib/mysqldb.py b/luigi/contrib/mysqldb.py index 341e73286d..83573871fa 100644 --- a/luigi/contrib/mysqldb.py +++ b/luigi/contrib/mysqldb.py @@ -13,19 +13,26 @@ class MySqlTarget(luigi.Target): - """Target for a resource in MySql""" + """ + Target for a resource in MySql. + """ marker_table = luigi.configuration.get_config().get('mysql', 'marker-table', 'table_updates') def __init__(self, host, database, user, password, table, update_id): """ - Args: - host (str): MySql server address. Possibly a host:port string. - database (str): Database name - user (str): Database user - password (str): Password for specified user - update_id (str): An identifier for this data set - + Initializes a MySqlTarget instance. + + :param host: MySql server address. Possibly a host:port string. + :type host: str + :param database: database name. + :type database: str + :param user: database user + :type user: str + :param password: password for specified user. + :type password: str + :param update_id: an identifier for this data set. + :type update_id: str """ if ':' in host: self.host, self.port = host.split(':') @@ -40,10 +47,12 @@ def __init__(self, host, database, user, password, table, update_id): self.update_id = update_id def touch(self, connection=None): - """Mark this update as complete. + """ + Mark this update as complete. - Important: If the marker table doesn't exist, the connection transaction will be aborted - and the connection reset. Then the marker table will be created. + IMPORTANT, If the marker table doesn't exist, + the connection transaction will be aborted and the connection reset. + Then the marker table will be created. """ self.create_marker_table() @@ -71,11 +80,11 @@ def exists(self, connection=None): cursor.execute("""SELECT 1 FROM {marker_table} WHERE update_id = %s LIMIT 1""".format(marker_table=self.marker_table), - (self.update_id,) - ) + (self.update_id,) + ) row = cursor.fetchone() except mysql.connector.Error as e: - if e.errno == errorcode.ER_NO_SUCH_TABLE: + if e.errno == errorcode.ER_NO_SUCH_TABLE: row = None else: raise @@ -91,9 +100,11 @@ def connect(self, autocommit=False): return connection def create_marker_table(self): - """Create marker table if it doesn't exist. 
+ """ + Create marker table if it doesn't exist. - Using a separate connection since the transaction might have to be reset""" + Using a separate connection since the transaction might have to be reset. + """ connection = self.connect(autocommit=True) cursor = connection.cursor() try: diff --git a/luigi/contrib/pig.py b/luigi/contrib/pig.py new file mode 100644 index 0000000000..80a561a2d2 --- /dev/null +++ b/luigi/contrib/pig.py @@ -0,0 +1,185 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +import datetime +import logging +import os +import select +import signal +import subprocess +import tempfile + +import luigi +from luigi import configuration + +logger = logging.getLogger('luigi-interface') + +""" +Apache Pig support. + +Example configuration section in client.cfg: +[pig] +# pig home directory +home: /usr/share/pig +""" + + +class PigJobTask(luigi.Task): + + def pig_home(self): + return configuration.get_config().get('pig', 'home', '/usr/share/pig') + + def pig_command_path(self): + return os.path.join(self.pig_home(), "bin/pig") + + def pig_env_vars(self): + """ + Dictionary of environment variables that should be set when running Pig. + + Ex: + return { 'PIG_CLASSPATH': '/your/path' } + """ + return {} + + def pig_properties(self): + """ + Dictionary of properties that should be set when running Pig. + + Ex: + return { 'pig.additional.jars':'/path/to/your/jar' } + """ + return {} + + def pig_parameters(self): + """ + Dictionary of parameters that should be set for the Pig job. + Ex: + return { 'YOUR_PARAM_NAME':'Your param value' } + """ + return {} + + def pig_options(self): + """ + List of options that will be appended to the Pig command. + Ex: + return ['-x', 'local'] + """ + return [] + + def output(self): + raise NotImplementedError("subclass should define output path") + + def pig_script_path(self): + """ + Return the path to the Pig script to be run. 
+ """ + raise NotImplementedError("subclass should define pig_script_path") + + def _build_pig_cmd(self): + opts = self.pig_options() + + for k, v in self.pig_parameters().iteritems(): + opts.append("-p") + opts.append("%s=%s" % (k, v)) + + if self.pig_properties(): + with open('pig_property_file', 'w') as prop_file: + prop_file.writelines(["%s=%s%s" % (k, v, os.linesep) for (k, v) in self.pig_properties().iteritems()]) + opts.append('-propertyFile') + opts.append('pig_property_file') + + cmd = [self.pig_command_path()] + opts + ["-f", self.pig_script_path()] + + logger.info(' '.join(cmd)) + return cmd + + def run(self): + cmd = self._build_pig_cmd() + self.track_and_progress(cmd) + + def track_and_progress(self, cmd): + temp_stdout = tempfile.TemporaryFile() + env = os.environ.copy() + env['PIG_HOME'] = self.pig_home() + for k, v in self.pig_env_vars().iteritems(): + env[k] = v + + proc = subprocess.Popen(cmd, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env) + reads = [proc.stderr.fileno(), proc.stdout.fileno()] + # tracking the possible problems with this job + err_lines = [] + with PigRunContext() as pig_context: + while proc.poll() is None: + ret = select.select(reads, [], []) + for fd in ret[0]: + if fd == proc.stderr.fileno(): + line = proc.stderr.readline() + err_lines.append(line) + if fd == proc.stdout.fileno(): + line = proc.stdout.readline() + temp_stdout.write(line) + + err_line = line.lower() + if err_line.find('More information at:') != -1: + logger.info(err_line.split('more information at: ')[-1].strip()) + if err_line.find(' - '): + t = err_line.split(' - ')[-1].strip() + if t != "": + logger.info(t) + + # Read the rest + stdout + err = ''.join(err_lines + [err_line for err_line in proc.stderr]) + if proc.returncode == 0: + logger.info("Job completed successfully!") + else: + logger.error("Error when running script:\n%s", self.pig_script_path()) + logger.error(err) + raise PigJobError("Pig script failed with return value: %s" % (proc.returncode,), err=err) + + +class PigRunContext(object): + def __init__(self): + self.job_id = None + + def __enter__(self): + self.__old_signal = signal.getsignal(signal.SIGTERM) + signal.signal(signal.SIGTERM, self.kill_job) + return self + + def kill_job(self, captured_signal=None, stack_frame=None): + if self.job_id: + logger.info('Job interrupted, killing job %s', self.job_id) + subprocess.call(['pig', '-e', '"kill %s"' % self.job_id]) + if captured_signal is not None: + # adding 128 gives the exit code corresponding to a signal + sys.exit(128 + captured_signal) + + def __exit__(self, exc_type, exc_val, exc_tb): + if exc_type is KeyboardInterrupt: + self.kill_job() + signal.signal(signal.SIGTERM, self.__old_signal) + + +class PigJobError(RuntimeError): + def __init__(self, message, out=None, err=None): + super(PigJobError, self).__init__(message, out, err) + self.message = message + self.out = out + self.err = err + + def __str__(self): + info = self.message + if self.out: + info += "\nSTDOUT: " + str(self.out) + if self.err: + info += "\nSTDERR: " + str(self.err) + return info diff --git a/luigi/contrib/rdbms.py b/luigi/contrib/rdbms.py index 0d5dc71994..10023c59bd 100644 --- a/luigi/contrib/rdbms.py +++ b/luigi/contrib/rdbms.py @@ -12,11 +12,18 @@ class CopyToTable(luigi.Task): """ - An abstract task for inserting a data set into RDBMS + An abstract task for inserting a data set into RDBMS. Usage: - Subclass and override the required `host`, `database`, `user`, - `password`, `table` and `columns` attributes. 
+ + Subclass and override the following attributes: + + * `host`, + * `database`, + * `user`, + * `password`, + * `table` + * `columns` """ @abc.abstractproperty @@ -51,9 +58,9 @@ def table(self): column_separator = "\t" # how columns are separated in the file copied into postgres - def create_table(self, connection): - """ Override to provide code for creating the target table. + """ + Override to provide code for creating the target table. By default it will be created using types (optionally) specified in columns. @@ -71,9 +78,10 @@ def create_table(self, connection): query = "CREATE TABLE {table} ({coldefs})".format(table=self.table, coldefs=coldefs) connection.cursor().execute(query) - def update_id(self): - """This update id will be a unique identifier for this insert on this table.""" + """ + This update id will be a unique identifier for this insert on this table. + """ return self.task_id @abc.abstractmethod @@ -81,9 +89,12 @@ def output(self): raise NotImplementedError("This method must be overridden") def init_copy(self, connection): - """ Override to perform custom queries. + """ + Override to perform custom queries. - Any code here will be formed in the same transaction as the main copy, just prior to copying data. Example use cases include truncating the table or removing all data older than X in the database to keep a rolling window of data available in the table. + Any code here will be formed in the same transaction as the main copy, just prior to copying data. + Example use cases include truncating the table or removing all data older than X in the database + to keep a rolling window of data available in the table. """ # TODO: remove this after sufficient time so most people using the diff --git a/luigi/contrib/redshift.py b/luigi/contrib/redshift.py index e560417776..69aedf5bc8 100644 --- a/luigi/contrib/redshift.py +++ b/luigi/contrib/redshift.py @@ -1,14 +1,13 @@ import abc +import json import logging -import luigi.postgres +import time + import luigi -import json -from luigi.contrib import rdbms from luigi import postgres - +from luigi.contrib import rdbms from luigi.s3 import S3PathTask, S3Target - logger = logging.getLogger('luigi-interface') @@ -16,16 +15,21 @@ import psycopg2 import psycopg2.errorcodes except ImportError: - logger.warning("Loading postgres module without psycopg2 installed. Will crash at runtime if postgres functionality is used.") + logger.warning("Loading postgres module without psycopg2 installed. " + "Will crash at runtime if postgres functionality is used.") class RedshiftTarget(postgres.PostgresTarget): """ Target for a resource in Redshift. - Redshift is similar to postgres with a few adjustments required by redshift + Redshift is similar to postgres with a few adjustments + required by redshift. """ - marker_table = luigi.configuration.get_config().get('redshift', 'marker-table', 'table_updates') + marker_table = luigi.configuration.get_config().get( + 'redshift', + 'marker-table', + 'table_updates') use_db_timestamps = False @@ -35,70 +39,128 @@ class S3CopyToTable(rdbms.CopyToTable): Template task for inserting a data set into Redshift from s3. Usage: - Subclass and override the required attributes: - `host`, `database`, `user`, `password`, `table`, `columns`, - `aws_access_key_id`, `aws_secret_access_key`, `s3_load_path` + + * Subclass and override the required attributes: + * `host`, + * `database`, + * `user`, + * `password`, + * `table`, + * `columns`, + * `aws_access_key_id`, + * `aws_secret_access_key`, + * `s3_load_path`. 
""" @abc.abstractproperty def s3_load_path(self): - 'override to return the load path' + """ + Override to return the load path. + """ return None @abc.abstractproperty def aws_access_key_id(self): - 'override to return the key id' + """ + Override to return the key id. + """ return None @abc.abstractproperty def aws_secret_access_key(self): - 'override to return the secret access key' + """ + Override to return the secret access key. + """ return None @abc.abstractproperty def copy_options(self): - '''Add extra copy options, for example: + """ + Add extra copy options, for example: + + * TIMEFORMAT 'auto' + * IGNOREHEADER 1 + * TRUNCATECOLUMNS + * IGNOREBLANKLINES + """ + return '' - TIMEFORMAT 'auto' - IGNOREHEADER 1 - TRUNCATECOLUMNS - IGNOREBLANKLINES + def table_attributes(self): + '''Add extra table attributes, for example: + DISTSTYLE KEY + DISTKEY (MY_FIELD) + SORTKEY (MY_FIELD_2, MY_FIELD_3) ''' return '' + def do_truncate_table(self): + """ + Return True if table should be truncated before copying new data in. + """ + return False + + def truncate_table(self, connection): + query = "truncate %s" % self.table + cursor = connection.cursor() + try: + cursor.execute(query) + finally: + cursor.close() + + def create_table(self, connection): + """ + Override to provide code for creating the target table. + + By default it will be created using types (optionally) + specified in columns. + + If overridden, use the provided connection object for + setting up the table in order to create the table and + insert data using the same transaction. + """ + if len(self.columns[0]) == 1: + # only names of columns specified, no types + raise NotImplementedError("create_table() not implemented " + "for %r and columns types not " + "specified" % self.table) + elif len(self.columns[0]) == 2: + # if columns is specified as (name, type) tuples + coldefs = ','.join( + '{name} {type}'.format( + name=name, + type=type) for name, type in self.columns + ) + query = ("CREATE TABLE " + "{table} ({coldefs}) " + "{table_attributes}").format( + table=self.table, + coldefs=coldefs, + table_attributes=self.table_attributes()) + connection.cursor().execute(query) + def run(self): """ - If the target table doesn't exist, self.create_table will be called - to attempt to create the table. + If the target table doesn't exist, self.create_table + will be called to attempt to create the table. 
""" if not (self.table): raise Exception("table need to be specified") + path = self.s3_load_path() connection = self.output().connect() + if not self.does_table_exist(connection): + # try creating table + logger.info("Creating table %s", self.table) + connection.reset() + self.create_table(connection) + elif self.do_truncate_table(): + logger.info("Truncating table %s", self.table) + self.truncate_table(connection) - path = self.s3_load_path() logger.info("Inserting file: %s", path) - - # attempt to copy the data into postgres - # if it fails because the target table doesn't exist - # try to create it by running self.create_table - for attempt in xrange(2): - try: - cursor = connection.cursor() - self.init_copy(connection) - self.copy(cursor, path) - except psycopg2.ProgrammingError, e: - if e.pgcode == psycopg2.errorcodes.UNDEFINED_TABLE and attempt == 0: - # if first attempt fails with "relation not found", - # try creating table - logger.info("Creating table %s", self.table) - connection.reset() - self.create_table(connection) - else: - raise - else: - break - + cursor = connection.cursor() + self.init_copy(connection) + self.copy(cursor, path) self.output().touch(connection) connection.commit() @@ -106,9 +168,9 @@ def run(self): connection.close() def copy(self, cursor, f): - ''' - Defines copying from s3 into redshift - ''' + """ + Defines copying from s3 into redshift. + """ cursor.execute(""" COPY %s from '%s' @@ -120,17 +182,33 @@ def copy(self, cursor, f): self.copy_options)) def output(self): - """Returns a RedshiftTarget representing the inserted dataset. + """ + Returns a RedshiftTarget representing the inserted dataset. Normally you don't override this. """ return RedshiftTarget( - host=self.host, - database=self.database, - user=self.user, - password=self.password, - table=self.table, - update_id=self.update_id()) + host=self.host, + database=self.database, + user=self.user, + password=self.password, + table=self.table, + update_id=self.update_id()) + + def does_table_exist(self, connection): + """ + Determine whether the table already exists. + """ + query = ("select 1 as table_exists " + "from pg_table_def " + "where tablename = %s limit 1") + cursor = connection.cursor() + try: + cursor.execute(query, (self.table,)) + result = cursor.fetchone() + return bool(result) + finally: + cursor.close() class S3CopyJSONToTable(S3CopyToTable): @@ -138,29 +216,43 @@ class S3CopyJSONToTable(S3CopyToTable): Template task for inserting a JSON data set into Redshift from s3. Usage: - Subclass and override the required attributes: - `host`, `database`, `user`, `password`, `table`, `columns`, - `aws_access_key_id`, `aws_secret_access_key`, `s3_load_path`, - `jsonpath`, `copy_json_options` + + * Subclass and override the required attributes: + + * `host`, + * `database`, + * `user`, + * `password`, + * `table`, + * `columns`, + * `aws_access_key_id`, + * `aws_secret_access_key`, + * `s3_load_path`, + * `jsonpath`, + * `copy_json_options`. """ @abc.abstractproperty def jsonpath(self): - 'override the jsonpath schema location for the table' + """ + Override the jsonpath schema location for the table. + """ return '' @abc.abstractproperty def copy_json_options(self): - '''Add extra copy options, for example: - GZIP - LZOP - ''' + """ + Add extra copy options, for example: + + * GZIP + * LZOP + """ return '' def copy(self, cursor, f): - ''' - Defines copying JSON from s3 into redshift - ''' + """ + Defines copying JSON from s3 into redshift. 
+ """ cursor.execute(""" COPY %s from '%s' @@ -176,22 +268,26 @@ class RedshiftManifestTask(S3PathTask): """ Generic task to generate a manifest file that can be used in S3CopyToTable in order to copy multiple files from your - s3 folder into a redshift table at once + s3 folder into a redshift table at once. - For full description on how to use the manifest file see: + For full description on how to use the manifest file see http://docs.aws.amazon.com/redshift/latest/dg/loading-data-files-using-manifest.html Usage: - Requires parameters - path - s3 path to the generated manifest file, including the - name of the generated file - to be copied into a redshift table - folder_paths - s3 paths to the folders containing files you wish to be copied + + * requires parameters + * path - s3 path to the generated manifest file, including the + name of the generated file + to be copied into a redshift table + * folder_paths - s3 paths to the folders containing files you wish to be copied + Output: - generated manifest file + + * generated manifest file """ - # should be over ridden to point to a variety of folders you wish to copy from + # should be over ridden to point to a variety + # of folders you wish to copy from folder_paths = luigi.Parameter() def run(self): @@ -208,3 +304,99 @@ def run(self): target = self.output().open('w') target.write(json.dumps(manifest)) target.close() + + +class KillOpenRedshiftSessions(luigi.Task): + """ + An task for killing any open Redshift sessions + in a given database. This is necessary to prevent open user sessions + with transactions against the table from blocking drop or truncate + table commands. + + Usage: + + Subclass and override the required `host`, `database`, + `user`, and `password` attributes. + """ + + # time in seconds to wait before + # reconnecting to Redshift if our session is killed too. + # 30 seconds is usually fine; 60 is conservative + connection_reset_wait_seconds = luigi.IntParameter(default=60) + + @abc.abstractproperty + def host(self): + return None + + @abc.abstractproperty + def database(self): + return None + + @abc.abstractproperty + def user(self): + return None + + @abc.abstractproperty + def password(self): + return None + + def update_id(self): + """ + This update id will be a unique identifier + for this insert on this table. + """ + return self.task_id + + def output(self): + """ + Returns a RedshiftTarget representing the inserted dataset. + + Normally you don't override this. + """ + # uses class name as a meta-table + return RedshiftTarget( + host=self.host, + database=self.database, + user=self.user, + password=self.password, + table=self.__class__.__name__, + update_id=self.update_id()) + + def run(self): + """ + Kill any open Redshift sessions for the given database. + """ + connection = self.output().connect() + # kill any sessions other than ours and + # internal Redshift sessions (rdsdb) + query = ("select pg_terminate_backend(process) " + "from STV_SESSIONS " + "where db_name=%s " + "and user_name != 'rdsdb' " + "and process != pg_backend_pid()") + cursor = connection.cursor() + logger.info('Killing all open Redshift sessions for database: %s', self.database) + try: + cursor.execute(query, (self.database,)) + cursor.close() + connection.commit() + except psycopg2.DatabaseError, e: + if e.message and 'EOF' in e.message: + # sometimes this operation kills the current session. + # rebuild the connection. Need to pause for 30-60 seconds + # before Redshift will allow us back in. 
+ connection.close() + logger.info('Pausing %s seconds for Redshift to reset connection', self.connection_reset_wait_seconds) + time.sleep(self.connection_reset_wait_seconds) + logger.info('Reconnecting to Redshift') + connection = self.output().connect() + else: + raise + + try: + self.output().touch(connection) + connection.commit() + finally: + connection.close() + + logger.info('Done killing all open Redshift sessions for database: %s', self.database) diff --git a/luigi/contrib/scalding.py b/luigi/contrib/scalding.py new file mode 100644 index 0000000000..024bc394cd --- /dev/null +++ b/luigi/contrib/scalding.py @@ -0,0 +1,288 @@ +import logging +import os +import re +import subprocess + +import luigi.configuration +import luigi.hadoop +import luigi.hadoop_jar +import luigi.hdfs +from luigi import LocalTarget +from luigi.task import flatten + +logger = logging.getLogger('luigi-interface') + +""" +Scalding support for Luigi. + +Example configuration section in client.cfg:: + + [scalding] + # scala home directory, which should include a lib subdir with scala jars. + scala-home: /usr/share/scala + + # scalding home directory, which should include a lib subdir with + # scalding-*-assembly-* jars as built from the official Twitter build script. + scalding-home: /usr/share/scalding + + # provided dependencies, e.g. jars required for compiling but not executing + # scalding jobs. Currently requred jars: + # org.apache.hadoop/hadoop-core/0.20.2 + # org.slf4j/slf4j-log4j12/1.6.6 + # log4j/log4j/1.2.15 + # commons-httpclient/commons-httpclient/3.1 + # commons-cli/commons-cli/1.2 + # org.apache.zookeeper/zookeeper/3.3.4 + scalding-provided: /usr/share/scalding/provided + + # additional jars required. + scalding-libjars: /usr/share/scalding/libjars +""" + + +class ScaldingJobRunner(luigi.hadoop.JobRunner): + """ + JobRunner for `pyscald` commands. Used to run a ScaldingJobTask. 
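The session-killing task above is typically declared as a prerequisite of a truncating or dropping copy task; a hedged, self-contained sketch (connection details are placeholders) could be:

.. code-block:: python

    import luigi
    from luigi.contrib.redshift import KillOpenRedshiftSessions


    class KillAnalyticsSessions(KillOpenRedshiftSessions):
        # placeholder connection details; 30s is usually enough after a reset
        connection_reset_wait_seconds = luigi.IntParameter(default=30)
        host = 'my-cluster.example.redshift.amazonaws.com:5439'
        database = 'analytics'
        user = 'luigi'
        password = 'some_password'

    # a truncating S3CopyToTable subclass would then declare:
    #
    #     def requires(self):
    #         return KillAnalyticsSessions()
    #
    # so that no open user sessions block the truncate before the copy runs.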
+ """ + + def __init__(self): + conf = luigi.configuration.get_config() + + default = os.environ.get('SCALA_HOME', '/usr/share/scala') + self.scala_home = conf.get('scalding', 'scala-home', default) + + default = os.environ.get('SCALDING_HOME', '/usr/share/scalding') + self.scalding_home = conf.get('scalding', 'scalding-home', default) + self.provided_dir = conf.get( + 'scalding', 'scalding-provided', os.path.join(default, 'provided')) + self.libjars_dir = conf.get( + 'scalding', 'scalding-libjars', os.path.join(default, 'libjars')) + + self.tmp_dir = LocalTarget(is_tmp=True) + + def _get_jars(self, path): + return [os.path.join(path, j) for j in os.listdir(path) + if j.endswith('.jar')] + + def get_scala_jars(self, include_compiler=False): + lib_dir = os.path.join(self.scala_home, 'lib') + jars = [os.path.join(lib_dir, 'scala-library.jar')] + + # additional jar for scala 2.10 only + reflect = os.path.join(lib_dir, 'scala-reflect.jar') + if os.path.exists(reflect): + jars.append(reflect) + + if include_compiler: + jars.append(os.path.join(lib_dir, 'scala-compiler.jar')) + + return jars + + def get_scalding_jars(self): + lib_dir = os.path.join(self.scalding_home, 'lib') + return self._get_jars(lib_dir) + + def get_scalding_core(self): + lib_dir = os.path.join(self.scalding_home, 'lib') + for j in os.listdir(lib_dir): + if j.startswith('scalding-core-'): + p = os.path.join(lib_dir, j) + logger.debug('Found scalding-core: %s', p) + return p + raise luigi.hadoop.HadoopJobError('Coudl not find scalding-core.') + + def get_provided_jars(self): + return self._get_jars(self.provided_dir) + + def get_libjars(self): + return self._get_jars(self.libjars_dir) + + def get_tmp_job_jar(self, source): + job_name = os.path.basename(os.path.splitext(source)[0]) + return os.path.join(self.tmp_dir.path, job_name + '.jar') + + def get_build_dir(self, source): + build_dir = os.path.join(self.tmp_dir.path, 'build') + return build_dir + + def get_job_class(self, source): + # find name of the job class + # usually the one that matches file name or last class that extends Job + job_name = os.path.splitext(os.path.basename(source))[0] + package = None + job_class = None + for l in open(source).readlines(): + p = re.search(r'package\s+([^\s\(]+)', l) + if p: + package = p.groups()[0] + p = re.search(r'class\s+([^\s\(]+).*extends\s+.*Job', l) + if p: + job_class = p.groups()[0] + if job_class == job_name: + break + if job_class: + if package: + job_class = package + '.' 
+ job_class + logger.debug('Found scalding job class: %s', job_class) + return job_class + else: + raise luigi.hadoop.HadoopJobError('Coudl not find scalding job class.') + + def build_job_jar(self, job): + job_jar = job.jar() + if job_jar: + if not os.path.exists(job_jar): + logger.error("Can't find jar: {0}, full path {1}".format( + job_jar, os.path.abspath(job_jar))) + raise Exception("job jar does not exist") + if not job.job_class(): + logger.error("Undefined job_class()") + raise Exception("Undefined job_class()") + return job_jar + + job_src = job.source() + if not job_src: + logger.error("Both source() and jar() undefined") + raise Exception("Both source() and jar() undefined") + if not os.path.exists(job_src): + logger.error("Can't find source: {0}, full path {1}".format( + job_src, os.path.abspath(job_src))) + raise Exception("job source does not exist") + + job_src = job.source() + job_jar = self.get_tmp_job_jar(job_src) + + build_dir = self.get_build_dir(job_src) + if not os.path.exists(build_dir): + os.makedirs(build_dir) + + classpath = ':'.join(filter(None, + self.get_scalding_jars() + + self.get_provided_jars() + + self.get_libjars() + + job.extra_jars())) + scala_cp = ':'.join(self.get_scala_jars(include_compiler=True)) + + # compile scala source + arglist = ['java', '-cp', scala_cp, 'scala.tools.nsc.Main', + '-classpath', classpath, + '-d', build_dir, job_src] + logger.info('Compiling scala source: %s', ' '.join(arglist)) + subprocess.check_call(arglist) + + # build job jar file + arglist = ['jar', 'cf', job_jar, '-C', build_dir, '.'] + logger.info('Building job jar: %s', ' '.join(arglist)) + subprocess.check_call(arglist) + return job_jar + + def run_job(self, job): + job_jar = self.build_job_jar(job) + jars = [job_jar] + self.get_libjars() + job.extra_jars() + scalding_core = self.get_scalding_core() + libjars = ','.join(filter(None, jars)) + arglist = luigi.hdfs.load_hadoop_cmd() + ['jar', scalding_core, '-libjars', libjars] + arglist += ['-D%s' % c for c in job.jobconfs()] + + job_class = job.job_class() or self.get_job_class(job.source()) + arglist += [job_class, '--hdfs'] + + # scalding does not parse argument with '=' properly + arglist += ['--name', job.task_id.replace('=', ':')] + + (tmp_files, job_args) = luigi.hadoop_jar.fix_paths(job) + arglist += job_args + + env = os.environ.copy() + jars.append(scalding_core) + hadoop_cp = ':'.join(filter(None, jars)) + env['HADOOP_CLASSPATH'] = hadoop_cp + logger.info("Submitting Hadoop job: HADOOP_CLASSPATH=%s %s", + hadoop_cp, ' '.join(arglist)) + luigi.hadoop.run_and_track_hadoop_job(arglist, env=env) + + for a, b in tmp_files: + a.move(b) + + +class ScaldingJobTask(luigi.hadoop.BaseHadoopJobTask): + """ + A job task for Scalding that define a scala source and (optional) main method. + + requires() should return a dictionary where the keys are Scalding argument + names and values are sub tasks or lists of subtasks. + + For example: + + .. code-block:: python + + {'input1': A, 'input2': C} => --input1 --input2 + {'input1': [A, B], 'input2': [C]} => --input1 --input2 + """ + + def relpath(self, current_file, rel_path): + """ + Compute path given current file and relative path. + """ + script_dir = os.path.dirname(os.path.abspath(current_file)) + rel_path = os.path.abspath(os.path.join(script_dir, rel_path)) + return rel_path + + def source(self): + """ + Path to the scala source for this Scalding Job + + Either one of source() or jar() must be specified. 
+ """ + return None + + def jar(self): + """ + Path to the jar file for this Scalding Job + + Either one of source() or jar() must be specified. + """ + return None + + def extra_jars(self): + """ + Extra jars for building and running this Scalding Job. + """ + return [] + + def job_class(self): + """ + optional main job class for this Scalding Job. + """ + return None + + def job_runner(self): + return ScaldingJobRunner() + + def atomic_output(self): + """ + If True, then rewrite output arguments to be temp locations and + atomically move them into place after the job finishes. + """ + return True + + def requires(self): + return {} + + def job_args(self): + """ + Extra arguments to pass to the Scalding job. + """ + return [] + + def args(self): + """ + Returns an array of args to pass to the job. + """ + arglist = [] + for k, v in self.requires_hadoop().iteritems(): + arglist.append('--' + k) + arglist.extend([t.output().path for t in flatten(v)]) + arglist.extend(['--output', self.output()]) + arglist.extend(self.job_args()) + return arglist diff --git a/luigi/contrib/spark.py b/luigi/contrib/spark.py index d1a173a473..1aec6dd8e3 100644 --- a/luigi/contrib/spark.py +++ b/luigi/contrib/spark.py @@ -3,8 +3,8 @@ import os import random import re -import subprocess import signal +import subprocess import sys import tempfile import time @@ -14,7 +14,6 @@ import luigi.hdfs from luigi import configuration - logger = logging.getLogger('luigi-interface') """ @@ -36,6 +35,7 @@ class SparkRunContext(object): + def __init__(self): self.app_id = None @@ -66,12 +66,21 @@ def __exit__(self, exc_type, exc_val, exc_tb): class SparkJobError(RuntimeError): + def __init__(self, message, out=None, err=None): super(SparkJobError, self).__init__(message, out, err) self.message = message self.out = out self.err = err + def __str__(self): + info = self.message + if self.out: + info += "\nSTDOUT: " + str(self.out) + if self.err: + info += "\nSTDERR: " + str(self.err) + return info + class SparkJob(luigi.Task): spark_workers = None @@ -81,7 +90,9 @@ class SparkJob(luigi.Task): temp_hadoop_output_file = None def requires_local(self): - ''' Default impl - override this method if you need any local input to be accessible in init() ''' + """ + Default impl - override this method if you need any local input to be accessible in init(). + """ return [] def requires_hadoop(self): @@ -158,7 +169,7 @@ def run(self): raise SparkJobError('Spark job failed: see yarn logs for %s' % app_id) else: temp_stderr.seek(0) - errors = temp_stderr.readlines() + errors = "".join(temp_stderr.readlines()) logger.error(errors) raise SparkJobError('Spark job failed', err=errors) @@ -213,7 +224,9 @@ def jar(self): "containing job_class") def dependency_jars(self): - """Override to provide a list of dependency jars.""" + """ + Override to provide a list of dependency jars. + """ return [] def job_class(self): @@ -268,16 +281,15 @@ def run(self): .format(app_id)) elif return_code != 0: temp_stderr.seek(0) - errors = temp_stderr.readlines() + errors = "".join(temp_stderr.readlines()) logger.error(errors) raise SparkJobError('Spark job failed', err=errors) def track_progress(self, proc): """ - The Spark client currently outputs a multiline status to stdout every - second while the application is running. This instead captures status - data and updates a single line of output until the application - finishes. + The Spark client currently outputs a multiline status to stdout every second while the application is running. 
+ + This instead captures status data and updates a single line of output until the application finishes. """ app_id = None app_status = 'N/A' @@ -318,7 +330,6 @@ def track_progress(self, proc): return proc.returncode, final_state, app_id - class PySpark1xJob(Spark1xJob): num_executors = None @@ -330,7 +341,9 @@ def program(self): raise NotImplementedError("subclass should define Spark .py file") def py_files(self): - """Override to provide a list of py files.""" + """ + Override to provide a list of py files. + """ return [] def run(self): @@ -362,6 +375,6 @@ def run(self): .format(app_id)) elif return_code != 0: temp_stderr.seek(0) - errors = temp_stderr.readlines() + errors = "".join(temp_stderr.readlines()) logger.error(errors) raise SparkJobError('Spark job failed', err=errors) diff --git a/luigi/contrib/sparkey.py b/luigi/contrib/sparkey.py index 857fd72247..f3e2746224 100644 --- a/luigi/contrib/sparkey.py +++ b/luigi/contrib/sparkey.py @@ -16,16 +16,19 @@ import luigi + class SparkeyExportTask(luigi.Task): - """ A luigi task that writes to a local sparkey log file. + """ + A luigi task that writes to a local sparkey log file. Subclasses should implement the requires and output methods. The output must be a luigi.LocalTarget. The resulting sparkey log file will contain one entry for every line in the input, mapping from the first value to a tab-separated list of the - rest of the line. To generate a simple key-value index, yield "key", "value" - pairs from the input(s) to this task. + rest of the line. + + To generate a simple key-value index, yield "key", "value" pairs from the input(s) to this task. """ # the separator used to split input lines @@ -55,4 +58,3 @@ def _write_sparkey_file(self): # move finished sparkey file to final destination temp_output.move(outfile.path) - diff --git a/luigi/contrib/sqla.py b/luigi/contrib/sqla.py new file mode 100644 index 0000000000..cee367db65 --- /dev/null +++ b/luigi/contrib/sqla.py @@ -0,0 +1,332 @@ +# Copyright (c) 2015 Gouthaman Balaraman +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +""" +Support for SQLAlchmey. Provides SQLAlchemyTarget for storing in databases +supported by SQLAlchemy. The user would be responsible for installing the +required database driver to connect using SQLAlchemy. + +Minimal example of a job to copy data to database using SQLAlchemy is as shown +below: + +.. 
code-block:: python + + from sqlalchemy import String + import luigi + from luigi.contrib import sqla + + class SQLATask(sqla.CopyToTable): + # columns defines the table schema, with each element corresponding + # to a column in the format (args, kwargs) which will be sent to + # the sqlalchemy.Column(*args, **kwargs) + columns = [ + (["item", String(64)], {"primary_key": True}), + (["property", String(64)], {}) + ] + connection_string = "sqlite://" # in memory SQLite database + table = "item_property" # name of the table to store data + + def rows(self): + for row in [("item1" "property1"), ("item2", "property2")]: + yield row + + if __name__ == '__main__': + task = SQLATask() + luigi.build([task], local_scheduler=True) + + +If the target table where the data needs to be copied already exists, then +the column schema definition can be skipped and instead the reflect flag +can be set as True. Here is a modified version of the above example: + +.. code-block:: python + + from sqlalchemy import String + import luigi + from luigi.contrib import sqla + + class SQLATask(sqla.CopyToTable): + # If database table is already created, then the schema can be loaded + # by setting the reflect flag to True + reflect = True + connection_string = "sqlite://" # in memory SQLite database + table = "item_property" # name of the table to store data + + def rows(self): + for row in [("item1" "property1"), ("item2", "property2")]: + yield row + + if __name__ == '__main__': + task = SQLATask() + luigi.build([task], local_scheduler=True) + + +In the above examples, the data that needs to be copied was directly provided by +overriding the rows method. Alternately, if the data comes from another task, the +modified example would look as shown below: + +.. code-block:: python + + from sqlalchemy import String + import luigi + from luigi.contrib import sqla + from luigi.mock import MockFile + + class BaseTask(luigi.Task): + def output(self): + return MockFile("BaseTask") + + def run(self): + out = self.output().open("w") + TASK_LIST = ["item%d\\tproperty%d\\n" % (i, i) for i in range(10)] + for task in TASK_LIST: + out.write(task) + out.close() + + class SQLATask(sqla.CopyToTable): + # columns defines the table schema, with each element corresponding + # to a column in the format (args, kwargs) which will be sent to + # the sqlalchemy.Column(*args, **kwargs) + columns = [ + (["item", String(64)], {"primary_key": True}), + (["property", String(64)], {}) + ] + connection_string = "sqlite://" # in memory SQLite database + table = "item_property" # name of the table to store data + + def requires(self): + return BaseTask() + + if __name__ == '__main__': + task1, task2 = SQLATask(), BaseTask() + luigi.build([task1, task2], local_scheduler=True) + + +In the above example, the output from `BaseTask` is copied into the +database. Here we did not have to implement the `rows` method because +by default `rows` implementation assumes every line is a row with +column values separated by a tab. One can define `column_separator` +option for the task if the values are say comma separated instead of +tab separated. + +The other option to `sqla.CopyToTable` that can be of help with performance aspect is the +`chunk_size`. The default is 5000. This is the number of rows that will be inserted in +a transaction at a time. Depending on the size of the inserts, this value can be tuned +for performance. 
+ +Author: Gouthaman Balaraman +Date: 01/02/2015 +""" + + +import abc +import datetime +import itertools +import logging + +import luigi +import sqlalchemy + +logger = logging.getLogger('luigi-interface') + + +class SQLAlchemyTarget(luigi.Target): + """ + Database target using SQLAlchemy. + + This will rarely have to be directly instantiated by the user. + + Typical usage would be to override `luigi.contrib.sqla.CopyToTable` class + to create a task to write to the database. + """ + marker_table = None + + def __init__(self, connection_string, target_table, update_id, echo=False): + """ + Constructor for the SQLAlchemyTarget. + + :param connection_string: (str) SQLAlchemy connection string + :param target_table: (str) The table name for the data + :param update_id: (str) An identifier for this data set + :param echo: (bool) Flag to setup SQLAlchemy logging + :return: + """ + self.target_table = target_table + self.update_id = update_id + self.engine = sqlalchemy.create_engine(connection_string, echo=echo) + self.marker_table_bound = None + + def touch(self): + """ + Mark this update as complete. + """ + if self.marker_table_bound is None: + self.create_marker_table() + + table = self.marker_table_bound + with self.engine.begin() as conn: + id_exists = self.exists() + if not id_exists: + ins = table.insert().values(update_id=self.update_id, target_table=self.target_table) + else: + ins = table.update().values(update_id=self.update_id, target_table=self.target_table, + inserted=datetime.datetime.now()) + conn.execute(ins) + assert self.exists() + + def exists(self): + row = None + if self.marker_table_bound is None: + self.create_marker_table() + with self.engine.begin() as conn: + table = self.marker_table_bound + s = sqlalchemy.select([table]).where(table.c.update_id == self.update_id).limit(1) + row = conn.execute(s).fetchone() + return row is not None + + def create_marker_table(self): + """ + Create marker table if it doesn't exist. + + Using a separate connection since the transaction might have to be reset. + """ + if self.marker_table is None: + self.marker_table = luigi.configuration.get_config().get('sqlalchemy', 'marker-table', 'table_updates') + + with self.engine.begin() as con: + metadata = sqlalchemy.MetaData() + if not con.dialect.has_table(con, self.marker_table): + self.marker_table_bound = sqlalchemy.Table( + self.marker_table, metadata, + sqlalchemy.Column("update_id", sqlalchemy.String(128), primary_key=True), + sqlalchemy.Column("target_table", sqlalchemy.String(128)), + sqlalchemy.Column("inserted", sqlalchemy.DateTime, default=datetime.datetime.now())) + metadata.create_all(self.engine) + else: + metadata.reflect(bind=self.engine) + self.marker_table_bound = metadata.tables[self.marker_table] + + def open(self, mode): + raise NotImplementedError("Cannot open() SQLAlchemyTarget") + + +class CopyToTable(luigi.Task): + """ + An abstract task for inserting a data set into SQLAlchemy RDBMS + + Usage: + + * subclass and override the required `connection_string`, `table` and `columns` attributes. + """ + echo = False + + @abc.abstractmethod + def connection_string(self): + return None + + @abc.abstractproperty + def table(self): + return None + + # specify the columns that define the schema. The format for the columns is a list + # of tuples. 
For example : + # columns = [ + # (["id", sqlalchemy.Integer], dict(primary_key=True)), + # (["name", sqlalchemy.String(64)], {}), + # (["value", sqlalchemy.String(64)], {}) + # ] + # The tuple (args_list, kwargs_dict) here is the args and kwargs + # that need to be passed to sqlalchemy.Column(*args, **kwargs). + # If the tables have already been setup by another process, then you can + # completely ignore the columns. Instead set the reflect value to True below + columns = [] + + # options + column_separator = "\t" # how columns are separated in the file copied into postgres + chunk_size = 5000 # default chunk size for insert + reflect = False # Set this to true only if the table has already been created by alternate means + + def create_table(self, engine): + """ + Override to provide code for creating the target table. + + By default it will be created using types specified in columns. + If the table exists, then it binds to the existing table. + + If overridden, use the provided connection object for setting up the table in order to + create the table and insert data using the same transaction. + """ + def construct_sqla_columns(columns): + retval = [sqlalchemy.Column(*c[0], **c[1]) for c in columns] + return retval + + needs_setup = (len(self.columns) == 0) or (False in [len(c) == 2 for c in self.columns]) if not self.reflect else False + if needs_setup: + # only names of columns specified, no types + raise NotImplementedError("create_table() not implemented for %r and columns types not specified" % self.table) + else: + # if columns is specified as (name, type) tuples + with engine.begin() as con: + metadata = sqlalchemy.MetaData() + try: + if not con.dialect.has_table(con, self.table): + sqla_columns = construct_sqla_columns(self.columns) + self.table_bound = sqlalchemy.Table(self.table, metadata, *sqla_columns) + metadata.create_all(engine) + else: + metadata.reflect(bind=engine) + self.table_bound = metadata.tables[self.table] + except Exception as e: + logger.exception(self.table + str(e)) + + def update_id(self): + """ + This update id will be a unique identifier for this insert on this table. + """ + return self.task_id + + def output(self): + return SQLAlchemyTarget( + connection_string=self.connection_string, + target_table=self.table, + update_id=self.update_id(), + echo=self.echo + ) + + def rows(self): + """ + Return/yield tuples or lists corresponding to each row to be inserted. + + This method can be overridden for custom file types or formats. + """ + with self.input().open('r') as fobj: + for line in fobj: + yield line.strip("\n").split(self.column_separator) + + def run(self): + logger.info("Running task copy to table for update id %s for table %s" % (self.update_id(), self.table)) + output = self.output() + self.create_table(output.engine) + with output.engine.begin() as conn: + rows = iter(self.rows()) + ins_rows = [dict(zip((c.key for c in self.table_bound.c), row)) + for row in itertools.islice(rows, self.chunk_size)] + while ins_rows: + ins = self.table_bound.insert() + conn.execute(ins, ins_rows) + ins_rows = [dict(zip((c.key for c in self.table_bound.c), row)) + for row in itertools.islice(rows, self.chunk_size)] + logger.info("Finished inserting %d rows into SQLAlchemy target" % len(ins_rows)) + output.touch() + logger.info("Finished inserting rows into SQLAlchemy target") diff --git a/luigi/contrib/ssh.py b/luigi/contrib/ssh.py index 3a25dc7ac9..7c49d107c4 100644 --- a/luigi/contrib/ssh.py +++ b/luigi/contrib/ssh.py @@ -13,11 +13,14 @@ # the License. 
""" -Light-weight remote execution library and utilities +Light-weight remote execution library and utilities. -There are some examples in the unittest, but I added another more luigi-specific in the examples directory (examples/ssh_remote_execution.py +There are some examples in the unittest, but +I added another more luigi-specific in the examples directory (examples/ssh_remote_execution.py -contrib.ssh.RemoteContext is meant to provide functionality similar to that of the standard library subprocess module, but where the commands executed are run on a remote machine instead, without the user having to think about prefixing everything with "ssh" and credentials etc. +contrib.ssh.RemoteContext is meant to provide functionality similar to that of the standard library subprocess module, +but where the commands executed are run on a remote machine instead, +without the user having to think about prefixing everything with "ssh" and credentials etc. Using this mini library (which is just a convenience wrapper for subprocess), RemoteTarget is created to let you stream data from a remotely stored file using @@ -30,17 +33,18 @@ This can be super convenient when you want secure communication using a non-secure protocol or circumvent firewalls (as long as they are open for ssh traffic). """ +import contextlib import os import random +import subprocess import luigi -import luigi.target import luigi.format -import subprocess -import contextlib +import luigi.target class RemoteContext(object): + def __init__(self, host, username=None, key_file=None, connect_timeout=None): self.host = host self.username = username @@ -77,14 +81,17 @@ def _prepare_cmd(self, cmd): return connection_cmd + cmd def Popen(self, cmd, **kwargs): - """ Remote Popen """ + """ + Remote Popen. + """ prefixed_cmd = self._prepare_cmd(cmd) return subprocess.Popen(prefixed_cmd, **kwargs) def check_output(self, cmd): - """ Execute a shell command remotely and return the output + """ + Execute a shell command remotely and return the output. - Simplified version of Popen when you only want the output as a string and detect any errors + Simplified version of Popen when you only want the output as a string and detect any errors. """ p = self.Popen(cmd, stdout=subprocess.PIPE) output, _ = p.communicate() @@ -94,7 +101,8 @@ def check_output(self, cmd): @contextlib.contextmanager def tunnel(self, local_port, remote_port=None, remote_host="localhost"): - """ Open a tunnel between localhost:local_port and remote_host:remote_port via the host specified by this context + """ + Open a tunnel between localhost:local_port and remote_host:remote_port via the host specified by this context. Remember to close() the returned "tunnel" object in order to clean up after yourself when you are done with the tunnel. @@ -115,14 +123,17 @@ def tunnel(self, local_port, remote_port=None, remote_host="localhost"): class RemoteFileSystem(luigi.target.FileSystem): + def __init__(self, host, username=None, key_file=None): self.remote_context = RemoteContext(host, username, key_file) def exists(self, path): - """ Return `True` if file or directory at `path` exist, False otherwise """ + """ + Return `True` if file or directory at `path` exist, False otherwise. 
+ """ try: self.remote_context.check_output(["test", "-e", path]) - except subprocess.CalledProcessError, e: + except subprocess.CalledProcessError as e: if e.returncode == 1: return False else: @@ -130,7 +141,9 @@ def exists(self, path): return True def remove(self, path, recursive=True): - """ Remove file or directory at location `path` """ + """ + Remove file or directory at location `path`. + """ if recursive: cmd = ["rm", "-r", path] else: @@ -172,6 +185,7 @@ def get(self, path, local_path): class AtomicRemoteFileWriter(luigi.format.OutputPipeProcessWrapper): + def __init__(self, fs, path): self._fs = fs self.path = path @@ -206,11 +220,13 @@ def fs(self): class RemoteTarget(luigi.target.FileSystemTarget): """ - Target used for reading from remote files. The target is implemented using - ssh commands streaming data over the network. + Target used for reading from remote files. + + The target is implemented using ssh commands streaming data over the network. """ + def __init__(self, path, host, format=None, username=None, key_file=None): - self.path = path + super(RemoteTarget, self).__init__(path) self.format = format self._fs = RemoteFileSystem(host, username, key_file) diff --git a/luigi/contrib/target.py b/luigi/contrib/target.py index 3d2bbaeadf..32b1028df1 100644 --- a/luigi/contrib/target.py +++ b/luigi/contrib/target.py @@ -1,12 +1,16 @@ -import luigi.target import logging import types + +import luigi.target + logger = logging.getLogger('luigi-interface') -class CascadingClient(): + +class CascadingClient(object): """ - A FilesystemClient that will cascade failing function calls through a list - of clients. Which clients are used are specified at time of construction. + A FilesystemClient that will cascade failing function calls through a list of clients. + + Which clients are used are specified at time of construction. """ # This constant member is supposed to include all methods, feel free to add @@ -14,10 +18,14 @@ class CascadingClient(): # created, pass the kwarg to the constructor. ALL_METHOD_NAMES = ['exists', 'rename', 'remove', 'chmod', 'chown', 'count', 'copy', 'get', 'put', 'mkdir', 'listdir', - 'isdir'] + 'isdir', + 'rename_dont_move', + ] - def __init__(self, clients, method_names=ALL_METHOD_NAMES): + def __init__(self, clients, method_names=None): self.clients = clients + if method_names is None: + method_names = self.ALL_METHOD_NAMES for method_name in method_names: new_method = self._make_method(method_name) @@ -47,4 +55,4 @@ def _chained_call(self, method_name, *args, **kwargs): logger.exception( 'The {0} failed to {1}, using fallback class {2}' .format(client.__class__.__name__, method_name, - self.clients[i+1].__class__.__name__)) + self.clients[i + 1].__class__.__name__)) diff --git a/luigi/contrib/webhdfs.py b/luigi/contrib/webhdfs.py index d50bde08dc..ac1e242cd4 100644 --- a/luigi/contrib/webhdfs.py +++ b/luigi/contrib/webhdfs.py @@ -4,9 +4,9 @@ """ from __future__ import absolute_import +import logging import os import random -import logging import tempfile from luigi import configuration @@ -39,6 +39,7 @@ def open(self, mode='r'): class ReadableWebHdfsFile(object): + def __init__(self, path, client): self.path = path self.client = client @@ -77,6 +78,7 @@ class AtomicWebHdfsFile(file): """ An Hdfs file that writes to a temp file and put to WebHdfs on close. 
""" + def __init__(self, path, client): unique_name = 'luigi-webhdfs-tmp-%09d' % random.randrange(0, 1e10) self.tmp_path = os.path.join(tempfile.gettempdir(), unique_name) @@ -93,13 +95,17 @@ def __enter__(self): return self def __exit__(self, exc_type, exc, traceback): - """Close/commit the file if there are no exception""" + """ + Close/commit the file if there are no exception. + """ if exc_type: return return file.__exit__(self, exc_type, exc, traceback) def __del__(self): - """Remove the temporary directory""" + """ + Remove the temporary directory. + """ if os.path.exists(self.tmp_path): os.remove(self.tmp_path) @@ -126,11 +132,13 @@ def walk(self, path, depth=1): return self.webhdfs.walk(path, depth=depth) def exists(self, path): - """Returns true if the path exists and false otherwise""" + """ + Returns true if the path exists and false otherwise. + """ try: self.webhdfs.status(path) return True - except webhdfs.util.HdfsError, e: + except webhdfs.util.HdfsError as e: if str(e).startswith('File does not exist: '): return False else: diff --git a/luigi/date_interval.py b/luigi/date_interval.py index f4daafd5de..d166ef5bcb 100644 --- a/luigi/date_interval.py +++ b/luigi/date_interval.py @@ -12,11 +12,12 @@ # License for the specific language governing permissions and limitations under # the License. -import re import datetime +import re class DateInterval(object): + def __init__(self, date_a, date_b): # Represents all date d such that date_a <= d < date_b self.date_a = date_a @@ -52,11 +53,11 @@ def to_string(self): raise NotImplementedError @classmethod - def from_date(self, d): + def from_date(cls, d): raise NotImplementedError @classmethod - def parse(self, s): + def parse(cls, s): raise NotImplementedError def __contains__(self, date): @@ -70,7 +71,7 @@ def __hash__(self): return hash(repr(self)) def __cmp__(self, other): - if type(self) != type(other): + if not isinstance(self, type(other)): # doing this because it's not well defined if eg. 2012-01-01-2013-01-01 == 2012 raise TypeError('Date interval type mismatch') return cmp((self.date_a, self.date_b), (other.date_a, other.date_b)) @@ -86,6 +87,7 @@ def __ne__(self, other): class Date(DateInterval): + def __init__(self, y, m, d): a = datetime.date(y, m, d) b = datetime.date(y, m, d) + datetime.timedelta(1) @@ -95,16 +97,17 @@ def to_string(self): return self.date_a.strftime('%Y-%m-%d') @classmethod - def from_date(self, d): + def from_date(cls, d): return Date(d.year, d.month, d.day) @classmethod - def parse(self, s): + def parse(cls, s): if re.match(r'\d\d\d\d\-\d\d\-\d\d$', s): return Date(*map(int, s.split('-'))) class Week(DateInterval): + def __init__(self, y, w): # Python datetime does not have a method to convert from ISO weeks! 
for d in xrange(-10, 370): @@ -121,17 +124,18 @@ def to_string(self): return '%d-W%02d' % self.date_a.isocalendar()[:2] @classmethod - def from_date(self, d): + def from_date(cls, d): return Week(*d.isocalendar()[:2]) @classmethod - def parse(self, s): + def parse(cls, s): if re.match(r'\d\d\d\d\-W\d\d$', s): y, w = map(int, s.split('-W')) return Week(y, w) class Month(DateInterval): + def __init__(self, y, m): date_a = datetime.date(y, m, 1) date_b = datetime.date(y + m / 12, 1 + m % 12, 1) @@ -141,17 +145,18 @@ def to_string(self): return self.date_a.strftime('%Y-%m') @classmethod - def from_date(self, d): + def from_date(cls, d): return Month(d.year, d.month) @classmethod - def parse(self, s): + def parse(cls, s): if re.match(r'\d\d\d\d\-\d\d$', s): y, m = map(int, s.split('-')) return Month(y, m) class Year(DateInterval): + def __init__(self, y): date_a = datetime.date(y, 1, 1) date_b = datetime.date(y + 1, 1, 1) @@ -161,21 +166,22 @@ def to_string(self): return self.date_a.strftime('%Y') @classmethod - def from_date(self, d): + def from_date(cls, d): return Year(d.year) @classmethod - def parse(self, s): + def parse(cls, s): if re.match(r'\d\d\d\d$', s): return Year(int(s)) class Custom(DateInterval): + def to_string(self): return '-'.join([d.strftime('%Y-%m-%d') for d in (self.date_a, self.date_b)]) @classmethod - def parse(self, s): + def parse(cls, s): if re.match('\d\d\d\d\-\d\d\-\d\d\-\d\d\d\d\-\d\d\-\d\d$', s): # Actually the ISO 8601 specifies / as the time interval format # Not sure if this goes for date intervals as well. In any case slashes will diff --git a/luigi/db_task_history.py b/luigi/db_task_history.py index 17279158ad..8294c5e498 100644 --- a/luigi/db_task_history.py +++ b/luigi/db_task_history.py @@ -12,26 +12,27 @@ # License for the specific language governing permissions and limitations under # the License. -import task_history -import configuration import datetime import logging - from contextlib import contextmanager -from task_status import PENDING, FAILED, DONE, RUNNING -from sqlalchemy.orm.collections import attribute_mapped_collection -from sqlalchemy import Column, Integer, String, ForeignKey, TIMESTAMP, create_engine -from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import sessionmaker, relationship +import configuration +import sqlalchemy +import sqlalchemy.ext.declarative +import sqlalchemy.orm +import sqlalchemy.orm.collections +import task_history +from task_status import DONE, FAILED, PENDING, RUNNING -Base = declarative_base() +Base = sqlalchemy.ext.declarative.declarative_base() logger = logging.getLogger('luigi-interface') class DbTaskHistory(task_history.TaskHistory): - """ Task History that writes to a database using sqlalchemy. Also has methods for useful db queries + """ + Task History that writes to a database using sqlalchemy. + Also has methods for useful db queries. 
""" @contextmanager def _session(self, session=None): @@ -50,8 +51,8 @@ def _session(self, session=None): def __init__(self): config = configuration.get_config() connection_string = config.get('task_history', 'db_connection') - self.engine = create_engine(connection_string) - self.session_factory = sessionmaker(bind=self.engine, expire_on_commit=False) + self.engine = sqlalchemy.create_engine(connection_string) + self.session_factory = sqlalchemy.orm.sessionmaker(bind=self.engine, expire_on_commit=False) Base.metadata.create_all(self.engine) self.tasks = {} # task_id -> TaskRecord @@ -101,8 +102,9 @@ def _find_or_create_task(self, task): task.record_id = task_record.id def find_all_by_parameters(self, task_name, session=None, **task_params): - ''' Find tasks with the given task_name and the same parameters as the kwargs - ''' + """ + Find tasks with the given task_name and the same parameters as the kwargs. + """ with self._session(session) as session: tasks = session.query(TaskRecord).join(TaskEvent).filter(TaskRecord.name == task_name).order_by(TaskEvent.ts).all() for task in tasks: @@ -110,13 +112,15 @@ def find_all_by_parameters(self, task_name, session=None, **task_params): yield task def find_all_by_name(self, task_name, session=None): - ''' Find all tasks with the given task_name - ''' + """ + Find all tasks with the given task_name. + """ return self.find_all_by_parameters(task_name, session) def find_latest_runs(self, session=None): - ''' Return tasks that have been updated in the past 24 hours. - ''' + """ + Return tasks that have been updated in the past 24 hours. + """ with self._session(session) as session: yesterday = datetime.datetime.now() - datetime.timedelta(days=1) return session.query(TaskRecord).\ @@ -127,48 +131,58 @@ def find_latest_runs(self, session=None): all() def find_task_by_id(self, id, session=None): - ''' Find task with the given record ID - ''' + """ + Find task with the given record ID. + """ with self._session(session) as session: return session.query(TaskRecord).get(id) class TaskParameter(Base): - """ Table to track luigi.Parameter()s of a Task + """ + Table to track luigi.Parameter()s of a Task. """ __tablename__ = 'task_parameters' - task_id = Column(Integer, ForeignKey('tasks.id'), primary_key=True) - name = Column(String(128), primary_key=True) - value = Column(String(256)) + task_id = sqlalchemy.Column(sqlalchemy.Integer, sqlalchemy.ForeignKey('tasks.id'), primary_key=True) + name = sqlalchemy.Column(sqlalchemy.String(128), primary_key=True) + value = sqlalchemy.Column(sqlalchemy.String(256)) def __repr__(self): return "TaskParameter(task_id=%d, name=%s, value=%s)" % (self.task_id, self.name, self.value) class TaskEvent(Base): - """ Table to track when a task is scheduled, starts, finishes, and fails + """ + Table to track when a task is scheduled, starts, finishes, and fails. """ __tablename__ = 'task_events' - id = Column(Integer, primary_key=True) - task_id = Column(Integer, ForeignKey('tasks.id')) - event_name = Column(String(20)) - ts = Column(TIMESTAMP, index=True) + id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True) + task_id = sqlalchemy.Column(sqlalchemy.Integer, sqlalchemy.ForeignKey('tasks.id')) + event_name = sqlalchemy.Column(sqlalchemy.String(20)) + ts = sqlalchemy.Column(sqlalchemy.TIMESTAMP, index=True) def __repr__(self): return "TaskEvent(task_id=%s, event_name=%s, ts=%s" % (self.task_id, self.event_name, self.ts) class TaskRecord(Base): - """ Base table to track information about a luigi.Task. 
References to other tables are available through - task.events, task.parameters, etc. + """ + Base table to track information about a luigi.Task. + + References to other tables are available through task.events, task.parameters, etc. """ __tablename__ = 'tasks' - id = Column(Integer, primary_key=True) - name = Column(String(128), index=True) - host = Column(String(128)) - parameters = relationship('TaskParameter', collection_class=attribute_mapped_collection('name'), - cascade="all, delete-orphan") - events = relationship("TaskEvent", order_by=lambda: TaskEvent.ts.desc(), backref="task") + id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True) + name = sqlalchemy.Column(sqlalchemy.String(128), index=True) + host = sqlalchemy.Column(sqlalchemy.String(128)) + parameters = sqlalchemy.orm.relationship( + 'TaskParameter', + collection_class=sqlalchemy.orm.collections.attribute_mapped_collection('name'), + cascade="all, delete-orphan") + events = sqlalchemy.orm.relationship( + 'TaskEvent', + order_by=lambda: TaskEvent.ts.desc(), + backref='task') def __repr__(self): return "TaskRecord(name=%s, host=%s)" % (self.name, self.host) diff --git a/luigi/deprecate_kwarg.py b/luigi/deprecate_kwarg.py new file mode 100644 index 0000000000..52bb301ae9 --- /dev/null +++ b/luigi/deprecate_kwarg.py @@ -0,0 +1,39 @@ +# -*- coding: utf-8 -*- + +import warnings + + +def deprecate_kwarg(old_name, new_name, kw_value): + """ + Rename keyword arguments, but keep backwards compatibility. + + Usage: + + .. code-block: python + + >>> @deprecate_kwarg('old', 'new', 'defval') + ... def some_func(old='defval'): + ... print(old) + ... + >>> some_func(new='yay') + yay + >>> some_func(old='yaay') + yaay + >>> some_func() + defval + + """ + def real_decorator(function): + def new_function(*args, **kwargs): + value = kw_value + if old_name in kwargs: + warnings.warn('Keyword argument {0} is deprecated, use {1}' + .format(old_name, new_name)) + value = kwargs[old_name] + if new_name in kwargs: + value = kwargs[new_name] + del kwargs[new_name] + kwargs[old_name] = value + return function(*args, **kwargs) + return new_function + return real_decorator diff --git a/luigi/event.py b/luigi/event.py index cbb1ed40f2..dd2e15e28b 100644 --- a/luigi/event.py +++ b/luigi/event.py @@ -12,7 +12,8 @@ # License for the specific language governing permissions and limitations under # the License. -class Event: + +class Event(object): # TODO nice descriptive subclasses of Event instead of strings? pass their instances to the callback instead of an undocumented arg list? 
DEPENDENCY_DISCOVERED = "event.core.dependency.discovered" # triggered for every (task, upstream task) pair discovered in a jobflow DEPENDENCY_MISSING = "event.core.dependency.missing" @@ -22,5 +23,3 @@ class Event: FAILURE = "event.core.failure" SUCCESS = "event.core.success" PROCESSING_TIME = "event.core.processing_time" - - diff --git a/luigi/file.py b/luigi/file.py index 5b57826f9f..0d22e42cfc 100644 --- a/luigi/file.py +++ b/luigi/file.py @@ -14,16 +14,18 @@ import os import random -import tempfile import shutil +import tempfile + import luigi.util -from target import FileSystem, FileSystemTarget from luigi.format import FileWrapper +from target import FileSystem, FileSystemTarget class atomic_file(file): # Simple class that writes to a temp file and moves it on close() # Also cleans up the temp file if close is not invoked + def __init__(self, path): self.__tmp_path = path + '-luigi-tmp-%09d' % random.randrange(0, 1e10) self.path = path @@ -49,14 +51,16 @@ def __exit__(self, exc_type, exc, traceback): class LocalFileSystem(FileSystem): - """ Wrapper for access to file system operations + """ + Wrapper for access to file system operations. - Work in progress - add things as needed + Work in progress - add things as needed. """ + def exists(self, path): return os.path.exists(path) - def mkdir(self, path): + def mkdir(self, path, parents=True, raise_if_exists=False): os.makedirs(path) def isdir(self, path): @@ -81,14 +85,18 @@ def __init__(self, path=None, format=None, is_tmp=False): self.format = format self.is_tmp = is_tmp + def makedirs(self): + """ + Create all parent folders if they do not exist. + """ + normpath = os.path.normpath(self.path) + parentfolder = os.path.dirname(normpath) + if parentfolder and not os.path.exists(parentfolder): + os.makedirs(parentfolder) + def open(self, mode='r'): if mode == 'w': - # Create folder if it does not exist - normpath = os.path.normpath(self.path) - parentfolder = os.path.dirname(normpath) - if parentfolder and not os.path.exists(parentfolder): - os.makedirs(parentfolder) - + self.makedirs() if self.format: return self.format.pipe_writer(atomic_file(self.path)) else: @@ -122,7 +130,7 @@ def copy(self, new_path, fail_if_exists=False): if fail_if_exists and os.path.exists(new_path): raise RuntimeError('Destination exists: %s' % new_path) tmp = File(new_path + '-luigi-tmp-%09d' % random.randrange(0, 1e10), is_tmp=True) - tmp.open('w') + tmp.makedirs() shutil.copy(self.path, tmp.fn) tmp.move(new_path) diff --git a/luigi/format.py b/luigi/format.py index e12a64ef52..522e9b7174 100644 --- a/luigi/format.py +++ b/luigi/format.py @@ -12,12 +12,13 @@ # License for the specific language governing permissions and limitations under # the License. -import subprocess import signal +import subprocess class FileWrapper(object): - """Wrap `file` in a "real" so stuff can be added to it after creation + """ + Wrap `file` in a "real" so stuff can be added to it after creation. """ def __init__(self, file_object): @@ -42,12 +43,15 @@ def __iter__(self): class InputPipeProcessWrapper(object): + def __init__(self, command, input_pipe=None): - ''' - @param command a subprocess.Popen instance with stdin=input_pipe and - stdout=subprocess.PIPE. Alternatively, just its args argument as a - convenience. - ''' + """ + Initializes a InputPipeProcessWrapper instance. + + :param command: a subprocess.Popen instance with stdin=input_pipe and + stdout=subprocess.PIPE. + Alternatively, just its args argument as a convenience. 
+ """ self._command = command self._input_pipe = input_pipe self._process = command if isinstance(command, subprocess.Popen) else self.create_subprocess(command) @@ -94,7 +98,9 @@ def __enter__(self): return self def _abort(self): - "Call _finish, but eat the exception (if any)." + """ + Call _finish, but eat the exception (if any). + """ try: self._finish() except KeyboardInterrupt: @@ -144,7 +150,9 @@ def writeLine(self, line): self.write(line + '\n') def _finish(self): - """ Closes and waits for subprocess to exit """ + """ + Closes and waits for subprocess to exit. + """ if self._process.returncode is None: self._process.stdin.flush() self._process.stdin.close() @@ -182,7 +190,9 @@ def __getattr__(self, name): class Format(object): - """ Interface for format specifications """ + """ + Interface for format specifications. + """ # TODO Move this to somewhere else? @classmethod @@ -204,6 +214,7 @@ def pipe_writer(cls, output_pipe): class Gzip(Format): + @classmethod def pipe_reader(cls, input_pipe): return InputPipeProcessWrapper(['gunzip'], input_pipe) @@ -214,6 +225,7 @@ def pipe_writer(cls, output_pipe): class Bzip2(Format): + @classmethod def pipe_reader(cls, input_pipe): return InputPipeProcessWrapper(['bzcat'], input_pipe) @@ -221,4 +233,3 @@ def pipe_reader(cls, input_pipe): @classmethod def pipe_writer(cls, output_pipe): return OutputPipeProcessWrapper(['bzip2'], output_pipe) - diff --git a/luigi/hadoop.py b/luigi/hadoop.py index e3caea97a0..3bd80b654b 100644 --- a/luigi/hadoop.py +++ b/luigi/hadoop.py @@ -12,28 +12,30 @@ # License for the specific language governing permissions and limitations under # the License. -import random -import sys -import os -import datetime -import subprocess -import tempfile -from itertools import groupby -import pickle +import abc import binascii +import datetime +import glob +import json import logging -import StringIO +import os +import pickle +import random import re import shutil import signal +import StringIO +import subprocess +import sys +import tempfile +import warnings from hashlib import md5 +from itertools import groupby + +import configuration import luigi import luigi.hdfs -import configuration -import warnings import mrrunner -import json -import glob logger = logging.getLogger('luigi-interface') @@ -41,16 +43,19 @@ def attach(*packages): - """ Attach a python package to hadoop map reduce tarballs to make those packages available on the hadoop cluster""" + """ + Attach a python package to hadoop map reduce tarballs to make those packages available + on the hadoop cluster. 
+ """ _attached_packages.extend(packages) -def dereference(file): - if os.path.islink(file): - #by joining with the dirname we are certain to get the absolute path - return dereference(os.path.join(os.path.dirname(file), os.readlink(file))) +def dereference(f): + if os.path.islink(f): + # by joining with the dirname we are certain to get the absolute path + return dereference(os.path.join(os.path.dirname(f), os.readlink(f))) else: - return file + return f def get_extra_files(extra_files): @@ -66,8 +71,8 @@ def get_extra_files(extra_files): if os.path.isdir(src): src_prefix = os.path.join(src, '') for base, dirs, files in os.walk(src): - for file in files: - f_src = os.path.join(base, file) + for f in files: + f_src = os.path.join(base, f) f_src_stripped = f_src[len(src_prefix):] f_dst = os.path.join(dst, f_src_stripped) result.append((f_src, f_dst)) @@ -78,7 +83,9 @@ def get_extra_files(extra_files): def create_packages_archive(packages, filename): - """Create a tar archive which will contain the files for the packages listed in packages. """ + """ + Create a tar archive which will contain the files for the packages listed in packages. + """ import tarfile tar = tarfile.open(filename, "w") @@ -149,11 +156,15 @@ def add_files_for_package(sub_package_path, root_package_path, root_package_name def flatten(sequence): - """A simple generator which flattens a sequence. + """ + A simple generator which flattens a sequence. + + Only one level is flattened. + + .. code-block:: python - Only one level is flattned. + (1, (2, 3), 4) -> (1, 2, 3, 4) - (1, (2, 3), 4) -> (1, 2, 3, 4) """ for item in sequence: if hasattr(item, "__iter__"): @@ -164,6 +175,7 @@ def flatten(sequence): class HadoopRunContext(object): + def __init__(self): self.job_id = None @@ -187,6 +199,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): class HadoopJobError(RuntimeError): + def __init__(self, message, out=None, err=None): super(HadoopJobError, self).__init__(message, out, err) self.message = message @@ -195,17 +208,26 @@ def __init__(self, message, out=None, err=None): def run_and_track_hadoop_job(arglist, tracking_url_callback=None, env=None): - ''' Runs the job by invoking the command from the given arglist. Finds tracking urls from the output and attempts to fetch - errors using those urls if the job fails. Throws HadoopJobError with information about the error (including stdout and stderr - from the process) on failure and returns normally otherwise. - ''' + """ + Runs the job by invoking the command from the given arglist. + Finds tracking urls from the output and attempts to fetch errors using those urls if the job fails. + Throws HadoopJobError with information about the error + (including stdout and stderr from the process) + on failure and returns normally otherwise. + + :param arglist: + :param tracking_url_callback: + :param env: + :return: + """ logger.info('%s', ' '.join(arglist)) def write_luigi_history(arglist, history): - ''' + """ Writes history to a file in the job's output directory in JSON format. - Currently just for tracking the job ID in a configuration where no history is stored in the output directory by Hadoop. - ''' + Currently just for tracking the job ID in a configuration where + no history is stored in the output directory by Hadoop. 
+ """ history_filename = configuration.get_config().get('core', 'history-filename', '') if history_filename and '-output' in arglist: output_dir = arglist[arglist.index('-output') + 1] @@ -263,7 +285,7 @@ def track_process(arglist, tracking_url_callback, env=None): try: task_failures = fetch_task_failures(tracking_url) - except Exception, e: + except Exception as e: raise HadoopJobError(message + 'Additionally, an error occurred when fetching data from %s: %s' % (tracking_url, e), out, err) @@ -279,14 +301,16 @@ def track_process(arglist, tracking_url_callback, env=None): def fetch_task_failures(tracking_url): - ''' Uses mechanize to fetch the actual task logs from the task tracker. + """ + Uses mechanize to fetch the actual task logs from the task tracker. - This is highly opportunistic, and we might not succeed. So we set a low timeout and hope it works. + This is highly opportunistic, and we might not succeed. + So we set a low timeout and hope it works. If it does not, it's not the end of the world. TODO: Yarn has a REST API that we should probably use instead: - http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/MapredAppMasterRest.html - ''' + http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/WebServicesIntro.html + """ import mechanize timeout = 3.0 failures_url = tracking_url.replace('jobdetails.jsp', 'jobfailures.jsp') + '&cause=failed' @@ -303,7 +327,7 @@ def fetch_task_failures(tracking_url): try: r = b2.open(task_url, timeout=timeout) data = r.read() - except Exception, e: + except Exception as e: logger.debug('Error fetching data from %s: %s', task_url, e) continue # Try to get the hex-encoded traceback back from the output @@ -319,17 +343,21 @@ class JobRunner(object): class HadoopJobRunner(JobRunner): - ''' Takes care of uploading & executing a Hadoop job using Hadoop streaming + """ + Takes care of uploading & executing a Hadoop job using Hadoop streaming. TODO: add code to support Elastic Mapreduce (using boto) and local execution. 
- ''' - def __init__(self, streaming_jar, modules=[], streaming_args=[], libjars=[], libjars_in_hdfs=[], jobconfs={}, input_format=None, output_format=None): + """ + + def __init__(self, streaming_jar, modules=None, streaming_args=None, libjars=None, libjars_in_hdfs=None, jobconfs=None, input_format=None, output_format=None): + def get(x, default): + return x is not None and x or default self.streaming_jar = streaming_jar - self.modules = modules - self.streaming_args = streaming_args - self.libjars = libjars - self.libjars_in_hdfs = libjars_in_hdfs - self.jobconfs = jobconfs + self.modules = get(modules, []) + self.streaming_args = get(streaming_args, []) + self.libjars = get(libjars, []) + self.libjars_in_hdfs = get(libjars_in_hdfs, []) + self.jobconfs = get(jobconfs, {}) self.input_format = input_format self.output_format = output_format self.tmp_dir = False @@ -348,10 +376,10 @@ def run_job(self, job): base_tmp_dir = configuration.get_config().get('core', 'tmp-dir', None) if base_tmp_dir: - warnings.warn("The core.tmp-dir configuration item is"\ - " deprecated, please use the TMPDIR"\ - " environment variable if you wish"\ - " to control where luigi.hadoop may"\ + warnings.warn("The core.tmp-dir configuration item is" + " deprecated, please use the TMPDIR" + " environment variable if you wish" + " to control where luigi.hadoop may" " create temporary files and directories.") self.tmp_dir = os.path.join(base_tmp_dir, 'hadoop_job_%016x' % random.getrandbits(64)) os.makedirs(self.tmp_dir) @@ -370,7 +398,7 @@ def run_job(self, job): # replace output with a temporary work directory output_final = job.output().path output_tmp_fn = output_final + '-temp-' + datetime.datetime.now().isoformat().replace(':', '-') - tmp_target = luigi.hdfs.HdfsTarget(output_tmp_fn, is_tmp=True) + tmp_target = luigi.hdfs.HdfsTarget(output_tmp_fn) arglist = luigi.hdfs.load_hadoop_cmd() + ['jar', self.streaming_jar] @@ -392,7 +420,7 @@ def run_job(self, job): dst_tmp = '%s_%09d' % (dst.replace('/', '_'), random.randint(0, 999999999)) files += ['%s#%s' % (src, dst_tmp)] # -files doesn't support subdirectories, so we need to create the dst_tmp -> dst manually - job._add_link(dst_tmp, dst) + job.add_link(dst_tmp, dst) if files: arglist += ['-files', ','.join(files)] @@ -434,11 +462,11 @@ def run_job(self, job): # submit job create_packages_archive(packages, self.tmp_dir + '/packages.tar') - job._dump(self.tmp_dir) + job.dump(self.tmp_dir) run_and_track_hadoop_job(arglist) - tmp_target.move(output_final, raise_if_exists=True) + tmp_target.move_dir(output_final) self.finish() def finish(self): @@ -452,7 +480,10 @@ def __del__(self): class DefaultHadoopJobRunner(HadoopJobRunner): - ''' The default job runner just reads from config and sets stuff ''' + """ + The default job runner just reads from config and sets stuff. + """ + def __init__(self): config = configuration.get_config() streaming_jar = config.get('hadoop', 'streaming-jar') @@ -461,29 +492,31 @@ def __init__(self): class LocalJobRunner(JobRunner): - ''' Will run the job locally + """ + Will run the job locally. This is useful for debugging and also unit testing. Tries to mimic Hadoop Streaming. 
TODO: integrate with JobTask - ''' + """ + def __init__(self, samplelines=None): self.samplelines = samplelines - def sample(self, input, n, output): - for i, line in enumerate(input): + def sample(self, input_stream, n, output): + for i, line in enumerate(input_stream): if n is not None and i >= n: break output.write(line) - def group(self, input): + def group(self, input_stream): output = StringIO.StringIO() lines = [] - for i, line in enumerate(input): + for i, line in enumerate(input_stream): parts = line.rstrip('\n').split('\t') blob = md5(str(i)).hexdigest() # pseudo-random blob to make sure the input isn't sorted lines.append((parts[:-1], blob, line)) - for k, _, line in sorted(lines): + for _, _, line in sorted(lines): output.write(line) output.seek(0) return output @@ -499,14 +532,14 @@ def run_job(self, job): if job.reducer == NotImplemented: # Map only job; no combiner, no reducer map_output = job.output().open('w') - job._run_mapper(map_input, map_output) + job.run_mapper(map_input, map_output) map_output.close() return job.init_mapper() # run job now... map_output = StringIO.StringIO() - job._run_mapper(map_input, map_output) + job.run_mapper(map_input, map_output) map_output.seek(0) if job.combiner == NotImplemented: @@ -514,13 +547,13 @@ def run_job(self, job): else: combine_input = self.group(map_output) combine_output = StringIO.StringIO() - job._run_combiner(combine_input, combine_output) + job.run_combiner(combine_input, combine_output) combine_output.seek(0) reduce_input = self.group(combine_output) job.init_reducer() reduce_output = job.output().open('w') - job._run_reducer(reduce_input, reduce_output) + job.run_reducer(reduce_input, reduce_output) reduce_output.close() @@ -538,6 +571,10 @@ class BaseHadoopJobTask(luigi.Task): _counter_dict = {} task_id = None + @abc.abstractmethod + def job_runner(self): + pass + def jobconfs(self): jcs = [] jcs.append('mapred.job.name=%s' % self.task_id) @@ -553,14 +590,14 @@ def jobconfs(self): jcs.append('mapred.job.queue.name=%s' % pool) return jcs - def init_local(self): - ''' Implement any work to setup any internal datastructure etc here. + """ + Implement any work to setup any internal datastructure etc here. You can add extra input using the requires_local/input_local methods. Anything you set on the object will be pickled and available on the Hadoop nodes. - ''' + """ pass def init_hadoop(self): @@ -571,7 +608,9 @@ def run(self): self.job_runner().run_job(self) def requires_local(self): - ''' Default impl - override this method if you need any local input to be accessible in init() ''' + """ + Default impl - override this method if you need any local input to be accessible in init(). + """ return [] def requires_hadoop(self): @@ -628,9 +667,12 @@ def _setup_remote(self): def job_runner(self): # We recommend that you define a subclass, override this method and set up your own config - """ Get the MapReduce runner for this job + """ + Get the MapReduce runner for this job. - If all outputs are HdfsTargets, the DefaultHadoopJobRunner will be used. Otherwise, the LocalJobRunner which streams all data through the local machine will be used (great for testing). + If all outputs are HdfsTargets, the DefaultHadoopJobRunner will be used. + Otherwise, the LocalJobRunner which streams all data through the local machine + will be used (great for testing). 
""" outputs = luigi.task.flatten(self.output()) for output in outputs: @@ -642,15 +684,20 @@ def job_runner(self): return DefaultHadoopJobRunner() def reader(self, input_stream): - """Reader is a method which iterates over input lines and outputs records. - The default implementation yields one argument containing the line for each line in the input.""" + """ + Reader is a method which iterates over input lines and outputs records. + + The default implementation yields one argument containing the line for each line in the input.""" for line in input_stream: yield line, def writer(self, outputs, stdout, stderr=sys.stderr): - """Writer format is a method which iterates over the output records from the reducer and formats - them for output. - The default implementation outputs tab separated items""" + """ + Writer format is a method which iterates over the output records + from the reducer and formats them for output. + + The default implementation outputs tab separated items. + """ for output in outputs: try: print >> stdout, "\t".join(map(str, flatten(output))) @@ -659,15 +706,18 @@ def writer(self, outputs, stdout, stderr=sys.stderr): raise def mapper(self, item): - """Re-define to process an input item (usually a line of input data) + """ + Re-define to process an input item (usually a line of input data). - Defaults to identity mapper that sends all lines to the same reducer""" + Defaults to identity mapper that sends all lines to the same reducer. + """ yield None, item combiner = NotImplemented def incr_counter(self, *args, **kwargs): - """ Increments a Hadoop counter + """ + Increments a Hadoop counter. Since counters can be a bit slow to update, this batches the updates. """ @@ -683,13 +733,14 @@ def incr_counter(self, *args, **kwargs): ct = self._counter_dict.get(key, 0) ct += count if ct >= threshold: - new_arg = list(key)+[ct] + new_arg = list(key) + [ct] self._incr_counter(*new_arg) ct = 0 self._counter_dict[key] = ct def _flush_batch_incr_counter(self): - """ Increments any unflushed counter values + """ + Increments any unflushed counter values. """ for key, count in self._counter_dict.iteritems(): if count == 0: @@ -698,9 +749,12 @@ def _flush_batch_incr_counter(self): self._incr_counter(*args) def _incr_counter(self, *args): - """ Increments a Hadoop counter + """ + Increments a Hadoop counter. + + Note that this seems to be a bit slow, ~1 ms - Note that this seems to be a bit slow, ~1 ms. Don't overuse this function by updating very frequently. + Don't overuse this function by updating very frequently. """ if len(args) == 2: # backwards compatibility with existing hadoop jobs @@ -714,15 +768,19 @@ def extra_modules(self): return [] # can be overridden in subclass def extra_files(self): - ''' - Can be overriden in subclass. Each element is either a string, or a pair of two strings (src, dst). - src can be a directory (in which case everything will be copied recursively). - dst can include subdirectories (foo/bar/baz.txt etc) + """ + Can be overriden in subclass. + + Each element is either a string, or a pair of two strings (src, dst). + + * `src` can be a directory (in which case everything will be copied recursively). + * `dst` can include subdirectories (foo/bar/baz.txt etc) + Uses Hadoop's -files option so that the same file is reused across tasks. 
- ''' + """ return [] - def _add_link(self, src, dst): + def add_link(self, src, dst): if not hasattr(self, '_links'): self._links = [] self._links.append((src, dst)) @@ -746,9 +804,11 @@ def _setup_links(self): 'Missing files for distributed cache: ' + ', '.join(missing)) - def _dump(self, dir=''): - """Dump instance to file.""" - file_name = os.path.join(dir, 'job-instance.pickle') + def dump(self, directory=''): + """ + Dump instance to file. + """ + file_name = os.path.join(directory, 'job-instance.pickle') if self.__module__ == '__main__': d = pickle.dumps(self) module_name = os.path.basename(sys.argv[0]).rsplit('.', 1)[0] @@ -759,11 +819,14 @@ def _dump(self, dir=''): pickle.dump(self, open(file_name, "w")) def _map_input(self, input_stream): - """Iterate over input and call the mapper for each item. - If the job has a parser defined, the return values from the parser will - be passed as arguments to the mapper. + """ + Iterate over input and call the mapper for each item. + If the job has a parser defined, the return values from the parser will + be passed as arguments to the mapper. - If the input is coded output from a previous run, the arguments will be splitted in key and value.""" + If the input is coded output from a previous run, + the arguments will be splitted in key and value. + """ for record in self.reader(input_stream): for output in self.mapper(*record): yield output @@ -773,7 +836,9 @@ def _map_input(self, input_stream): self._flush_batch_incr_counter() def _reduce_input(self, inputs, reducer, final=NotImplemented): - """Iterate over input, collect values with the same key, and call the reducer for each uniqe key.""" + """ + Iterate over input, collect values with the same key, and call the reducer for each unique key. + """ for key, values in groupby(inputs, key=lambda x: repr(x[0])): for output in reducer(eval(key), (v[1] for v in values)): yield output @@ -782,8 +847,10 @@ def _reduce_input(self, inputs, reducer, final=NotImplemented): yield output self._flush_batch_incr_counter() - def _run_mapper(self, stdin=sys.stdin, stdout=sys.stdout): - """Run the mapper on the hadoop node.""" + def run_mapper(self, stdin=sys.stdin, stdout=sys.stdout): + """ + Run the mapper on the hadoop node. + """ self.init_hadoop() self.init_mapper() outputs = self._map_input((line[:-1] for line in stdin)) @@ -792,27 +859,33 @@ def _run_mapper(self, stdin=sys.stdin, stdout=sys.stdout): else: self.internal_writer(outputs, stdout) - def _run_reducer(self, stdin=sys.stdin, stdout=sys.stdout): - """Run the reducer on the hadoop node.""" + def run_reducer(self, stdin=sys.stdin, stdout=sys.stdout): + """ + Run the reducer on the hadoop node. + """ self.init_hadoop() self.init_reducer() outputs = self._reduce_input(self.internal_reader((line[:-1] for line in stdin)), self.reducer, self.final_reducer) self.writer(outputs, stdout) - def _run_combiner(self, stdin=sys.stdin, stdout=sys.stdout): + def run_combiner(self, stdin=sys.stdin, stdout=sys.stdout): self.init_hadoop() self.init_combiner() outputs = self._reduce_input(self.internal_reader((line[:-1] for line in stdin)), self.combiner, self.final_combiner) self.internal_writer(outputs, stdout) def internal_reader(self, input_stream): - """Reader which uses python eval on each part of a tab separated string. - Yields a tuple of python objects.""" - for input in input_stream: - yield map(eval, input.split("\t")) + """ + Reader which uses python eval on each part of a tab separated string. + Yields a tuple of python objects. 
+ """ + for input_line in input_stream: + yield map(eval, input_line.split("\t")) def internal_writer(self, outputs, stdout): - """Writer which outputs the python repr for each item""" + """ + Writer which outputs the python repr for each item. + """ for output in outputs: print >> stdout, "\t".join(map(repr, output)) diff --git a/luigi/hadoop_jar.py b/luigi/hadoop_jar.py index ba1f4561be..668aad1479 100644 --- a/luigi/hadoop_jar.py +++ b/luigi/hadoop_jar.py @@ -1,3 +1,16 @@ +# Copyright (c) 2015 Spotify AB +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. import logging import os @@ -10,10 +23,14 @@ def fix_paths(job): - """Coerce input arguments to use temporary files when used for output. + """ + Coerce input arguments to use temporary files when used for output. + Return a list of temporary file pairs (tmpfile, destination path) and - a list of arguments. Converts each HdfsTarget to a string for the - path.""" + a list of arguments. + + Converts each HdfsTarget to a string for the path. + """ tmp_files = [] args = [] for x in job.args(): @@ -33,7 +50,9 @@ def fix_paths(job): class HadoopJarJobRunner(luigi.hadoop.JobRunner): - """JobRunner for `hadoop jar` commands. Used to run a HadoopJarJobTask""" + """ + JobRunner for `hadoop jar` commands. Used to run a HadoopJarJobTask. + """ def __init__(self): pass @@ -43,7 +62,7 @@ def run_job(self, job): # hadoop.HadoopJobRunner if not job.jar() or not os.path.exists(job.jar()): logger.error("Can't find jar: {0}, full path {1}".format(job.jar(), - os.path.abspath(job.jar()))) + os.path.abspath(job.jar()))) raise Exception("job jar does not exist") arglist = luigi.hdfs.load_hadoop_cmd() + ['jar', job.jar()] if job.main(): @@ -65,15 +84,20 @@ def run_job(self, job): class HadoopJarJobTask(luigi.hadoop.BaseHadoopJobTask): - """A job task for `hadoop jar` commands that define a jar and (optional) - main method""" + """ + A job task for `hadoop jar` commands that define a jar and (optional) main method. + """ def jar(self): - """Path to the jar for this Hadoop Job""" + """ + Path to the jar for this Hadoop Job. + """ return None def main(self): - """optional main method for this Hadoop Job""" + """ + optional main method for this Hadoop Job. + """ return None def job_runner(self): @@ -81,10 +105,14 @@ def job_runner(self): return HadoopJarJobRunner() def atomic_output(self): - """If True, then rewrite output arguments to be temp locations and - atomically move them into place after the job finishes""" + """ + If True, then rewrite output arguments to be temp locations and + atomically move them into place after the job finishes. + """ return True def args(self): - """returns an array of args to pass to the job (after hadoop jar
).""" + """ + Returns an array of args to pass to the job (after hadoop jar
). + """ return [] diff --git a/luigi/hdfs.py b/luigi/hdfs.py index 5737b7d79c..278fa783ae 100644 --- a/luigi/hdfs.py +++ b/luigi/hdfs.py @@ -12,23 +12,36 @@ # License for the specific language governing permissions and limitations under # the License. -import subprocess +import datetime +import getpass +import logging import os import random -import urlparse -import luigi.format -import luigi.contrib.target -import datetime import re +import subprocess +import urlparse import warnings -from luigi.target import FileSystem, FileSystemTarget, FileAlreadyExists -import configuration -import logging -import getpass + +import luigi.contrib.target +import luigi.format +from luigi.target import FileAlreadyExists, FileSystem, FileSystemTarget + logger = logging.getLogger('luigi-interface') +class hdfs(luigi.Config): + client_version = luigi.IntParameter(default=None) + effective_user = luigi.Parameter(default=None) + snakebite_autoconfig = luigi.BoolParameter() + namenode_host = luigi.Parameter(default=None) + namenode_port = luigi.IntParameter(default=None) + client = luigi.Parameter(default=None) + use_snakebite = luigi.BoolParameter(default=None) + tmp_dir = luigi.Parameter(config_path=dict(section='core', name='hdfs-tmp-dir'), default=None) + + class HDFSCliError(Exception): + def __init__(self, command, returncode, stdout, stderr): self.returncode = returncode self.stdout = stdout @@ -66,31 +79,31 @@ def tmppath(path=None, include_unix_username=True): addon = "luigitemp-%08d" % random.randrange(1e9) temp_dir = '/tmp' # default tmp dir if none is specified in config - #1. Figure out to which temporary directory to place - configured_hdfs_tmp_dir = configuration.get_config().get('core', 'hdfs-tmp-dir', None) + # 1. Figure out to which temporary directory to place + configured_hdfs_tmp_dir = hdfs().tmp_dir if configured_hdfs_tmp_dir is not None: - #config is superior + # config is superior base_dir = configured_hdfs_tmp_dir elif path is not None: - #need to copy correct schema and network location + # need to copy correct schema and network location parsed = urlparse.urlparse(path) base_dir = urlparse.urlunparse((parsed.scheme, parsed.netloc, temp_dir, '', '', '')) else: - #just system temporary directory + # just system temporary directory base_dir = temp_dir - #2. Figure out what to place + # 2. Figure out what to place if path is not None: if path.startswith(temp_dir + '/'): - #Not 100%, but some protection from directories like /tmp/tmp/file + # Not 100%, but some protection from directories like /tmp/tmp/file subdir = path[len(temp_dir):] else: - #Protection from /tmp/hdfs:/dir/file + # Protection from /tmp/hdfs:/dir/file parsed = urlparse.urlparse(path) subdir = parsed.path subdir = subdir.lstrip('/') + '-' else: - #just return any random temporary location + # just return any random temporary location subdir = '' if include_unix_username: @@ -98,6 +111,7 @@ def tmppath(path=None, include_unix_username=True): return os.path.join(base_dir, subdir + addon) + def list_path(path): if isinstance(path, list) or isinstance(path, tuple): return path @@ -105,13 +119,47 @@ def list_path(path): return [path, ] return [str(path), ] + +def is_dangerous_rm_path(path): + """ Determines if it is risky to remove such a path. + + Examples: + * blanks + * top level root, e.g. / + * absolute path that is one level deep, e.g. /etc or /opt + * tilde, e.g. ~ + + :return bool: True if too dangerous + + >>> for danger in ['~', '~/', ' ', '/', '/opt', '/etc/', '/etc//', + ... 
'//', ' /opt ', ' /opt// ', '//opt']: + ... assert is_dangerous_rm_path(danger), 'expected dangerous: %r' % danger + >>> for safe in ['~/foo', '/foo/bar', 'foo', ' foo ', 'bar/', 'silly//']: + ... assert not is_dangerous_rm_path(safe), 'expected safe: %r' % safe + >>> try: + ... is_dangerous_rm_path(None) + ... except AttributeError: + ... pass + """ + path = path.strip().rstrip('/') + + if path.startswith('/'): + path = path.lstrip('/') + return len(path.split('/')) <= 1 + else: + return path in ('', '~') + + class HdfsClient(FileSystem): - """This client uses Apache 2.x syntax for file system commands, which also matched CDH4""" + """ + This client uses Apache 2.x syntax for file system commands, which also matched CDH4. + """ recursive_listdir_cmd = ['-ls', '-R'] def exists(self, path): - """ Use ``hadoop fs -stat`` to check file existence + """ + Use ``hadoop fs -stat`` to check file existence. """ cmd = load_hadoop_cmd() + ['fs', '-stat', path] @@ -138,9 +186,35 @@ def rename(self, path, dest): warnings.warn("Renaming multiple files at once is not atomic.") call_check(load_hadoop_cmd() + ['fs', '-mv'] + path + [dest]) - def remove(self, path, recursive=True, skip_trash=False): + def rename_dont_move(self, path, dest): + """ + Override this method with an implementation that uses rename2, + which is a rename operation that never moves. + + For instance, `rename2 a b` never moves `a` into `b` folder. + + Currently, the hadoop cli does not support this operation. + + We keep the interface simple by just aliasing this to + normal rename and let individual implementations redefine the method. + + rename2 - + https://github.com/apache/hadoop/blob/ae91b13/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java + (lines 483-523) + """ + warnings.warn("Configured HDFS client doesn't support rename_dont_move, using normal mv operation instead.") + if self.exists(dest): + return False + self.rename(path, dest) + return True + + def remove(self, path, recursive=True, skip_trash=False, chicken=True): if recursive: cmd = load_hadoop_cmd() + ['fs', '-rm', '-r'] + + if chicken and is_dangerous_rm_path(path): + raise ValueError("Too chicken to recursively " + "delete '%s'" % path) else: cmd = load_hadoop_cmd() + ['fs', '-rm'] @@ -177,7 +251,7 @@ def count(self, path): if line.startswith("OpenJDK 64-Bit Server VM warning") or line.startswith("It's highly recommended") or not line: lines.pop(lines.index(line)) else: - (dir_count, file_count, content_size, ppath) = stdout.split() + (dir_count, file_count, content_size, ppath) = stdout.split() results = {'content_size': content_size, 'dir_count': dir_count, 'file_count': file_count} return results @@ -205,7 +279,7 @@ def mkdir(self, path, parents=True, raise_if_exists=False): (['-p'] if parents else []) + [path]) call_check(cmd) - except HDFSCliError, ex: + except HDFSCliError as ex: if "File exists" in ex.stderr: if raise_if_exists: raise FileAlreadyExists(ex.stderr) @@ -254,16 +328,18 @@ def listdir(self, path, ignore_directories=False, ignore_files=False, else: yield file + class SnakebiteHdfsClient(HdfsClient): """ This client uses Spotify's snakebite client whenever possible. 
+ @author: Alan Brenner github.com/alanbbr """ + def __init__(self): super(SnakebiteHdfsClient, self).__init__() try: from snakebite.client import Client - self.config = configuration.get_config() self._bite = None self.pid = -1 except Exception as err: # IGNORE:broad-except @@ -284,28 +360,23 @@ def get_bite(self): """ If Luigi has forked, we have a different PID, and need to reconnect. """ + config = hdfs() if self.pid != os.getpid() or not self._bite: - autoconfig_enabled = self.config.getboolean("hdfs", "snakebite_autoconfig", False) - if autoconfig_enabled is True: + client_kwargs = dict(filter(lambda k_v: k_v[1] is not None and k_v[1] != '', { + 'hadoop_version': config.client_version, + 'effective_user': config.effective_user, + }.iteritems())) + if config.snakebite_autoconfig: """ This is fully backwards compatible with the vanilla Client and can be used for a non HA cluster as well. This client tries to read ``${HADOOP_PATH}/conf/hdfs-site.xml`` to get the address of the namenode. The behaviour is the same as Client. """ from snakebite.client import AutoConfigClient - self._bite = AutoConfigClient() + self._bite = AutoConfigClient(**client_kwargs) else: from snakebite.client import Client - try: - ver = self.config.getint("hdfs", "client_version") - if ver is None: - raise RuntimeError() - self._bite = Client(self.config.get("hdfs", "namenode_host"), - self.config.getint("hdfs", "namenode_port"), - hadoop_version=ver) - except: - self._bite = Client(self.config.get("hdfs", "namenode_host"), - self.config.getint("hdfs", "namenode_port")) + self._bite = Client(config.namenode_host, config.namenode_port, **client_kwargs) return self._bite def exists(self, path): @@ -338,7 +409,25 @@ def rename(self, path, dest): self.mkdir(dir_path, parents=True) return list(self.get_bite().rename(list_path(path), dest)) - def remove(self, path, recursive=True, skip_trash=False): + def rename_dont_move(self, path, dest): + """ + Use snakebite.rename_dont_move, if available. + + :param path: source path (single input) + :type path: string + :param dest: destination path + :type dest: string + :return: True if succeeded + :raises: snakebite.errors.FileAlreadyExistsException + """ + from snakebite.errors import FileAlreadyExistsException + try: + self.get_bite().rename2(path, dest, overwriteDest=False) + return True + except FileAlreadyExistsException: + return False + + def remove(self, path, recursive=True, skip_trash=False, chicken=None): """ Use snakebite.delete, if available. @@ -348,6 +437,8 @@ def remove(self, path, recursive=True, skip_trash=False): :type recursive: boolean, default is True :param skip_trash: do or don't move deleted items into the trash first :type skip_trash: boolean, default is False (use trash) + :param chicken: ignored + :type chicken: ignored :return: list of deleted items """ return list(self.get_bite().delete(list_path(path), recurse=recursive)) @@ -365,7 +456,7 @@ def chmod(self, path, permissions, recursive=False): :return: list of all changed items """ return list(self.get_bite().chmod(list_path(path), - permissions, recursive)) + permissions, recursive)) def chown(self, path, owner, group, recursive=False): """ @@ -419,7 +510,7 @@ def get(self, path, local_destination): return list(self.get_bite().copyToLocal(list_path(path), local_destination)) - def mkdir(self, path, parents=True, mode=0755, raise_if_exists=False): + def mkdir(self, path, parents=True, mode=0o755, raise_if_exists=False): """ Use snakebite.mkdir, if available. 
@@ -480,23 +571,34 @@ def listdir(self, path, ignore_directories=False, ignore_files=False, else: yield rval[0] + class HdfsClientCdh3(HdfsClient): - """This client uses CDH3 syntax for file system commands""" - def mkdir(self, path): - ''' - No -p switch, so this will fail creating ancestors - ''' + """ + This client uses CDH3 syntax for file system commands. + """ + + def mkdir(self, path, parents=False, raise_if_exists=False): + """ + No -p switch, so this will fail creating ancestors. + + :param parents: ignored + """ try: call_check(load_hadoop_cmd() + ['fs', '-mkdir', path]) - except HDFSCliError, ex: + except HDFSCliError as ex: if "File exists" in ex.stderr: - raise FileAlreadyExists(ex.stderr) + if raise_if_exists: + raise FileAlreadyExists(ex.stderr) else: raise - def remove(self, path, recursive=True, skip_trash=False): + def remove(self, path, recursive=True, skip_trash=False, chicken=True): if recursive: cmd = load_hadoop_cmd() + ['fs', '-rmr'] + + if chicken and is_dangerous_rm_path(path): + raise ValueError("Too chicken to recursively " + "delete '%s'" % path) else: cmd = load_hadoop_cmd() + ['fs', '-rm'] @@ -506,9 +608,12 @@ def remove(self, path, recursive=True, skip_trash=False): cmd = cmd + [path] call_check(cmd) + class HdfsClientApache1(HdfsClientCdh3): - """This client uses Apache 1.x syntax for file system commands, - which are similar to CDH3 except for the file existence check""" + """ + This client uses Apache 1.x syntax for file system commands, + which are similar to CDH3 except for the file existence check. + """ recursive_listdir_cmd = ['-lsr'] @@ -527,23 +632,27 @@ def exists(self, path): def get_configured_hadoop_version(): """ CDH4 (hadoop 2+) has a slightly different syntax for interacting with hdfs - via the command line. The default version is CDH4, but one can override + via the command line. + + The default version is CDH4, but one can override this setting with "cdh3" or "apache1" in the hadoop section of the config - in order to use the old syntax + in order to use the old syntax. """ - return configuration.get_config().get("hadoop", "version", "cdh4").lower() + return luigi.configuration.get_config().get("hadoop", "version", "cdh4").lower() def get_configured_hdfs_client(show_warnings=True): - """ This is a helper that fetches the configuration value for 'client' in + """ + This is a helper that fetches the configuration value for 'client' in the [hdfs] section. It will return the client that retains backwards - compatibility when 'client' isn't configured. """ - config = configuration.get_config() - custom = config.get("hdfs", "client", None) + compatibility when 'client' isn't configured. + """ + config = hdfs() + custom = config.client if custom: # Eventually this should be the only valid code path return custom - if config.getboolean("hdfs", "use_snakebite", False): + if config.use_snakebite: if show_warnings: warnings.warn("Deprecated: Just specify 'client: snakebite' in config") return "snakebite" @@ -553,8 +662,10 @@ def get_configured_hdfs_client(show_warnings=True): def create_hadoopcli_client(): - """ Given that we want one of the hadoop cli clients (unlike snakebite), - this one will return the right one """ + """ + Given that we want one of the hadoop cli clients (unlike snakebite), + this one will return the right one. 
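As a rough sketch of how the helpers above resolve the configured client at runtime (which class comes back depends entirely on the ``[hadoop]`` and ``[hdfs]`` sections of ``client.cfg``):

.. code-block:: python

    import luigi.hdfs

    # "cdh4" (the default), "cdh3" or "apache1" selects the CLI syntax
    print luigi.hdfs.get_configured_hadoop_version()

    # while the configured hdfs client decides between the CLI wrappers and snakebite
    client = luigi.hdfs.get_autoconfig_client()
    print type(client).__name__   # e.g. HdfsClient or SnakebiteHdfsClient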
+ """ version = get_configured_hadoop_version() if version == "cdh4": return HdfsClient() @@ -566,8 +677,11 @@ def create_hadoopcli_client(): raise Exception("Error: Unknown version specified in Hadoop version" "configuration parameter") + def get_autoconfig_client(show_warnings=True): - """Creates the client as specified in the `client.cfg` configuration""" + """ + Creates the client as specified in the `client.cfg` configuration. + """ configured_client = get_configured_hdfs_client(show_warnings=show_warnings) if configured_client == "snakebite": return SnakebiteHdfsClient() @@ -588,12 +702,14 @@ def get_autoconfig_client(show_warnings=True): class HdfsReadPipe(luigi.format.InputPipeProcessWrapper): + def __init__(self, path): super(HdfsReadPipe, self).__init__(load_hadoop_cmd() + ['fs', '-cat', path]) class HdfsAtomicWritePipe(luigi.format.OutputPipeProcessWrapper): - """ File like object for writing to HDFS + """ + File like object for writing to HDFS The referenced file is first written to a temporary location and then renamed to final location on close(). If close() isn't called @@ -613,7 +729,7 @@ def __init__(self, path): def abort(self): logger.info("Aborting %s('%s'). Removing temporary file '%s'", - self.__class__.__name__, self.path, self.tmppath) + self.__class__.__name__, self.path, self.tmppath) super(HdfsAtomicWritePipe, self).abort() remove(self.tmppath) @@ -623,7 +739,10 @@ def close(self): class HdfsAtomicWriteDirPipe(luigi.format.OutputPipeProcessWrapper): - """ Writes a data file to a directory at """ + """ + Writes a data file to a directory at . + """ + def __init__(self, path, data_extension=""): self.path = path self.tmppath = tmppath(self.path) @@ -632,7 +751,7 @@ def __init__(self, path, data_extension=""): def abort(self): logger.info("Aborting %s('%s'). Removing temporary dir '%s'", - self.__class__.__name__, self.path, self.tmppath) + self.__class__.__name__, self.path, self.tmppath) super(HdfsAtomicWriteDirPipe, self).abort() remove(self.tmppath) @@ -642,6 +761,7 @@ def close(self): class Plain(luigi.format.Format): + @classmethod def hdfs_reader(cls, path): return HdfsReadPipe(path) @@ -652,6 +772,7 @@ def pipe_writer(cls, output_pipe): class PlainDir(luigi.format.Format): + @classmethod def hdfs_reader(cls, path): # exclude underscore-prefixedfiles/folders (created by MapReduce) @@ -677,7 +798,7 @@ def __init__(self, path=None, format=Plain, is_tmp=False, fs=None): self._fs = fs or get_autoconfig_client() def __del__(self): - #TODO: not sure is_tmp belongs in Targets construction arguments + # TODO: not sure is_tmp belongs in Targets construction arguments if self.is_tmp and self.exists(): self.remove() @@ -706,12 +827,13 @@ def open(self, mode='r'): except NotImplementedError: return self.format.pipe_writer(HdfsAtomicWritePipe(self.path)) - def remove(self, skip_trash=False): - remove(self.path, skip_trash=skip_trash) + def remove(self, skip_trash=False, chicken=True): + remove(self.path, skip_trash=skip_trash, chicken=chicken) @luigi.util.deprecate_kwarg('fail_if_exists', 'raise_if_exists', False) def rename(self, path, fail_if_exists=False): - """ Rename does not change self.path, so be careful with assumptions + """ + Rename does not change self.path, so be careful with assumptions. Not recommendeed for directories. Use move_dir. 
spotify/luigi#522 """ @@ -723,19 +845,29 @@ def rename(self, path, fail_if_exists=False): @luigi.util.deprecate_kwarg('fail_if_exists', 'raise_if_exists', False) def move(self, path, fail_if_exists=False): - """ Move does not change self.path, so be careful with assumptions + """ + Move does not change self.path, so be careful with assumptions. Not recommendeed for directories. Use move_dir. spotify/luigi#522 """ self.rename(path, raise_if_exists=fail_if_exists) def move_dir(self, path): - # mkdir will fail if directory already exists, thereby ensuring atomicity - if isinstance(path, HdfsTarget): - path = path.path - mkdir(path, parents=False, raise_if_exists=True) - rename(self.path + '/*', path) - self.remove() + """ + Rename a directory. + + The implementation uses `rename_dont_move`, + which on some clients is just a normal `mv` operation, which can cause + nested directories. + + One could argue that the implementation should use the + mkdir+raise_if_exists approach, but we at Spotify have had more trouble + with that over just using plain mv. See spotify/luigi#557 + """ + move_succeeded = self.fs.rename_dont_move(self.path, path) + if move_succeeded: + self.path = path + return move_succeeded def is_writable(self): if "/" in self.path: diff --git a/luigi/hive.py b/luigi/hive.py index 2cb0d1e111..a0f683ce9f 100644 --- a/luigi/hive.py +++ b/luigi/hive.py @@ -13,18 +13,20 @@ import abc import logging import operator -import luigi -import luigi.hadoop -from luigi.target import FileSystemTarget, FileAlreadyExists import os import subprocess import tempfile + +import luigi +import luigi.hadoop +from luigi.target import FileAlreadyExists, FileSystemTarget from luigi.task import flatten logger = logging.getLogger('luigi-interface') class HiveCommandError(RuntimeError): + def __init__(self, message, out=None, err=None): super(HiveCommandError, self).__init__(message, out, err) self.message = message @@ -41,12 +43,13 @@ def get_hive_syntax(): def run_hive(args, check_return_code=True): - """Runs the `hive` from the command line, passing in the given args, and - returning stdout. + """ + Runs the `hive` from the command line, passing in the given args, and + returning stdout. - With the apache release of Hive, so of the table existence checks - (which are done using DESCRIBE do not exit with a return code of 0 - so we need an option to ignore the return code and just return stdout for parsing + With the apache release of Hive, so of the table existence checks + (which are done using DESCRIBE do not exit with a return code of 0 + so we need an option to ignore the return code and just return stdout for parsing """ cmd = [load_hive_cmd()] + args p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) @@ -58,12 +61,16 @@ def run_hive(args, check_return_code=True): def run_hive_cmd(hivecmd, check_return_code=True): - """Runs the given hive query and returns stdout""" + """ + Runs the given hive query and returns stdout. + """ return run_hive(['-e', hivecmd], check_return_code) def run_hive_script(script): - """Runs the contents of the given script in hive and returns stdout""" + """ + Runs the contents of the given script in hive and returns stdout. 
+ """ if not os.path.isfile(script): raise RuntimeError("Hive script: {0} does not exist.".format(script)) return run_hive(['-f', script]) @@ -73,7 +80,7 @@ class HiveClient(object): # interface __metaclass__ = abc.ABCMeta @abc.abstractmethod - def table_location(self, table, database='default', partition={}): + def table_location(self, table, database='default', partition=None): """ Returns location of db.table (or db.table.partition). partition is a dict of partition key to value. @@ -82,13 +89,15 @@ def table_location(self, table, database='default', partition={}): @abc.abstractmethod def table_schema(self, table, database='default'): - """ Returns list of [(name, type)] for each column in database.table """ + """ + Returns list of [(name, type)] for each column in database.table. + """ pass @abc.abstractmethod - def table_exists(self, table, database='default', partition={}): + def table_exists(self, table, database='default', partition=None): """ - Returns true iff db.table (or db.table.partition) exists. partition is a dict of partition key to + Returns true if db.table (or db.table.partition) exists. partition is a dict of partition key to value. """ pass @@ -100,10 +109,13 @@ def partition_spec(self, partition): class HiveCommandClient(HiveClient): - """ Uses `hive` invocations to find information """ - def table_location(self, table, database='default', partition={}): + """ + Uses `hive` invocations to find information. + """ + + def table_location(self, table, database='default', partition=None): cmd = "use {0}; describe formatted {1}".format(database, table) - if partition: + if partition is not None: cmd += " PARTITION ({0})".format(self.partition_spec(partition)) stdout = run_hive_cmd(cmd) @@ -112,8 +124,8 @@ def table_location(self, table, database='default', partition={}): if "Location:" in line: return line.split("\t")[1] - def table_exists(self, table, database='default', partition={}): - if not partition: + def table_exists(self, table, database='default', partition=None): + if partition is None: stdout = run_hive_cmd('use {0}; show tables like "{1}";'.format(database, table)) return stdout and table in stdout @@ -133,9 +145,11 @@ def table_schema(self, table, database='default'): return [tuple([x.strip() for x in line.strip().split("\t")]) for line in describe.strip().split("\n")] def partition_spec(self, partition): - """ Turns a dict into the a Hive partition specification string """ + """ + Turns a dict into the a Hive partition specification string. + """ return ','.join(["{0}='{1}'".format(k, v) for (k, v) in - sorted(partition.items(), key=operator.itemgetter(0))]) + sorted(partition.iteritems(), key=operator.itemgetter(0))]) class ApacheHiveCommandClient(HiveCommandClient): @@ -143,6 +157,7 @@ class ApacheHiveCommandClient(HiveCommandClient): A subclass for the HiveCommandClient to (in some cases) ignore the return code from the hive command so that we can just parse the output. 
""" + def table_schema(self, table, database='default'): describe = run_hive_cmd("use {0}; describe {1}".format(database, table), False) if not describe or "Table not found" in describe: @@ -151,18 +166,19 @@ def table_schema(self, table, database='default'): class MetastoreClient(HiveClient): - def table_location(self, table, database='default', partition={}): + + def table_location(self, table, database='default', partition=None): with HiveThriftContext() as client: - if partition: + if partition is not None: partition_str = self.partition_spec(partition) thrift_table = client.get_partition_by_name(database, table, partition_str) else: thrift_table = client.get_table(database, table) return thrift_table.sd.location - def table_exists(self, table, database='default', partition={}): + def table_exists(self, table, database='default', partition=None): with HiveThriftContext() as client: - if not partition: + if partition is None: return table in client.get_all_tables(database) else: return partition in self._existing_partitions(table, database, client) @@ -184,11 +200,14 @@ def table_schema(self, table, database='default'): return [(field_schema.name, field_schema.type) for field_schema in client.get_schema(database, table)] def partition_spec(self, partition): - return "/".join("%s=%s" % (k, v) for (k, v) in sorted(partition.items(), key=operator.itemgetter(0))) + return "/".join("%s=%s" % (k, v) for (k, v) in sorted(partition.iteritems(), key=operator.itemgetter(0))) class HiveThriftContext(object): - """ Context manager for hive metastore client """ + """ + Context manager for hive metastore client. + """ + def __enter__(self): try: from thrift import Thrift @@ -208,7 +227,7 @@ def __enter__(self): transport.open() self.transport = transport return ThriftHiveMetastore.Client(protocol) - except ImportError, e: + except ImportError as e: raise Exception('Could not import Hive thrift library:' + str(e)) def __exit__(self, exc_type, exc_val, exc_tb): @@ -222,7 +241,10 @@ def __exit__(self, exc_type, exc_val, exc_tb): class HiveQueryTask(luigi.hadoop.BaseHadoopJobTask): - """ Task to run a hive query """ + """ + Task to run a hive query. + """ + # by default, we let hive figure these out. n_reduce_tasks = None bytes_per_reducer = None @@ -234,10 +256,12 @@ def query(self): raise RuntimeError("Must implement query!") def hiverc(self): - """ Location of an rc file to run before the query - if hiverc-location key is specified in client.cfg, will default to the value there - otherwise returns None - Returning a list of rc files will load all of them in order. + """ + Location of an rc file to run before the query + if hiverc-location key is specified in client.cfg, will default to the value there + otherwise returns None. + + Returning a list of rc files will load all of them in order. """ return luigi.configuration.get_config().get('hive', 'hiverc-location', default=None) @@ -246,6 +270,7 @@ def hiveconfs(self): Returns an dict of key=value settings to be passed along to the hive command line via --hiveconf. By default, sets mapred.job.name to task_id and if not None, sets: + * mapred.reduce.tasks (n_reduce_tasks) * mapred.fairscheduler.pool (pool) or mapred.job.queue.name (pool) * hive.exec.reducers.bytes.per.reducer (bytes_per_reducer) @@ -273,10 +298,13 @@ def job_runner(self): class HiveQueryRunner(luigi.hadoop.JobRunner): - """ Runs a HiveQueryTask by shelling out to hive """ + """ + Runs a HiveQueryTask by shelling out to hive. 
+ """ def prepare_outputs(self, job): - """ Called before job is started + """ + Called before job is started. If output is a `FileSystemTarget`, create parent directories so the hive command won't fail """ @@ -301,7 +329,7 @@ def run_job(self, job): arglist = [load_hive_cmd(), '-f', f.name] hiverc = job.hiverc() if hiverc: - if type(hiverc) == str: + if isinstance(hiverc, str): hiverc = [hiverc] for rcfile in hiverc: arglist += ['-i', rcfile] @@ -314,7 +342,9 @@ def run_job(self, job): class HiveTableTarget(luigi.Target): - """ exists returns true if the table exists """ + """ + exists returns true if the table exists. + """ def __init__(self, table, database='default', client=default_client): self.database = database @@ -328,7 +358,9 @@ def exists(self): @property def path(self): - """Returns the path to this table in HDFS""" + """ + Returns the path to this table in HDFS. + """ location = self.client.table_location(self.table, self.database) if not location: raise Exception("Couldn't find location for table: {0}".format(str(self))) @@ -339,7 +371,9 @@ def open(self, mode): class HivePartitionTarget(luigi.Target): - """ exists returns true if the table's partition exists """ + """ + exists returns true if the table's partition exists. + """ def __init__(self, table, partition, database='default', fail_missing_table=True, client=default_client): self.database = database @@ -353,7 +387,7 @@ def exists(self): try: logger.debug("Checking Hive table '{d}.{t}' for partition {p}".format(d=self.database, t=self.table, p=str(self.partition))) return self.client.table_exists(self.table, self.database, self.partition) - except HiveCommandError, e: + except HiveCommandError as e: if self.fail_missing_table: raise else: @@ -366,7 +400,9 @@ def exists(self): @property def path(self): - """Returns the path for this HiveTablePartitionTarget's data""" + """ + Returns the path for this HiveTablePartitionTarget's data. + """ location = self.client.table_location(self.table, self.database, self.partition) if not location: raise Exception("Couldn't find location for table: {0}".format(str(self))) @@ -377,7 +413,9 @@ def open(self, mode): class ExternalHiveTask(luigi.ExternalTask): - """ External task that depends on a Hive table/partition """ + """ + External task that depends on a Hive table/partition. + """ database = luigi.Parameter(default='default') table = luigi.Parameter() diff --git a/luigi/interface.py b/luigi/interface.py index 30874479c8..379d9f30a2 100644 --- a/luigi/interface.py +++ b/luigi/interface.py @@ -12,21 +12,21 @@ # License for the specific language governing permissions and limitations under # the License. 
-import worker -import lock +import argparse import logging import logging.config -import rpc import optparse -import scheduler -import warnings +import os +import sys +import tempfile + import configuration -import task +import lock import parameter -import re -import argparse -import sys -import os +import rpc +import scheduler +import task +import worker from task import Register @@ -39,106 +39,68 @@ def setup_interface_logging(conf_file=None): logger = logging.getLogger('luigi-interface') logger.setLevel(logging.DEBUG) - streamHandler = logging.StreamHandler() - streamHandler.setLevel(logging.DEBUG) + stream_handler = logging.StreamHandler() + stream_handler.setLevel(logging.DEBUG) formatter = logging.Formatter('%(levelname)s: %(message)s') - streamHandler.setFormatter(formatter) + stream_handler.setFormatter(formatter) - logger.addHandler(streamHandler) + logger.addHandler(stream_handler) else: logging.config.fileConfig(conf_file, disable_existing_loggers=False) setup_interface_logging.has_run = True -def load_task(parent_task, task_name, params): - """ Imports task and uses ArgParseInterface to initialize it - """ - # How the module is represented depends on if Luigi was started from - # that file or if the module was imported later on - module = sys.modules[parent_task.__module__] - if module.__name__ == '__main__': - parent_module_path = os.path.abspath(module.__file__) - for p in sys.path: - if parent_module_path.startswith(p): - end = parent_module_path.rfind('.py') - actual_module = parent_module_path[len(p):end].strip( - '/').replace('/', '.') - break - else: - actual_module = module.__name__ - return init_task(actual_module, task_name, params, {}) - - -def init_task(module_name, task, str_params, global_str_params): - __import__(module_name) - module = sys.modules[module_name] - Task = getattr(module, task) - - return Task.from_str_params(str_params, global_str_params) - +class EnvironmentParamsContainer(task.ConfigWithoutSection): -class EnvironmentParamsContainer(task.Task): ''' Keeps track of a bunch of environment params. Uses the internal luigi parameter mechanism. The nice thing is that we can instantiate this class and get an object with all the environment variables set. - This is arguably a bit of a hack.''' + This is arguably a bit of a hack. 
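These parameters are what the familiar command line flags and ``luigi.build`` keyword arguments end up populating; a small self-contained sketch (the task is a stand-in that does nothing):

.. code-block:: python

    import luigi


    class Hello(luigi.Task):
        """Stand-in task used only to show how the environment params are passed."""

        def complete(self):
            return True


    if __name__ == '__main__':
        # roughly equivalent to: python this_file.py Hello --local-scheduler --workers 2
        luigi.build([Hello()], local_scheduler=True, workers=2)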
+ ''' - local_scheduler = parameter.BooleanParameter( - is_global=True, default=False, + local_scheduler = parameter.BoolParameter( + default=False, description='Use local scheduling') scheduler_host = parameter.Parameter( - is_global=True, default='localhost', description='Hostname of machine running remote scheduler', config_path=dict(section='core', name='default-scheduler-host')) scheduler_port = parameter.IntParameter( - is_global=True, default=8082, + default=8082, description='Port of remote scheduler api process', config_path=dict(section='core', name='default-scheduler-port')) - lock = parameter.BooleanParameter( - is_global=True, default=False, - description='(Deprecated, replaced by no_lock)' - 'Do not run if similar process is already running') lock_size = parameter.IntParameter( - is_global=True, default=1, + default=1, description="Maximum number of workers running the same command") - no_lock = parameter.BooleanParameter( - is_global=True, default=False, + no_lock = parameter.BoolParameter( + default=False, description='Ignore if similar process is already running') lock_pid_dir = parameter.Parameter( - is_global=True, default='/var/tmp/luigi', + default=os.path.join(tempfile.gettempdir(), 'luigi'), description='Directory to store the pid file') workers = parameter.IntParameter( - is_global=True, default=1, + default=1, description='Maximum number of parallel tasks to run') logging_conf_file = parameter.Parameter( - is_global=True, default=None, + default=None, description='Configuration file for logging', config_path=dict(section='core', name='logging_conf_file')) module = parameter.Parameter( - is_global=True, default=None, - description='Used for dynamic loading of modules') # see DynamicArgParseInterface - parallel_scheduling = parameter.BooleanParameter( - is_global=True, default=False, + default=None, + description='Used for dynamic loading of modules') # see DynamicArgParseInterface + parallel_scheduling = parameter.BoolParameter( + default=False, description='Use multiprocessing to do scheduling in parallel.', config_path={'section': 'core', 'name': 'parallel-scheduling'}, ) - @classmethod - def env_params(cls, override_defaults={}): - # Override any global parameter with whatever is in override_defaults - for param_name, param_obj in cls.get_global_params(): - if param_name in override_defaults: - param_obj.set_global(override_defaults[param_name]) - - return cls() # instantiate an object with the global params set on it - class WorkerSchedulerFactory(object): + def create_local_scheduler(self): return scheduler.CentralPlannerScheduler() @@ -151,19 +113,23 @@ def create_worker(self, scheduler, worker_processes): class Interface(object): + def parse(self): raise NotImplementedError @staticmethod - def run(tasks, worker_scheduler_factory=None, override_defaults={}): + def run(tasks, worker_scheduler_factory=None, override_defaults=None): """ - :return: True if all tasks and their dependencies were successfully run (or already completed) - False if any error occurred + :param tasks: + :param worker_scheduler_factory: + :param override_defaults: + :return: True if all tasks and their dependencies were successfully run (or already completed); + False if any error occurred. 
""" if worker_scheduler_factory is None: worker_scheduler_factory = WorkerSchedulerFactory() - env_params = EnvironmentParamsContainer.env_params(override_defaults) + env_params = EnvironmentParamsContainer(**override_defaults) # search for logging configuration path first on the command line, then # in the application config file logging_conf = env_params.logging_conf_file @@ -176,15 +142,6 @@ def run(tasks, worker_scheduler_factory=None, override_defaults={}): 'core', 'no_configure_logging', False): setup_interface_logging(logging_conf) - if env_params.lock: - warnings.warn( - "The --lock flag is deprecated and will be removed." - "Locking is now the default behavior." - "Use --no-lock to override to not use lock", - DeprecationWarning, - stacklevel=3 - ) - if (not env_params.no_lock and not(lock.acquire_for(env_params.lock_pid_dir, env_params.lock_size))): sys.exit(1) @@ -209,129 +166,138 @@ def run(tasks, worker_scheduler_factory=None, override_defaults={}): return success -class ErrorWrappedArgumentParser(argparse.ArgumentParser): - ''' Wraps ArgumentParser's error message to suggested similar tasks - ''' +# Simple unweighted Levenshtein distance +def _editdistance(a, b): + r0 = range(0, len(b) + 1) + r1 = [0] * (len(b) + 1) - # Simple unweighted Levenshtein distance - def _editdistance(self, a, b): - r0 = range(0, len(b) + 1) - r1 = [0] * (len(b) + 1) + for i in range(0, len(a)): + r1[0] = i + 1 - for i in range(0, len(a)): - r1[0] = i + 1 + for j in range(0, len(b)): + c = 0 if a[i] is b[j] else 1 + r1[j + 1] = min(r1[j] + 1, r0[j + 1] + 1, r0[j] + c) - for j in range(0, len(b)): - c = 0 if a[i] is b[j] else 1 - r1[j + 1] = min(r1[j] + 1, r0[j + 1] + 1, r0[j] + c) + r0 = r1[:] - r0 = r1[:] + return r1[len(b)] - return r1[len(b)] - def error(self, message): - result = re.match("argument .+: invalid choice: '(\w+)'.+", message) - if result: - arg = result.group(1) - weightedTasks = [(self._editdistance(arg, task), task) for task in Register.get_reg().keys()] - orderedTasks = sorted(weightedTasks, key=lambda pair: pair[0]) - candidates = [task for (dist, task) in orderedTasks if dist <= 5 and dist < len(task)] - displaystring = "" - if candidates: - displaystring = "No task %s. Did you mean:\n%s" % (arg, '\n'.join(candidates)) - else: - displaystring = "No task %s." % arg - super(ErrorWrappedArgumentParser, self).error(displaystring) - else: - super(ErrorWrappedArgumentParser, self).error(message) +def error_task_names(task_name, task_names): + weighted_tasks = [(_editdistance(task_name, task_name_2), task_name_2) for task_name_2 in task_names] + ordered_tasks = sorted(weighted_tasks, key=lambda pair: pair[0]) + candidates = [task for (dist, task) in ordered_tasks if dist <= 5 and dist < len(task)] + display_string = "" + if candidates: + display_string = "No task %s. Did you mean:\n%s" % (task_name, '\n'.join(candidates)) + else: + display_string = "No task %s." 
% task_name + raise SystemExit(display_string) -class ArgParseInterface(Interface): - ''' Takes the task as the command, with parameters specific to it - ''' - @classmethod - def add_parameter(cls, parser, param_name, param, prefix=None): - description = [] - if prefix: - description.append('%s.%s' % (prefix, param_name)) - else: - description.append(param_name) - if param.description: - description.append(param.description) - if param.has_value: - description.append(" [default: %s]" % (param.value,)) - - if param.is_list: - action = "append" - elif param.is_boolean: - action = "store_true" - else: - action = "store" - parser.add_argument('--' + param_name.replace('_', '-'), help=' '.join(description), default=None, action=action) - @classmethod - def add_task_parameters(cls, parser, task_cls): - for param_name, param in task_cls.get_nonglobal_params(): - cls.add_parameter(parser, param_name, param, task_cls.task_family) +def add_task_parameters(parser, task_cls, optparse=False): + for param_name, param in task_cls.get_params(): + param.add_to_cmdline_parser(parser, param_name, task_cls.task_family, optparse=optparse, glob=False) - @classmethod - def add_global_parameters(cls, parser): - for param_name, param in Register.get_global_params(): - cls.add_parameter(parser, param_name, param) - def parse_task(self, cmdline_args=None, main_task_cls=None): - parser = ErrorWrappedArgumentParser() +def add_global_parameters(parser, optparse=False): + seen_params = set() + for task_name, is_without_section, param_name, param in Register.get_all_params(): + if param in seen_params: + continue + seen_params.add(param) + param.add_to_cmdline_parser(parser, param_name, task_name, optparse=optparse, glob=True, is_without_section=is_without_section) - self.add_global_parameters(parser) - if main_task_cls: - self.add_task_parameters(parser, main_task_cls) +def get_task_parameters(task_cls, args): + # Parse a str->str dict to the correct types + params = {} + for param_name, param in task_cls.get_params(): + param.parse_from_args(param_name, task_cls.task_family, args, params) + return params + + +def set_global_parameters(args): + # Note that this is not side effect free + for task_name, is_without_section, param_name, param in Register.get_all_params(): + param.set_global_from_args(param_name, task_name, args, is_without_section=is_without_section) - else: - orderedtasks = '{%s}' % ','.join(sorted(Register.get_reg().keys())) - subparsers = parser.add_subparsers(dest='command', metavar=orderedtasks) - for name, cls in Register.get_reg().iteritems(): - subparser = subparsers.add_parser(name) - if cls == Register.AMBIGUOUS_CLASS: - continue - self.add_task_parameters(subparser, cls) +class ArgParseInterface(Interface): + """ + Takes the task as the command, with parameters specific to it. + """ - # Add global params here as well so that we can support both: - # test.py --global-param xyz Test --n 42 - # test.py Test --n 42 --global-param xyz - self.add_global_parameters(subparser) + def parse_task(self, cmdline_args=None, main_task_cls=None): + parser = argparse.ArgumentParser() - args = parser.parse_args(args=cmdline_args) - params = vars(args) # convert to a str -> str hash + add_global_parameters(parser) if main_task_cls: + add_task_parameters(parser, main_task_cls) + + args = parser.parse_args(args=cmdline_args) task_cls = main_task_cls else: - task_cls = Register.get_task_cls(args.command) + task_names = sorted(Register.get_reg().keys()) + + # Parse global arguments and pull out the task name. 
+ # We used to do this using subparsers+command, but some issues with + # argparse across different versions of Python (2.7.9) made it hard. + args, unknown = parser.parse_known_args(args=cmdline_args) + if len(unknown) == 0: + raise SystemExit('No task specified') + task_name = unknown[0] + if task_name not in task_names: + error_task_names(task_name, task_names) + + task_cls = Register.get_task_cls(task_name) + + # Add a subparser to parse task-specific arguments + subparsers = parser.add_subparsers(dest='command') + subparser = subparsers.add_parser(task_name) + + # Add both task and global params here so that we can support both: + # test.py --global-param xyz Test --n 42 + # test.py Test --n 42 --global-param xyz + add_global_parameters(subparser) + add_task_parameters(subparser, task_cls) + + # Workaround for bug in argparse for Python 2.7.9 + # See https://mail.python.org/pipermail/python-dev/2015-January/137699.html + subargs = parser.parse_args(args=cmdline_args) + for key, value in vars(subargs).items(): + if value: # Either True (for boolean args) or non-None (everything else) + setattr(args, key, value) # Notice that this is not side effect free because it might set global params - task = task_cls.from_str_params(params, Register.get_global_params()) + set_global_parameters(args) + task_params = get_task_parameters(task_cls, args) - return [task] + return [task_cls(**task_params)] def parse(self, cmdline_args=None, main_task_cls=None): return self.parse_task(cmdline_args, main_task_cls) class DynamicArgParseInterface(ArgParseInterface): - ''' Uses --module as a way to load modules dynamically + """ + Uses --module as a way to load modules dynamically Usage: - python whatever.py --module foo_module FooTask --blah xyz --x 123 - This will dynamically import foo_module and then try to create FooTask from this - ''' + .. code-block:: console + + python whatever.py --module foo_module FooTask --blah xyz --x 123 + + This will dynamically import foo_module and then try to create FooTask from this. + """ def parse(self, cmdline_args=None, main_task_cls=None): - parser = ErrorWrappedArgumentParser() + parser = argparse.ArgumentParser() - self.add_global_parameters(parser) + add_global_parameters(parser) args, unknown = parser.parse_known_args(args=cmdline_args) module = args.module @@ -342,35 +308,35 @@ def parse(self, cmdline_args=None, main_task_cls=None): class PassThroughOptionParser(optparse.OptionParser): - ''' + """ An unknown option pass-through implementation of OptionParser. - When unknown arguments are encountered, bundle with largs and try again, - until rargs is depleted. + When unknown arguments are encountered, bundle with largs and try again, until rargs is depleted. sys.exit(status) will still be called if a known argument is passed incorrectly (e.g. missing arguments or bad argument types, etc.) - ''' + """ + def _process_args(self, largs, rargs, values): while rargs: try: optparse.OptionParser._process_args(self, largs, rargs, values) - except (optparse.BadOptionError, optparse.AmbiguousOptionError), e: + except (optparse.BadOptionError, optparse.AmbiguousOptionError) as e: largs.append(e.opt_str) class OptParseInterface(Interface): - ''' Supported for legacy reasons where it's necessary to interact with an existing parser. + """ + Supported for legacy reasons where it's necessary to interact with an existing parser. Takes the task using --task. All parameters to all possible tasks will be defined globally in a big unordered soup. 
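The "did you mean" suggestions mentioned above come from ``error_task_names`` and the unweighted ``_editdistance``; for illustration:

.. code-block:: python

    from luigi.interface import _editdistance

    # suggestions are shown for candidates with distance <= 5 (and < len(name))
    assert _editdistance('FoTask', 'FooTask') == 1    # one missing character
    assert _editdistance('FooTask', 'FooTask') == 0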
- ''' + """ + def __init__(self, existing_optparse): self.__existing_optparse = existing_optparse def parse(self, cmdline_args=None, main_task_cls=None): - global_params = list(Register.get_global_params()) - parser = PassThroughOptionParser() def add_task_option(p): @@ -379,26 +345,7 @@ def add_task_option(p): else: p.add_option('--task', help='Task to run (one of %s)' % Register.tasks_str()) - def _add_parameter(parser, param_name, param): - description = [param_name] - if param.description: - description.append(param.description) - if param.has_value: - description.append(" [default: %s]" % (param.value,)) - - if param.is_list: - action = "append" - elif param.is_boolean: - action = "store_true" - else: - action = "store" - parser.add_option('--' + param_name.replace('_', '-'), - help=' '.join(description), - default=None, - action=action) - - for param_name, param in global_params: - _add_parameter(parser, param_name, param) + add_global_parameters(parser, optparse=True) add_task_option(parser) options, args = parser.parse_args(args=cmdline_args) @@ -413,34 +360,43 @@ def _add_parameter(parser, param_name, param): task_cls = Register.get_task_cls(task_cls_name) # Register all parameters as a big mess - params = task_cls.get_nonglobal_params() - - for param_name, param in global_params: - _add_parameter(parser, param_name, param) - - for param_name, param in params: - _add_parameter(parser, param_name, param) + add_global_parameters(parser, optparse=True) + add_task_parameters(parser, task_cls, optparse=True) # Parse and run options, args = parser.parse_args(args=cmdline_args) - params = {} - for k, v in vars(options).iteritems(): - if k != 'task': - params[k] = v + set_global_parameters(options) + task_params = get_task_parameters(task_cls, options) - task = task_cls.from_str_params(params, global_params) + return [task_cls(**task_params)] - return [task] +def load_task(module, task_name, params_str): + """ + Imports task dynamically given a module and a task name. + """ + __import__(module) + task_cls = Register.get_task_cls(task_name) + return task_cls.from_str_params(params_str) -def run(cmdline_args=None, existing_optparse=None, use_optparse=False, main_task_cls=None, worker_scheduler_factory=None, use_dynamic_argparse=False): - ''' Run from cmdline. - The default parser uses argparse. - However for legacy reasons we support optparse that optionally allows for - overriding an existing option parser with new args. - ''' +def run(cmdline_args=None, existing_optparse=None, use_optparse=False, main_task_cls=None, + worker_scheduler_factory=None, use_dynamic_argparse=False, local_scheduler=False): + """ + Run from cmdline. + + The default parser uses argparse however, for legacy reasons, + we support optparse that optionally allows for overriding an existing option parser with new args. 
+ + :param cmdline_args: + :param existing_optparse: + :param use_optparse: + :param main_task_cls: + :param worker_scheduler_factory: + :param use_dynamic_argparse: + :param local_scheduler: + """ if use_optparse: interface = OptParseInterface(existing_optparse) elif use_dynamic_argparse: @@ -448,21 +404,34 @@ def run(cmdline_args=None, existing_optparse=None, use_optparse=False, main_task else: interface = ArgParseInterface() tasks = interface.parse(cmdline_args, main_task_cls=main_task_cls) - return interface.run(tasks, worker_scheduler_factory) + override_defaults = {} + if local_scheduler: + override_defaults['local_scheduler'] = True + return interface.run(tasks, worker_scheduler_factory, override_defaults=override_defaults) def build(tasks, worker_scheduler_factory=None, **env_params): - ''' Run internally, bypassing the cmdline parsing. + """ + Run internally, bypassing the cmdline parsing. Useful if you have some luigi code that you want to run internally. - Example - luigi.build([MyTask1(), MyTask2()], local_scheduler=True) + Example: + + .. code-block:: python + + luigi.build([MyTask1(), MyTask2()], local_scheduler=True) One notable difference is that `build` defaults to not using the identical process lock. Otherwise, `build` would only be callable once from each process. - ''' - if "no_lock" not in env_params and "lock" not in env_params: + + :param tasks: + :param worker_scheduler_factory: + :param env_params: + :return: + """ + if "no_lock" not in env_params: + # TODO(erikbern): should we really override args here? env_params["no_lock"] = True - env_params["lock"] = False + Interface.run(tasks, worker_scheduler_factory, env_params) diff --git a/luigi/lock.py b/luigi/lock.py index 4171e3d436..5d56ff3c70 100644 --- a/luigi/lock.py +++ b/luigi/lock.py @@ -12,13 +12,16 @@ # License for the specific language governing permissions and limitations under # the License. -import os import hashlib +import os def getpcmd(pid): - ''' Returns command of process - ''' + """ + Returns command of process. + + :param pid: + """ cmd = 'ps -p %s -o command=' % (pid,) p = os.popen(cmd, 'r') return p.readline().strip() @@ -35,20 +38,21 @@ def get_info(pid_dir): def acquire_for(pid_dir, num_available=1): - ''' Makes sure the process is only run once at the same time with the same name. + """ + Makes sure the process is only run once at the same time with the same name. Notice that we since we check the process name, different parameters to the same command can spawn multiple processes at the same time, i.e. running "/usr/bin/my_process" does not prevent anyone from launching "/usr/bin/my_process --foo bar". - ''' + """ my_pid, my_cmd, pid_file = get_info(pid_dir) # Check if there is a pid file corresponding to this name if not os.path.exists(pid_dir): os.mkdir(pid_dir) - os.chmod(pid_dir, 0777) + os.chmod(pid_dir, 0o777) pids = set() pid_cmds = {} @@ -78,6 +82,6 @@ def acquire_for(pid_dir, num_available=1): else: s = os.stat(pid_file) if os.getuid() == s.st_uid: - os.chmod(pid_file, s.st_mode | 0777) + os.chmod(pid_file, s.st_mode | 0o777) return True diff --git a/luigi/mock.py b/luigi/mock.py index ec61f1d485..c954ee847d 100644 --- a/luigi/mock.py +++ b/luigi/mock.py @@ -12,17 +12,19 @@ # License for the specific language governing permissions and limitations under # the License. 
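A Unix-only sketch of how the pid-file lock above behaves (``getpcmd`` shells out to ``ps``; the directory name is a throwaway):

.. code-block:: python

    import os
    import tempfile

    from luigi import lock

    pid_dir = os.path.join(tempfile.gettempdir(), 'luigi-lock-demo')
    if lock.acquire_for(pid_dir, num_available=1):
        print 'no identical command line is currently holding the lock'
    else:
        print 'another identical command is already running'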
+import multiprocessing +import os import StringIO -import target import sys -import os + import luigi.util -import multiprocessing +import target class MockFileSystem(target.FileSystem): - """MockFileSystem inspects/modifies _data to simulate - file system operations""" + """ + MockFileSystem inspects/modifies _data to simulate file system operations. + """ _data = None def get_all_data(self): @@ -38,9 +40,11 @@ def exists(self, path): return MockFile(path).exists() def remove(self, path, recursive=True, skip_trash=True): - """Removes the given mockfile. skip_trash doesn't have any meaning.""" + """ + Removes the given mockfile. skip_trash doesn't have any meaning. + """ if recursive: - to_delete=[] + to_delete = [] for s in self.get_all_data().keys(): if s.startswith(path): to_delete.append(s) @@ -50,13 +54,16 @@ def remove(self, path, recursive=True, skip_trash=True): self.get_all_data().pop(path) def listdir(self, path): - """listdir does a prefix match of self.get_all_data(), but - doesn't yet support globs""" + """ + listdir does a prefix match of self.get_all_data(), but doesn't yet support globs. + """ return [s for s in self.get_all_data().keys() if s.startswith(path)] - def mkdir(self, path): - """mkdir is a noop""" + def mkdir(self, path, parents=True, raise_if_exists=False): + """ + mkdir is a noop. + """ pass def clear(self): @@ -80,9 +87,6 @@ def rename(self, path, fail_if_exists=False): contents = self.fs.get_all_data().pop(self._fn) self.fs.get_all_data()[path] = contents - def move_dir(self, path): - self.move(path, raise_if_exists=True) - @property def path(self): return self._fn @@ -92,6 +96,7 @@ def open(self, mode): class StringBuffer(StringIO.StringIO): # Just to be able to do writing + reading from the same buffer + def write(self2, data): if self._mirror_on_stderr: self2.seek(-1, os.SEEK_END) @@ -105,8 +110,8 @@ def close(self2): self.fs.get_all_data()[fn] = self2.getvalue() StringIO.StringIO.close(self2) - def __exit__(self, type, value, traceback): - if not type: + def __exit__(self, exc_type, exc_val, exc_tb): + if not exc_type: self.close() def __enter__(self): @@ -119,7 +124,9 @@ def __enter__(self): def skip(func): - """ Sort of a substitute for unittest.skip*, which is 2.7+ """ + """ + Sort of a substitute for unittest.skip*, which is 2.7+. + """ def wrapper(): pass return wrapper diff --git a/luigi/mrrunner.py b/luigi/mrrunner.py index ea9c2f37f6..0a06d314fa 100644 --- a/luigi/mrrunner.py +++ b/luigi/mrrunner.py @@ -14,22 +14,25 @@ # License for the specific language governing permissions and limitations under # the License. -"""The hadoop runner. +""" +The hadoop runner. This module contains the main() method which will be used to run the mapper and reducer on the Hadoop nodes. """ +import cPickle as pickle +import logging import os import sys import tarfile -import cPickle as pickle -import logging import traceback class Runner(object): - """Run the mapper or reducer on hadoop nodes.""" + """ + Run the mapper or reducer on hadoop nodes. 
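``MockFile`` exists mainly for tests; a round trip through the shared in-memory dict looks roughly like this (the path is arbitrary):

.. code-block:: python

    from luigi.mock import MockFile

    target = MockFile('/a/made/up/path.txt')
    with target.open('w') as f:
        f.write('hello mock\n')

    assert target.exists()
    with target.open('r') as f:
        assert f.read() == 'hello mock\n'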
+ """ def __init__(self, job=None): self.extract_packages_archive() @@ -38,11 +41,11 @@ def __init__(self, job=None): def run(self, kind, stdin=sys.stdin, stdout=sys.stdout): if kind == "map": - self.job._run_mapper(stdin, stdout) + self.job.run_mapper(stdin, stdout) elif kind == "combiner": - self.job._run_combiner(stdin, stdout) + self.job.run_combiner(stdin, stdout) elif kind == "reduce": - self.job._run_reducer(stdin, stdout) + self.job.run_reducer(stdin, stdout) else: raise Exception('weird command: %s' % kind) @@ -63,8 +66,9 @@ def print_exception(exc): print >> sys.stderr, 'luigi-exc-hex=%s' % tb.encode('hex') -def main(args=sys.argv, stdin=sys.stdin, stdout=sys.stdout, print_exception=print_exception): - """Run either the mapper or the reducer from the class instance in the file "job-instance.pickle". +def main(args=None, stdin=sys.stdin, stdout=sys.stdout, print_exception=print_exception): + """ + Run either the mapper or the reducer from the class instance in the file "job-instance.pickle". Arguments: @@ -73,10 +77,10 @@ def main(args=sys.argv, stdin=sys.stdin, stdout=sys.stdout, print_exception=prin try: # Set up logging. logging.basicConfig(level=logging.WARN) - - kind = args[1] + + kind = args is not None and args[1] or sys.argv[1] Runner().run(kind, stdin=stdin, stdout=stdout) - except Exception, exc: + except Exception as exc: # Dump encoded data that we will try to fetch using mechanize print_exception(exc) raise diff --git a/luigi/notifications.py b/luigi/notifications.py index b7435d2fdd..9a4652af91 100644 --- a/luigi/notifications.py +++ b/luigi/notifications.py @@ -1,7 +1,23 @@ -import sys +# Copyright (c) 2015 Spotify AB +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. 
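The new ``kind = args is not None and args[1] or sys.argv[1]`` line relies on the and/or idiom; it only behaves as intended because the mapper/combiner/reducer names are truthy strings. An equivalent, more explicit form for reference:

.. code-block:: python

    # Equivalent to the and/or fallback above (safe here because args[1] is
    # always a non-empty string such as "map", "combiner" or "reduce"):
    if args is not None:
        kind = args[1]
    else:
        kind = sys.argv[1]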
+ import logging import socket +import sys + from luigi import configuration + logger = logging.getLogger("luigi-interface") @@ -78,6 +94,7 @@ def send_email_ses(config, sender, subject, message, recipients, image_png): source=msg_root['From'], destinations=msg_root['To']) + def send_email_sendgrid(config, sender, subject, message, recipients, image_png): import sendgrid client = sendgrid.SendGridClient(config.get('email', 'SENDGRID_USERNAME', None), @@ -96,7 +113,10 @@ def send_email_sendgrid(config, sender, subject, message, recipients, image_png) client.send(to_send) + def send_email(subject, message, sender, recipients, image_png=None): + config = configuration.get_config() + subject = _prefix(subject) logger.debug("Emailing:\n" "-------------\n" @@ -108,12 +128,10 @@ def send_email(subject, message, sender, recipients, image_png=None): "-------------", recipients, sender, subject, message) if not recipients or recipients == (None,): return - if sys.stdout.isatty() or DEBUG: + if (sys.stdout.isatty() or DEBUG) and (not config.getboolean('email', 'force-send', False)): logger.info("Not sending email when running from a tty or in debug mode") return - config = configuration.get_config() - # Clean the recipients lists to allow multiple error-email addresses, comma # separated in client.cfg recipients_tmp = [] @@ -133,9 +151,10 @@ def send_email(subject, message, sender, recipients, image_png=None): def send_error_email(subject, message): - """ Sends an email to the configured error-email. + """ + Sends an email to the configured error-email. - If no error-email is configured, then a message is logged + If no error-email is configured, then a message is logged. """ config = configuration.get_config() receiver = config.get('core', 'error-email', None) @@ -155,8 +174,9 @@ def send_error_email(subject, message): def _prefix(subject): - """If the config has a special prefix for emails then this function adds - this prefix + """ + If the config has a special prefix for emails then this function adds + this prefix. """ config = configuration.get_config() email_prefix = config.get('core', 'email-prefix', None) diff --git a/luigi/parameter.py b/luigi/parameter.py index 5b96f829a4..246e557f93 100644 --- a/luigi/parameter.py +++ b/luigi/parameter.py @@ -12,41 +12,54 @@ # License for the specific language governing permissions and limitations under # the License. -import configuration import datetime import warnings -from ConfigParser import NoSectionError, NoOptionError +from ConfigParser import NoOptionError, NoSectionError + +import configuration +from deprecate_kwarg import deprecate_kwarg _no_value = object() class ParameterException(Exception): - """Base exception.""" + """ + Base exception. + """ pass class MissingParameterException(ParameterException): - """Exception signifying that there was a missing Parameter.""" + """ + Exception signifying that there was a missing Parameter. + """ pass class UnknownParameterException(ParameterException): - """Exception signifying that an unknown Parameter was supplied.""" + """ + Exception signifying that an unknown Parameter was supplied. + """ pass class DuplicateParameterException(ParameterException): - """Exception signifying that a Parameter was specified multiple times.""" + """ + Exception signifying that a Parameter was specified multiple times. 
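``send_email`` now consults a new ``force-send`` option so that emails can still be delivered from a tty or DEBUG run. A small sketch of the lookup, assuming a ``client.cfg`` with an ``[email]`` section containing ``force-send: true``:

.. code-block:: python

    from luigi import configuration

    # Mirrors the new gate in send_email(): defaults to False, so behaviour is
    # unchanged unless [email] force-send is set in client.cfg.
    force_send = configuration.get_config().getboolean('email', 'force-send', False)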
+ """ pass class UnknownConfigException(ParameterException): - """Exception signifying that the ``default_from_config`` for the Parameter could not be found.""" + """ + Exception signifying that the ``config_path`` for the Parameter could not be found. + """ pass class Parameter(object): - """An untyped Parameter + """ + An untyped Parameter Parameters are objects set on the Task class level to make it possible to parameterize tasks. For instance: @@ -63,19 +76,21 @@ class MyTask(luigi.Task): The ``config_path`` argument lets you specify a place where the parameter is read from config in case no value is provided. - Providing ``is_global=True`` changes the behavior of the parameter so that the value is shared - across all instances of the task. Global parameters can be provided in several ways. In - falling order of precedence: + When a task is instantiated, it will first use any argument as the value of the parameter, eg. + if you instantiate a = TaskA(x=44) then a.x == 44. If this does not exist, it will use the value + of the Parameter object, which is defined on a class level. This will be resolved in this + order of falling priority: - * A value provided on the command line (eg. ``--my-global-value xyz``) - * A value provided via config (using the ``config_path`` argument) - * A default value set using the ``default`` flag. + * Any value provided on the command line on the class level (eg. ``--TaskA-param xyz``) + * Any value provided via config (using the ``config_path`` argument) + * Any default value set using the ``default`` flag. """ counter = 0 """non-atomically increasing counter used for ordering parameters.""" + @deprecate_kwarg('is_boolean', 'is_bool', False) def __init__(self, default=_no_value, is_list=False, is_boolean=False, is_global=False, significant=True, description=None, - config_path=None, default_from_config=None): + config_path=None): """ :param default: the default value for this parameter. This should match the type of the Parameter, i.e. ``datetime.date`` for ``DateParameter`` or ``int`` for @@ -84,10 +99,10 @@ def __init__(self, default=_no_value, is_list=False, is_boolean=False, is_global :param bool is_list: specify ``True`` if the parameter should allow a list of values rather than a single value. Default: ``False``. A list has an implicit default value of ``[]``. - :param bool is_boolean: specify ``True`` if the parameter is a boolean value. Default: - ``False``. Boolean's have an implicit default value of ``False``. + :param bool is_bool: specify ``True`` if the parameter is a bool value. Default: + ``False``. Bool's have an implicit default value of ``False``. :param bool is_global: specify ``True`` if the parameter is global (i.e. used by multiple - Tasks). Default: ``False``. + Tasks). Default: ``False``. DEPRECATED. :param bool significant: specify ``False`` if the parameter should not be treated as part of the unique identifier for a Task. An insignificant Parameter might also be used to specify a password or other sensitive information @@ -106,20 +121,21 @@ def __init__(self, default=_no_value, is_list=False, is_boolean=False, is_global self.__global = _no_value self.is_list = is_list - self.is_boolean = is_boolean and not is_list # Only BooleanParameter should ever use this. TODO(erikbern): should we raise some kind of exception? + self.is_bool = is_boolean and not is_list # Only BoolParameter should ever use this. TODO(erikbern): should we raise some kind of exception? 
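The rewritten ``Parameter`` docstring describes the new resolution order. A minimal sketch of what that order means in practice (``TaskA`` and its values are invented for the example):

.. code-block:: python

    import luigi

    class TaskA(luigi.Task):
        x = luigi.Parameter(default='hello')

    # 1. A constructor argument always wins:
    a = TaskA(x='44')
    assert a.x == '44'

    # 2. Otherwise a class-level value from the command line is used,
    #    e.g. --TaskA-x 42
    # 3. Otherwise a value read from config (if config_path was given).
    # 4. Otherwise the default declared above:
    assert TaskA().x == 'hello'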
self.is_global = is_global # It just means that the default value is exposed and you can override it - self.significant = significant # Whether different values for this parameter will differentiate otherwise equal tasks + self.significant = significant # Whether different values for this parameter will differentiate otherwise equal tasks - if default_from_config is not None: + if is_global: warnings.warn( - "Use config_path parameter, not default_from_config", + 'is_global is deprecated and will be removed. Please use either ' + ' (a) class level config (eg. --MyTask-my-param 42)' + ' (b) a separate Config class with global settings on it', DeprecationWarning, - stacklevel=2 - ) - config_path = default_from_config + stacklevel=2) if is_global and default == _no_value and config_path is None: raise ParameterException('Global parameters need default values') + self.description = description if config_path is not None and ('section' not in config_path or 'name' not in config_path): @@ -129,18 +145,21 @@ def __init__(self, default=_no_value, is_list=False, is_boolean=False, is_global self.counter = Parameter.counter # We need to keep track of this to get the order right (see Task class) Parameter.counter += 1 - def _get_value_from_config(self): + def _get_value_from_config(self, task_name, param_name): """Loads the default from the config. Returns _no_value if it doesn't exist""" - if not self.__config: + if self.__config: + section, name = self.__config['section'], self.__config['name'] + elif task_name is not None and param_name is not None: + section, name = task_name, param_name + else: return _no_value conf = configuration.get_config() - (section, name) = (self.__config['section'], self.__config['name']) try: value = conf.get(section, name) - except (NoSectionError, NoOptionError), e: + except (NoSectionError, NoOptionError) as e: return _no_value if self.is_list: @@ -148,38 +167,33 @@ def _get_value_from_config(self): else: return self.parse(value) + def _get_value(self, task_name=None, param_name=None): + values = [self.__global, self._get_value_from_config(task_name, param_name), self.__default] + for value in values: + if value != _no_value: + return value + else: + return _no_value + @property def has_value(self): - """``True`` if a default was specified or if config_path references a valid entry in the conf. + """ + ``True`` if a default was specified or if config_path references a valid entry in the conf. Note that "value" refers to the Parameter object itself - it can be either + 1. The default value for this parameter 2. A value read from the config 3. A global value Any Task instance can have its own value set that overrides this. """ - values = [self.__global, self._get_value_from_config(), self.__default] - for value in values: - if value != _no_value: - return True - else: - return False - - @property - def has_default(self): - """Don't use this function - see has_value instead""" - warnings.warn( - 'Use has_value rather than has_default. The meaning of ' - '"default" has changed', - DeprecationWarning, - stacklevel=2 - ) - return self.has_value + return self._get_value() != _no_value @property def value(self): - """The value for this Parameter. + """ + The value for this Parameter. This refers to any value defined by a default, a config option, or a global value. @@ -187,49 +201,36 @@ def value(self): :raises MissingParameterException: if a value is not set. :return: the parsed value. 
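``_get_value_from_config`` now falls back to ``(task_name, param_name)`` when no explicit ``config_path`` is given, which is what lets a parameter be configured from a section named after the task. A hedged sketch, with a hypothetical ``client.cfg``:

.. code-block:: python

    import luigi

    class TaskA(luigi.Task):
        # No default and no config_path: with the new fallback, the value can
        # still come from a hypothetical client.cfg section such as
        #
        #   [TaskA]
        #   x: 42
        #
        # which is resolved via _get_value_from_config('TaskA', 'x').
        x = luigi.IntParameter()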
""" - values = [self.__global, self._get_value_from_config(), self.__default] - for value in values: - if value != _no_value: - return value - else: + value = self._get_value() + if value == _no_value: raise MissingParameterException("No default specified") + else: + return value - @property - def default(self): - warnings.warn( - 'Use value rather than default. The meaning of ' - '"default" has changed', - DeprecationWarning, - stacklevel=2 - ) - return self.value + def has_task_value(self, task_name, param_name): + return self._get_value(task_name, param_name) != _no_value + + def task_value(self, task_name, param_name): + value = self._get_value(task_name, param_name) + if value == _no_value: + raise MissingParameterException("No default specified") + else: + return value def set_global(self, value): - """Set the global value of this Parameter. + """ + Set the global value of this Parameter. :param value: the new global value. """ - assert self.is_global self.__global = value def reset_global(self): self.__global = _no_value - def set_default(self, value): - """Set the default value of this Parameter. - - :param value: the new default value. - """ - warnings.warn( - 'Use set_global rather than set_default. The meaning of ' - '"default" has changed', - DeprecationWarning, - stacklevel=2 - ) - self.__default = value - def parse(self, x): - """Parse an individual value from the input. + """ + Parse an individual value from the input. The default implementation is an identify (it returns ``x``), but subclasses should override this method for specialized parsing. This method is called by :py:meth:`parse_from_input` @@ -241,8 +242,9 @@ def parse(self, x): """ return x # default impl - def serialize(self, x): # opposite of parse - """Opposite of :py:meth:`parse`. + def serialize(self, x): # opposite of parse + """ + Opposite of :py:meth:`parse`. Converts the value ``x`` to a string. @@ -264,13 +266,13 @@ def parse_from_input(self, param_name, x): if not x: if self.has_value: return self.value - elif self.is_boolean: + elif self.is_bool: return False elif self.is_list: return [] else: - raise MissingParameterException("No value for '%s' (%s) submitted and no default value has been assigned." % \ - (param_name, "--" + param_name.replace('_', '-'))) + raise MissingParameterException("No value for '%s' (%s) submitted and no default value has been assigned." 
% + (param_name, "--" + param_name.replace('_', '-'))) elif self.is_list: return tuple(self.parse(p) for p in x) else: @@ -282,72 +284,174 @@ def serialize_to_input(self, x): else: return self.serialize(x) + def parser_dest(self, param_name, task_name, glob=False, is_without_section=False): + if self.is_global or is_without_section: + if glob: + return param_name + else: + return None + else: + if glob: + return task_name + '_' + param_name + else: + return param_name + + def add_to_cmdline_parser(self, parser, param_name, task_name, optparse=False, glob=False, is_without_section=False): + dest = self.parser_dest(param_name, task_name, glob, is_without_section=is_without_section) + if not dest: + return + flag = '--' + dest.replace('_', '-') + + description = [] + description.append('%s.%s' % (task_name, param_name)) + if self.description: + description.append(self.description) + if self.has_value: + description.append(" [default: %s]" % (self.value,)) + + if self.is_list: + action = "append" + elif self.is_bool: + action = "store_true" + else: + action = "store" + if optparse: + f = parser.add_option + else: + f = parser.add_argument + f(flag, + help=' '.join(description), + action=action, + dest=dest) + + def parse_from_args(self, param_name, task_name, args, params): + # Note: modifies arguments + dest = self.parser_dest(param_name, task_name, glob=False) + if dest is not None: + value = getattr(args, dest, None) + params[param_name] = self.parse_from_input(param_name, value) + + def set_global_from_args(self, param_name, task_name, args, is_without_section=False): + # Note: side effects + dest = self.parser_dest(param_name, task_name, glob=True, is_without_section=is_without_section) + if dest is not None: + value = getattr(args, dest, None) + if value is not None: + self.set_global(self.parse_from_input(param_name, value)) + else: + self.reset_global() + class DateHourParameter(Parameter): - """Parameter whose value is a :py:class:`~datetime.datetime` specified to the hour. + """ + Parameter whose value is a :py:class:`~datetime.datetime` specified to the hour. A DateHourParameter is a `ISO 8601 `_ formatted date and time specified to the hour. For example, ``2013-07-10T19`` specifies July 10, 2013 at 19:00. """ + date_format = '%Y-%m-%dT%H' # ISO 8601 is to use 'T' + def parse(self, s): """ Parses a string to a :py:class:`~datetime.datetime` using the format string ``%Y-%m-%dT%H``. """ # TODO(erikbern): we should probably use an internal class for arbitary # time intervals (similar to date_interval). Or what do you think? - return datetime.datetime.strptime(s, "%Y-%m-%dT%H") # ISO 8601 is to use 'T' + return datetime.datetime.strptime(s, self.date_format) def serialize(self, dt): """ Converts the datetime to a string usnig the format string ``%Y-%m-%dT%H``. """ - if dt is None: return str(dt) - return dt.strftime('%Y-%m-%dT%H') + if dt is None: + return str(dt) + return dt.strftime(self.date_format) + + +class DateMinuteParameter(DateHourParameter): + """ + Parameter whose value is a :py:class:`~datetime.datetime` specified to the minute. + + A DateMinuteParameter is a `ISO 8601 `_ formatted + date and time specified to the minute. For example, ``2013-07-10T19H07`` specifies July 10, 2013 at + 19:07. + """ + + date_format = '%Y-%m-%dT%HH%M' # ISO 8601 is to use 'T' and 'H' class DateParameter(Parameter): - """Parameter whose value is a :py:class:`~datetime.date`. + """ + Parameter whose value is a :py:class:`~datetime.date`. A DateParameter is a Date string formatted ``YYYY-MM-DD``. 
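The ``parse``/``serialize`` pair remains the extension point for custom parameter types; a hypothetical subclass as a sketch:

.. code-block:: python

    import json
    import luigi

    class JSONParameter(luigi.Parameter):
        """Hypothetical parameter type built on the parse()/serialize() hooks."""

        def parse(self, s):
            return json.loads(s)        # command-line/config string -> object

        def serialize(self, x):
            return json.dumps(x)        # object -> string (opposite of parse)

And the new class-level ``date_format`` plus the added ``DateMinuteParameter`` give, per the formats documented above:

.. code-block:: python

    import datetime
    from luigi.parameter import DateHourParameter, DateMinuteParameter

    assert DateHourParameter().parse('2013-07-10T19') == \
        datetime.datetime(2013, 7, 10, 19)
    assert DateHourParameter().serialize(datetime.datetime(2013, 7, 10, 19)) == \
        '2013-07-10T19'
    assert DateMinuteParameter().parse('2013-07-10T19H07') == \
        datetime.datetime(2013, 7, 10, 19, 7)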
For example, ``2013-07-10`` specifies July 10, 2013. """ + def parse(self, s): """Parses a date string formatted as ``YYYY-MM-DD``.""" return datetime.date(*map(int, s.split('-'))) class IntParameter(Parameter): - """Parameter whose value is an ``int``.""" + """ + Parameter whose value is an ``int``. + """ + def parse(self, s): - """Parses an ``int`` from the string using ``int()``.""" + """ + Parses an ``int`` from the string using ``int()``. + """ return int(s) + class FloatParameter(Parameter): - """Parameter whose value is a ``float``.""" + """ + Parameter whose value is a ``float``. + """ + def parse(self, s): - """Parses a ``float`` from the string using ``float()``.""" + """ + Parses a ``float`` from the string using ``float()``. + """ return float(s) -class BooleanParameter(Parameter): - """A Parameter whose value is a ``bool``.""" - # TODO(erikbern): why do we call this "boolean" instead of "bool"? - # The integer parameter is called "int" so calling this "bool" would be - # more consistent, especially given the Python type names. + +class BoolParameter(Parameter): + """ + A Parameter whose value is a ``bool``. + """ + def __init__(self, *args, **kwargs): - """This constructor passes along args and kwargs to ctor for :py:class:`Parameter` but - specifies ``is_boolean=True``. """ - super(BooleanParameter, self).__init__(*args, is_boolean=True, **kwargs) + This constructor passes along args and kwargs to ctor for :py:class:`Parameter` but + specifies ``is_bool=True``. + """ + super(BoolParameter, self).__init__(*args, is_bool=True, **kwargs) def parse(self, s): - """Parses a ``boolean`` from the string, matching 'true' or 'false' ignoring case.""" + """ + Parses a ``bool`` from the string, matching 'true' or 'false' ignoring case. + """ return {'true': True, 'false': False}[str(s).lower()] +class BooleanParameter(BoolParameter): + + def __init__(self, *args, **kwargs): + warnings.warn( + 'BooleanParameter is deprecated, use BoolParameter instead', + DeprecationWarning, + stacklevel=2 + ) + super(BooleanParameter, self).__init__(*args, **kwargs) + + class DateIntervalParameter(Parameter): - """A Parameter whose value is a :py:class:`~luigi.date_interval.DateInterval`. + """ + A Parameter whose value is a :py:class:`~luigi.date_interval.DateInterval`. Date Intervals are specified using the ISO 8601 `Time Interval `_ notation. @@ -356,7 +460,8 @@ class DateIntervalParameter(Parameter): # Also gives some helpful interval algebra def parse(self, s): - """Parses a `:py:class:`~luigi.date_interval.DateInterval` from the input. + """ + Parses a `:py:class:`~luigi.date_interval.DateInterval` from the input. see :py:mod:`luigi.date_interval` for details on the parsing of DateIntervals. @@ -374,12 +479,13 @@ def parse(self, s): class TimeDeltaParameter(Parameter): - """Class that maps to timedelta using strings in any of the following forms: + """ + Class that maps to timedelta using strings in any of the following forms: - - ``n {w[eek[s]]|d[ay[s]]|h[our[s]]|m[inute[s]|s[second[s]]}`` (e.g. "1 week 2 days" or "1 h") + * ``n {w[eek[s]]|d[ay[s]]|h[our[s]]|m[inute[s]|s[second[s]]}`` (e.g. 
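Two usage notes on the renames and formats introduced above. First, the ``BooleanParameter`` to ``BoolParameter`` rename (the old name remains as a deprecated alias):

.. code-block:: python

    import luigi
    from luigi.parameter import BoolParameter

    class MyTask(luigi.Task):
        # Formerly BooleanParameter(); instantiating the old class still works
        # but now emits a DeprecationWarning.
        flag = BoolParameter()                     # implicit default is False

    assert BoolParameter().parse('True') is True   # parse is case-insensitive

Second, the forms accepted by ``TimeDeltaParameter`` as listed in its docstring; the expected values below follow from those formats:

.. code-block:: python

    import datetime
    from luigi.parameter import TimeDeltaParameter

    p = TimeDeltaParameter()
    assert p.parse('1 week 2 days') == datetime.timedelta(days=9)
    assert p.parse('1 h') == datetime.timedelta(hours=1)
    assert p.parse('P1DT12H') == datetime.timedelta(days=1, hours=12)
    assert p.parse('P2W') == datetime.timedelta(weeks=2)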
"1 week 2 days" or "1 h") Note: multiple arguments must be supplied in longest to shortest unit order - - ISO 8601 duration ``PnDTnHnMnS`` (each field optional, years and months not supported) - - ISO 8601 duration ``PnW`` + * ISO 8601 duration ``PnDTnHnMnS`` (each field optional, years and months not supported) + * ISO 8601 duration ``PnW`` See https://en.wikipedia.org/wiki/ISO_8601#Durations """ @@ -391,7 +497,7 @@ def _apply_regex(self, regex, input): if re_match: kwargs = {} has_val = False - for k,v in re_match.groupdict(default="0").items(): + for k, v in re_match.groupdict(default="0").iteritems(): val = int(v) has_val = has_val or val != 0 kwargs[k] = val @@ -401,11 +507,12 @@ def _apply_regex(self, regex, input): def _parseIso8601(self, input): def field(key): return "(?P<%s>\d+)%s" % (key, key[0].upper()) + def optional_field(key): return "(%s)?" % field(key) # A little loose: ISO 8601 does not allow weeks in combination with other fields, but this regex does (as does python timedelta) regex = "P(%s|%s(T%s)?)" % (field("weeks"), optional_field("days"), "".join([optional_field(key) for key in ["hours", "minutes", "seconds"]])) - return self._apply_regex(regex,input) + return self._apply_regex(regex, input) def _parseSimple(self, input): keys = ["weeks", "days", "hours", "minutes", "seconds"] @@ -415,7 +522,8 @@ def _parseSimple(self, input): return self._apply_regex(regex, input) def parse(self, input): - """Parses a time delta from the input. + """ + Parses a time delta from the input. See :py:class:`TimeDeltaParameter` for details on supported formats. """ diff --git a/luigi/postgres.py b/luigi/postgres.py index 0c2d9a02eb..e0d6999c83 100644 --- a/luigi/postgres.py +++ b/luigi/postgres.py @@ -14,8 +14,8 @@ import datetime import logging -import tempfile import re +import tempfile import luigi from luigi.contrib import rdbms @@ -29,9 +29,10 @@ except ImportError: logger.warning("Loading postgres module without psycopg2 installed. Will crash at runtime if postgres functionality is used.") + class MultiReplacer(object): - # TODO: move to misc/util module - """Object for one-pass replace of multiple words + """ + Object for one-pass replace of multiple words Substituted parts will not be matched against other replace patterns, as opposed to when using multipass replace. The order of the items in the replace_pairs input will dictate replacement precedence. @@ -40,17 +41,28 @@ class MultiReplacer(object): replace_pairs -- list of 2-tuples which hold strings to be replaced and replace string Usage: - >>> replace_pairs = [("a", "b"), ("b", "c")] - >>> MultiReplacer(replace_pairs)("abcd") - 'bccd' - >>> replace_pairs = [("ab", "x"), ("a", "x")] - >>> MultiReplacer(replace_pairs)("ab") - 'x' - >>> replace_pairs.reverse() - >>> MultiReplacer(replace_pairs)("ab") - 'xb' + + .. code-block:: python + + >>> replace_pairs = [("a", "b"), ("b", "c")] + >>> MultiReplacer(replace_pairs)("abcd") + 'bccd' + >>> replace_pairs = [("ab", "x"), ("a", "x")] + >>> MultiReplacer(replace_pairs)("ab") + 'x' + >>> replace_pairs.reverse() + >>> MultiReplacer(replace_pairs)("ab") + 'xb' """ +# TODO: move to misc/util module + def __init__(self, replace_pairs): + """ + Initializes a MultiReplacer instance. + + :param replace_pairs: list of 2-tuples which hold strings to be replaced and replace string. 
+ :type replace_pairs: tuple + """ replace_list = list(replace_pairs) # make a copy in case input is iterable self._replace_dict = dict(replace_list) pattern = '|'.join(re.escape(x) for x, y in replace_list) @@ -78,9 +90,11 @@ def __call__(self, search_string): class PostgresTarget(luigi.Target): - """Target for a resource in Postgres. + """ + Target for a resource in Postgres. - This will rarely have to be directly instantiated by the user""" + This will rarely have to be directly instantiated by the user. + """ marker_table = luigi.configuration.get_config().get('postgres', 'marker-table', 'table_updates') # Use DB side timestamps or client side timestamps in the marker_table @@ -108,10 +122,12 @@ def __init__(self, host, database, user, password, table, update_id): self.update_id = update_id def touch(self, connection=None): - """Mark this update as complete. + """ + Mark this update as complete. Important: If the marker table doesn't exist, the connection transaction will be aborted - and the connection reset. Then the marker table will be created. + and the connection reset. + Then the marker table will be created. """ self.create_marker_table() @@ -125,14 +141,14 @@ def touch(self, connection=None): """INSERT INTO {marker_table} (update_id, target_table) VALUES (%s, %s) """.format(marker_table=self.marker_table), - (self.update_id, self.table)) + (self.update_id, self.table)) else: connection.cursor().execute( - """INSERT INTO {marker_table} (update_id, target_table, inserted) + """INSERT INTO {marker_table} (update_id, target_table, inserted) VALUES (%s, %s, %s); """.format(marker_table=self.marker_table), - (self.update_id, self.table, - datetime.datetime.now())) + (self.update_id, self.table, + datetime.datetime.now())) # make sure update is properly marked assert self.exists(connection) @@ -146,10 +162,10 @@ def exists(self, connection=None): cursor.execute("""SELECT 1 FROM {marker_table} WHERE update_id = %s LIMIT 1""".format(marker_table=self.marker_table), - (self.update_id,) - ) + (self.update_id,) + ) row = cursor.fetchone() - except psycopg2.ProgrammingError, e: + except psycopg2.ProgrammingError as e: if e.pgcode == psycopg2.errorcodes.UNDEFINED_TABLE: row = None else: @@ -157,7 +173,9 @@ def exists(self, connection=None): return row is not None def connect(self): - "Get a psycopg2 connection object to the database where the table is" + """ + Get a psycopg2 connection object to the database where the table is. + """ connection = psycopg2.connect( host=self.host, port=self.port, @@ -168,9 +186,11 @@ def connect(self): return connection def create_marker_table(self): - """Create marker table if it doesn't exist. + """ + Create marker table if it doesn't exist. - Using a separate connection since the transaction might have to be reset""" + Using a separate connection since the transaction might have to be reset. + """ connection = self.connect() connection.autocommit = True cursor = connection.cursor() @@ -188,7 +208,7 @@ def create_marker_table(self): """.format(marker_table=self.marker_table) try: cursor.execute(sql) - except psycopg2.ProgrammingError, e: + except psycopg2.ProgrammingError as e: if e.pgcode == psycopg2.errorcodes.DUPLICATE_TABLE: pass else: @@ -209,19 +229,21 @@ class CopyToTable(rdbms.CopyToTable): To customize how to access data from an input task, override the `rows` method with a generator that yields each row as a tuple with fields ordered according to `columns`. 
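For context on how ``PostgresTarget`` is used directly (rarely, per the docstring), a sketch with invented connection details:

.. code-block:: python

    from luigi.postgres import PostgresTarget

    target = PostgresTarget(host='localhost', database='analytics',   # hypothetical
                            user='luigi', password='secret',
                            table='daily_report',
                            update_id='DailyReport(date=2015-01-01)')

    conn = target.connect()
    if not target.exists(conn):
        # ... load the data inside the same transaction ...
        target.touch(conn)   # records update_id in the marker table
        conn.commit()
    conn.close()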
- """ def rows(self): - """Return/yield tuples or lists corresponding to each row to be inserted """ + """ + Return/yield tuples or lists corresponding to each row to be inserted. + """ with self.input().open('r') as fobj: for line in fobj: yield line.strip('\n').split('\t') def map_column(self, value): - """Applied to each column of every row returned by `rows` + """ + Applied to each column of every row returned by `rows`. - Default behaviour is to escape special characters and identify any self.null_values + Default behaviour is to escape special characters and identify any self.null_values. """ if value in self.null_values: return '\N' @@ -230,11 +252,11 @@ def map_column(self, value): else: return default_escape(str(value)) - # everything below will rarely have to be overridden def output(self): - """Returns a PostgresTarget representing the inserted dataset. + """ + Returns a PostgresTarget representing the inserted dataset. Normally you don't override this. """ @@ -245,8 +267,7 @@ def output(self): password=self.password, table=self.table, update_id=self.update_id() - ) - + ) def copy(self, cursor, file): if isinstance(self.columns[0], basestring): @@ -258,7 +279,8 @@ def copy(self, cursor, file): cursor.copy_from(file, self.table, null='\N', sep=self.column_separator, columns=column_names) def run(self): - """Inserts data generated by rows() into target table. + """ + Inserts data generated by rows() into target table. If the target table doesn't exist, self.create_table will be called to attempt to create the table. @@ -291,7 +313,7 @@ def run(self): cursor = connection.cursor() self.init_copy(connection) self.copy(cursor, tmp_file) - except psycopg2.ProgrammingError, e: + except psycopg2.ProgrammingError as e: if e.pgcode == psycopg2.errorcodes.UNDEFINED_TABLE and attempt == 0: # if first attempt fails with "relation not found", try creating table logger.info("Creating table %s", self.table) diff --git a/luigi/process.py b/luigi/process.py index a5cdd05257..ef73293f9f 100644 --- a/luigi/process.py +++ b/luigi/process.py @@ -12,12 +12,13 @@ # License for the specific language governing permissions and limitations under # the License. -import os -import signal -import random import datetime import logging import logging.handlers +import os +import random +import signal + rootlogger = logging.getLogger() server_logger = logging.getLogger("luigi.server") @@ -110,7 +111,8 @@ def daemonize(cmd, pidfile=None, logdir=None, api_port=8082, address=None): def fork_linked_workers(num_processes): - """ Forks num_processes child processes. + """ + Forks num_processes child processes. Returns an id between 0 and num_processes - 1 for each child process. Will consume the parent process and kill it and all child processes as soon as one child exits with status 0 @@ -132,7 +134,6 @@ def shutdown_handler(signum=None, frame=None): os.waitpid(c, 0) except OSError: print "Child %d is already dead" % c - pass os._exit(0) # exit without calling exit handler again... 
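A minimal ``CopyToTable`` subclass overriding ``rows()``, as the docstring above suggests; connection settings and data are invented:

.. code-block:: python

    import luigi
    from luigi import postgres

    class WriteReport(postgres.CopyToTable):
        # Hypothetical connection settings:
        host = 'localhost'
        database = 'analytics'
        user = 'luigi'
        password = 'secret'
        table = 'daily_report'
        columns = [('id', 'INT'), ('name', 'TEXT')]

        def rows(self):
            # Yield tuples ordered according to `columns` instead of reading
            # self.input().
            yield (1, 'foo')
            yield (2, 'bar')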
sigs = [signal.SIGINT, signal.SIGTERM, signal.SIGQUIT] @@ -140,7 +141,7 @@ def shutdown_handler(signum=None, frame=None): signal.signal(s, shutdown_handler) signal.signal(s, shutdown_handler) signal.signal(s, shutdown_handler) - #haven't found a way to unregister: atexit.register(shutdown_handler) # + # haven't found a way to unregister: atexit.register(shutdown_handler) # def fork_child(child_id, attempt): child_pid = os.fork() @@ -161,7 +162,7 @@ def fork_child(child_id, attempt): assert len(children) == num_processes - while 1: + while True: pid, status = os.wait() if status != 0: # unclean exit, restart process diff --git a/luigi/rpc.py b/luigi/rpc.py index fce00e3c8c..4ad2c37992 100644 --- a/luigi/rpc.py +++ b/luigi/rpc.py @@ -12,25 +12,29 @@ # License for the specific language governing permissions and limitations under # the License. -import urllib -import urllib2 -import logging import json +import logging import time -from scheduler import Scheduler, PENDING +import urllib +import urllib2 + import configuration +from scheduler import PENDING, Scheduler logger = logging.getLogger('luigi-interface') # TODO: 'interface'? class RPCError(Exception): + def __init__(self, message, sub_exception=None): super(RPCError, self).__init__(message) self.sub_exception = sub_exception class RemoteScheduler(Scheduler): - ''' Scheduler proxy object. Talks to a RemoteSchedulerResponder ''' + """ + Scheduler proxy object. Talks to a RemoteSchedulerResponder. + """ def __init__(self, host='localhost', port=8082, connect_timeout=None): self._host = host @@ -93,7 +97,7 @@ def ping(self, worker): self._request('/api/ping', {'worker': worker}, attempts=1) def add_task(self, worker, task_id, status=PENDING, runnable=False, - deps=None, new_deps=None, expl=None, resources={},priority=0, + deps=None, new_deps=None, expl=None, resources={}, priority=0, family='', params={}): self._request('/api/add_task', { 'task_id': task_id, @@ -110,19 +114,11 @@ def add_task(self, worker, task_id, status=PENDING, runnable=False, }) def get_work(self, worker, host=None): - ''' Ugly work around for an older scheduler version, where get_work doesn't have a host argument. Try once passing - host to it, falling back to the old version. Should be removed once people have had time to update everything - ''' - try: - return self._request( - '/api/get_work', - {'worker': worker, 'host': host}, - log_exceptions=False, - attempts=1 - ) - except: - logger.info("get_work RPC call failed, is it possible that you need to update your scheduler?") - raise + return self._request( + '/api/get_work', + {'worker': worker, 'host': host}, + log_exceptions=False, + attempts=1) def graph(self): return self._request('/api/graph', {}) @@ -147,60 +143,3 @@ def fetch_error(self, task_id): def add_worker(self, worker, info): return self._request('/api/add_worker', {'worker': worker, 'info': info}) - - -class RemoteSchedulerResponder(object): - """ Use on the server side for responding to requests - - The kwargs are there for forwards compatibility in case workers add - new (optional) arguments. That way there's no dependency on the server - component when upgrading Luigi on the worker side. - - TODO(erikbern): what is this class actually used for? 
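With the backwards-compatibility fallback removed, ``get_work`` always passes ``host``. A sketch of driving the RPC client directly, assuming a central scheduler is listening on ``localhost:8082``:

.. code-block:: python

    from luigi.rpc import RemoteScheduler

    scheduler = RemoteScheduler(host='localhost', port=8082)
    scheduler.ping(worker='example-worker')

    # host is now always sent; the try/except fallback for very old central
    # schedulers is gone.
    response = scheduler.get_work(worker='example-worker', host='example-host')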
Other than an - unnecessary layer of indirection around central scheduler - """ - - def __init__(self, scheduler): - self._scheduler = scheduler - - def add_task(self, worker, task_id, status, runnable, deps, new_deps, expl, - resources=None, priority=0, family='', params={}, **kwargs): - return self._scheduler.add_task( - worker, task_id, status, runnable, deps, new_deps, expl, - resources, priority, family, params) - - def add_worker(self, worker, info, **kwargs): - return self._scheduler.add_worker(worker, info) - - def get_work(self, worker, host=None, **kwargs): - return self._scheduler.get_work(worker, host) - - def ping(self, worker, **kwargs): - return self._scheduler.ping(worker) - - def graph(self, **kwargs): - return self._scheduler.graph() - - index = graph - - def dep_graph(self, task_id, **kwargs): - return self._scheduler.dep_graph(task_id) - - def inverse_dep_graph(self, task_id, **kwargs): - return self._scheduler.inverse_dependencies(task_id) - - def task_list(self, status, upstream_status, **kwargs): - return self._scheduler.task_list(status, upstream_status) - - def worker_list(self, **kwargs): - return self._scheduler.worker_list() - - def task_search(self, task_str, **kwargs): - return self._scheduler.task_search(task_str) - - def fetch_error(self, task_id, **kwargs): - return self._scheduler.fetch_error(task_id) - - @property - def task_history(self): - return self._scheduler.task_history diff --git a/luigi/s3.py b/luigi/s3.py index d8af947bd4..2e60b141e1 100644 --- a/luigi/s3.py +++ b/luigi/s3.py @@ -17,17 +17,15 @@ import os.path import random import tempfile -import warnings import urlparse +import warnings +from ConfigParser import NoSectionError import configuration -from ConfigParser import NoSectionError +from luigi.format import FileWrapper from luigi.parameter import Parameter -from luigi.target import FileSystem -from luigi.target import FileSystemTarget -from luigi.target import FileSystemException +from luigi.target import FileSystem, FileSystemException, FileSystemTarget from luigi.task import ExternalTask -from luigi.format import FileWrapper logger = logging.getLogger('luigi-interface') @@ -166,7 +164,7 @@ def put_string(self, content, destination_s3_path): (bucket, key) = self._path_to_bucket_and_key(destination_s3_path) # grab and validate the bucket s3_bucket = self.s3.get_bucket(bucket, validate=True) - + # put the content s3_key = Key(s3_bucket) s3_key.key = key @@ -194,14 +192,14 @@ def put_multipart(self, local_path, destination_s3_path, part_size=67108864): # grab and validate the bucket s3_bucket = self.s3.get_bucket(bucket, validate=True) - # calculate the number of parts (int division). + # calculate the number of parts (int division). 
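A usage sketch for ``put_string`` and ``put_multipart``; the credentials, bucket and paths are placeholders:

.. code-block:: python

    from luigi.s3 import S3Client

    client = S3Client(aws_access_key_id='AKIAEXAMPLE',       # hypothetical credentials
                      aws_secret_access_key='secret')

    client.put_string('hello world', 's3://example-bucket/hello.txt')
    client.put_multipart('/tmp/big_file.bin',                # hypothetical local file
                         's3://example-bucket/big_file.bin',
                         part_size=67108864)                 # 64 MiB parts (the default)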
# use modulo to avoid float precision issues # for exactly-sized fits num_parts = \ (source_size / part_size) \ if source_size % part_size == 0 \ else (source_size / part_size) + 1 - + mp = None try: mp = s3_bucket.initiate_multipart_upload(key) @@ -211,9 +209,9 @@ def put_multipart(self, local_path, destination_s3_path, part_size=67108864): offset = part_size * i bytes = min(part_size, source_size - offset) with open(local_path, 'rb') as fp: - part_num = i+1 - logger.info('Uploading part %s/%s to %s' % \ - (part_num, num_parts, destination_s3_path)) + part_num = i + 1 + logger.info('Uploading part %s/%s to %s' % + (part_num, num_parts, destination_s3_path)) fp.seek(offset) mp.upload_part_from_file(fp, part_num=part_num, size=bytes) @@ -221,13 +219,12 @@ def put_multipart(self, local_path, destination_s3_path, part_size=67108864): mp.complete_upload() except: if mp: - logger.info('Canceling multipart s3 upload for %s' % destination_s3_path) + logger.info('Canceling multipart s3 upload for %s' % destination_s3_path) # cancel the upload so we don't get charged for # storage consumed by uploaded parts mp.cancel_upload() raise - def copy(self, source_path, destination_path): """ Copy an object from one S3 location to another. @@ -305,7 +302,7 @@ def _get_s3_config(self, key=None): except NoSectionError: return {} # So what ports etc can be read without us having to specify all dtypes - for k, v in config.items(): + for k, v in config.iteritems(): try: config[k] = int(v) except ValueError: @@ -330,6 +327,7 @@ class AtomicS3File(file): """ An S3 file that writes to a temp file and put to S3 on close. """ + def __init__(self, path, s3_client): self.__tmp_path = \ os.path.join(tempfile.gettempdir(), @@ -353,7 +351,9 @@ def __del__(self): os.remove(self.__tmp_path) def __exit__(self, exc_type, exc, traceback): - " Close/commit the file if there are no exception " + """ + Close/commit the file if there are no exception. + """ if exc_type: return return file.__exit__(self, exc_type, exc, traceback) @@ -467,20 +467,34 @@ class S3FlagTarget(S3Target): Defines a target directory with a flag-file (defaults to `_SUCCESS`) used to signify job success. - This checks for two things: that the path exists (just like the S3Target) - and that the _SUCCESS file exists within the directory. Because Hadoop - outputs into a directory and not a single file, the path is assume to be a - directory. + This checks for two things: + + * the path exists (just like the S3Target) + * the _SUCCESS file exists within the directory. + + Because Hadoop outputs into a directory and not a single file, + the path is assumed to be a directory. + + This is meant to be a handy alternative to AtomicS3File. - This is meant to be a handy alternative to AtomicS3File. The AtomicFile - approach can be burdensome for S3 since there are no directories, per se. - If we have 1,000,000 output files, then we have to rename 1,000,000 - objects. + The AtomicFile approach can be burdensome for S3 since there are no directories, per se. + + If we have 1,000,000 output files, then we have to rename 1,000,000 objects. """ fs = None def __init__(self, path, format=None, client=None, flag='_SUCCESS'): + """ + Initializes a S3FlagTarget. + + :param path: the directory where the files are stored. + :type path: str + :param client: + :type client: + :param flag: + :type flag: str + """ if path[-1] is not "/": raise ValueError("S3FlagTarget requires the path to be to a " "directory. 
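The ``num_parts`` expression above is just ceiling division written out with a modulo check; an equivalent formulation (not the code in this diff) as a worked example:

.. code-block:: python

    # A 150 MiB file with the default 64 MiB part size is uploaded in 3 parts:
    part_size = 64 * 1024 ** 2
    source_size = 150 * 1024 ** 2
    num_parts = (source_size + part_size - 1) // part_size   # ceiling division
    assert num_parts == 3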
It must end with a slash ( / ).") @@ -498,6 +512,7 @@ class S3EmrTarget(S3FlagTarget): """ Deprecated. Use :py:class:`S3FlagTarget` """ + def __init__(self, *args, **kwargs): warnings.warn("S3EmrTarget is deprecated. Please use S3FlagTarget") super(S3EmrTarget, self).__init__(*args, **kwargs) @@ -505,8 +520,7 @@ def __init__(self, *args, **kwargs): class S3PathTask(ExternalTask): """ - A external task that to require existence of - a path in S3. + A external task that to require existence of a path in S3. """ path = Parameter() @@ -516,7 +530,7 @@ def output(self): class S3EmrTask(ExternalTask): """ - An external task that requires the existence of EMR output in S3 + An external task that requires the existence of EMR output in S3. """ path = Parameter() @@ -526,7 +540,7 @@ def output(self): class S3FlagTask(ExternalTask): """ - An external task that requires the existence of EMR output in S3 + An external task that requires the existence of EMR output in S3. """ path = Parameter() flag = Parameter(default=None) diff --git a/luigi/scalding.py b/luigi/scalding.py index ebb22aa6ce..488b39645e 100644 --- a/luigi/scalding.py +++ b/luigi/scalding.py @@ -1,261 +1,19 @@ -import logging -import os -import re -import subprocess - -from luigi import LocalTarget -import configuration -import hadoop -import hadoop_jar - -logger = logging.getLogger('luigi-interface') - -""" -Scalding support for Luigi. - -Example configuration section in client.cfg: -[scalding] -# scala home directory, which should include a lib subdir with scala jars. -scala-home: /usr/share/scala - -# scalding home directory, which should include a lib subdir with -# scalding-*-assembly-* jars as built from the official Twitter build script. -scalding-home: /usr/share/scalding - -# provided dependencies, e.g. jars required for compiling but not executing -# scalding jobs. Currently requred jars: -# org.apache.hadoop/hadoop-core/0.20.2 -# org.slf4j/slf4j-log4j12/1.6.6 -# log4j/log4j/1.2.15 -# commons-httpclient/commons-httpclient/3.1 -# commons-cli/commons-cli/1.2 -# org.apache.zookeeper/zookeeper/3.3.4 -scalding-provided: /usr/share/scalding/provided - -# additional jars required. -scalding-libjars: /usr/share/scalding/libjars -""" - - -class ScaldingJobRunner(hadoop.JobRunner): - """JobRunner for `pyscald` commands. 
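A sketch of constructing an ``S3FlagTarget`` per the reworded docstring; the bucket and prefix are invented, and note the required trailing slash:

.. code-block:: python

    from luigi.s3 import S3FlagTarget

    # exists() is true once both the directory and its _SUCCESS flag are present.
    target = S3FlagTarget('s3://example-bucket/output/2015-01-01/', flag='_SUCCESS')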
Used to run a ScaldingJobTask""" - - def __init__(self): - conf = configuration.get_config() - - default = os.environ.get('SCALA_HOME', '/usr/share/scala') - self.scala_home = conf.get('scalding', 'scala-home', default) - - default = os.environ.get('SCALDING_HOME', '/usr/share/scalding') - self.scalding_home = conf.get('scalding', 'scalding-home', default) - self.provided_dir = conf.get( - 'scalding', 'scalding-provided', os.path.join(default, 'provided')) - self.libjars_dir = conf.get( - 'scalding', 'scalding-libjars', os.path.join(default, 'libjars')) - - self.tmp_dir = LocalTarget(is_tmp=True) - - def _get_jars(self, path): - return [os.path.join(path, j) for j in os.listdir(path) - if j.endswith('.jar')] - - def get_scala_jars(self, include_compiler=False): - lib_dir = os.path.join(self.scala_home, 'lib') - jars = [os.path.join(lib_dir, 'scala-library.jar')] - - # additional jar for scala 2.10 only - reflect = os.path.join(lib_dir, 'scala-reflect.jar') - if os.path.exists(reflect): - jars.append(reflect) - - if include_compiler: - jars.append(os.path.join(lib_dir, 'scala-compiler.jar')) - - return jars - - def get_scalding_jars(self): - lib_dir = os.path.join(self.scalding_home, 'lib') - return self._get_jars(lib_dir) - - def get_scalding_core(self): - lib_dir = os.path.join(self.scalding_home, 'lib') - for j in os.listdir(lib_dir): - if j.startswith('scalding-core-'): - p = os.path.join(lib_dir, j) - logger.debug('Found scalding-core: %s', p) - return p - raise hadoop.HadoopJobError('Coudl not find scalding-core.') - - def get_provided_jars(self): - return self._get_jars(self.provided_dir) - - def get_libjars(self): - return self._get_jars(self.libjars_dir) - - def get_tmp_job_jar(self, source): - job_name = os.path.basename(os.path.splitext(source)[0]) - return os.path.join(self.tmp_dir.path, job_name + '.jar') - - def get_build_dir(self, source): - build_dir = os.path.join(self.tmp_dir.path, 'build') - return build_dir - - def get_job_class(self, source): - # find name of the job class - # usually the one that matches file name or last class that extends Job - job_name = os.path.splitext(os.path.basename(source))[0] - package = None - job_class = None - for l in open(source).readlines(): - p = re.search(r'package\s+([^\s\(]+)', l) - if p: - package = p.groups()[0] - p = re.search(r'class\s+([^\s\(]+).*extends\s+.*Job', l) - if p: - job_class = p.groups()[0] - if job_class == job_name: - break - if job_class: - if package: - job_class = package + '.' 
+ job_class - logger.debug('Found scalding job class: %s', job_class) - return job_class - else: - raise hadoop.HadoopJobError('Coudl not find scalding job class.') - - def build_job_jar(self, job): - job_jar = job.jar() - if job_jar: - if not os.path.exists(job_jar): - logger.error("Can't find jar: {0}, full path {1}".format( - job_jar, os.path.abspath(job_jar))) - raise Exception("job jar does not exist") - if not job.job_class(): - logger.error("Undefined job_class()") - raise Exception("Undefined job_class()") - return job_jar - - job_src = job.source() - if not job_src: - logger.error("Both source() and jar() undefined") - raise Exception("Both source() and jar() undefined") - if not os.path.exists(job_src): - logger.error("Can't find source: {0}, full path {1}".format( - job_src, os.path.abspath(job_src))) - raise Exception("job source does not exist") - - job_src = job.source() - job_jar = self.get_tmp_job_jar(job_src) - - build_dir = self.get_build_dir(job_src) - if not os.path.exists(build_dir): - os.makedirs(build_dir) - - classpath = ':'.join(filter(None, - self.get_scalding_jars() + - self.get_provided_jars() + - self.get_libjars() + - job.extra_jars())) - scala_cp = ':'.join(self.get_scala_jars(include_compiler=True)) - - # compile scala source - arglist = ['java', '-cp', scala_cp, 'scala.tools.nsc.Main', - '-classpath', classpath, - '-d', build_dir, job_src] - logger.info('Compiling scala source: %s', ' '.join(arglist)) - subprocess.check_call(arglist) - - # build job jar file - arglist = ['jar', 'cf', job_jar, '-C', build_dir, '.'] - logger.info('Building job jar: %s', ' '.join(arglist)) - subprocess.check_call(arglist) - return job_jar - - def run_job(self, job): - job_jar = self.build_job_jar(job) - jars = [job_jar] + self.get_libjars() + job.extra_jars() - scalding_core = self.get_scalding_core() - libjars = ','.join(filter(None, jars)) - arglist = ['hadoop', 'jar', scalding_core, '-libjars', libjars] - arglist += ['-D%s' % c for c in job.jobconfs()] - - job_class = job.job_class() or self.get_job_class(job.source()) - arglist += [job_class, '--hdfs'] - - # scalding does not parse argument with '=' properly - arglist += ['--name', job.task_id.replace('=', ':')] - - (tmp_files, job_args) = hadoop_jar.fix_paths(job) - arglist += job_args - - env = os.environ.copy() - jars.append(scalding_core) - hadoop_cp = ':'.join(filter(None, jars)) - env['HADOOP_CLASSPATH'] = hadoop_cp - logger.info("Submitting Hadoop job: HADOOP_CLASSPATH=%s %s", - hadoop_cp, ' '.join(arglist)) - hadoop.run_and_track_hadoop_job(arglist, env=env) - - for a, b in tmp_files: - a.move(b) - - -class ScaldingJobTask(hadoop.BaseHadoopJobTask): - """A job task for Scalding that define a scala source and (optional) main - method - - requires() should return a dictionary where the keys are Scalding argument - names and values are lists of paths. For example: - {'input1': ['A', 'B'], 'input2': ['C']} => --input1 A B --input2 C - """ - - def relpath(self, current_file, rel_path): - """Compute path given current file and relative path""" - script_dir = os.path.dirname(os.path.abspath(current_file)) - rel_path = os.path.abspath(os.path.join(script_dir, rel_path)) - return rel_path - - def source(self): - """Path to the scala source for this Scalding Job - Either one of source() or jar() must be specified. - """ - return None - - def jar(self): - """Path to the jar file for this Scalding Job - Either one of source() or jar() must be specified. 
- """ - return None - - def extra_jars(self): - """Extra jars for building and running this Scalding Job""" - return [] - - def job_class(self): - """optional main job class for this Scalding Job""" - return None - - def job_runner(self): - return ScaldingJobRunner() - - def atomic_output(self): - """If True, then rewrite output arguments to be temp locations and - atomically move them into place after the job finishes""" - return True - - def requires(self): - return {} - - def job_args(self): - """Extra arguments to pass to the Scalding job""" - return [] - - def args(self): - """returns an array of args to pass to the job.""" - arglist = [] - for k, v in self.requires_hadoop().iteritems(): - arglist.append('--' + k) - arglist.extend([t.output().path for t in v]) - arglist.extend(['--output', self.output()]) - arglist.extend(self.job_args()) - return arglist +# Copyright (c) 2015 Spotify AB +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +import warnings + +from luigi.contrib.scalding import * + +warnings.warn("luigi.scalding has now moved to luigi.contrib.scalding", DeprecationWarning, stacklevel=3) diff --git a/luigi/scheduler.py b/luigi/scheduler.py index bfefc61ded..e885509ce2 100644 --- a/luigi/scheduler.py +++ b/luigi/scheduler.py @@ -13,24 +13,27 @@ # the License. import collections +import cPickle as pickle import datetime import functools -import notifications -import os +import itertools import logging +import os import time -import cPickle as pickle + +import notifications import task_history as history -logger = logging.getLogger("luigi.server") +from task_status import DISABLED, DONE, FAILED, PENDING, RUNNING, SUSPENDED, UNKNOWN -from task_status import PENDING, FAILED, DONE, RUNNING, SUSPENDED, UNKNOWN, DISABLED +logger = logging.getLogger("luigi.server") class Scheduler(object): - ''' Abstract base class + """ + Abstract base class. Note that the methods all take string arguments, not Task objects... - ''' + """"" add_task = NotImplemented get_work = NotImplemented ping = NotImplemented @@ -47,7 +50,7 @@ class Scheduler(object): UPSTREAM_FAILED, UPSTREAM_DISABLED, ) -UPSTREAM_SEVERITY_KEY = lambda st: UPSTREAM_SEVERITY_ORDER.index(st) +UPSTREAM_SEVERITY_KEY = UPSTREAM_SEVERITY_ORDER.index STATUS_TO_UPSTREAM_MAP = { FAILED: UPSTREAM_FAILED, RUNNING: UPSTREAM_RUNNING, @@ -58,8 +61,10 @@ class Scheduler(object): # We're passing around this config a lot, so let's put it on an object SchedulerConfig = collections.namedtuple('SchedulerConfig', [ - 'retry_delay', 'remove_delay', 'worker_disconnect_delay', - 'disable_failures', 'disable_window', 'disable_persist', 'disable_time']) + 'retry_delay', 'remove_delay', 'worker_disconnect_delay', + 'disable_failures', 'disable_window', 'disable_persist', 'disable_time', + 'max_shown_tasks', +]) def fix_time(x): @@ -72,27 +77,32 @@ def fix_time(x): class Failures(object): - """ This class tracks the number of failures in a given time window + """ + This class tracks the number of failures in a given time window. 
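The ``luigi.scalding`` module body moves to ``luigi.contrib.scalding``, leaving a deprecation shim behind. Migration is just an import change:

.. code-block:: python

    # Old import; still works through the shim but emits a DeprecationWarning:
    # from luigi.scalding import ScaldingJobTask

    # New location:
    from luigi.contrib.scalding import ScaldingJobTask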
Failures added are marked with the current timestamp, and this class counts the number of failures in a sliding time window ending at the present. - """ def __init__(self, window): - """ Initialize with the given window + """ + Initialize with the given window. - :param window: how long to track failures for, as a float (number of seconds) + :param window: how long to track failures for, as a float (number of seconds). """ self.window = window self.failures = collections.deque() def add_failure(self): - """ Add a failure event with the current timestamp """ + """ + Add a failure event with the current timestamp. + """ self.failures.append(time.time()) def num_failures(self): - """ Return the number of failures in the window """ + """ + Return the number of failures in the window. + """ min_time = time.time() - self.window while self.failures and fix_time(self.failures[0]) < min_time: @@ -101,14 +111,24 @@ def num_failures(self): return len(self.failures) def clear(self): - """ Clear the failure queue """ + """ + Clear the failure queue. + """ self.failures.clear() +def _get_default(x, default): + if x is not None: + return x + else: + return default + + class Task(object): - def __init__(self, id, status, deps, resources={}, priority=0, family='', params={}, + + def __init__(self, task_id, status, deps, resources=None, priority=0, family='', params=None, disable_failures=None, disable_window=None): - self.id = id + self.id = task_id self.stakeholders = set() # workers ids that are somehow related to this task (i.e. don't prune while any of these workers are still active) self.workers = set() # workers ids that can perform task - task is 'BROKEN' if none of these workers are active if deps is None: @@ -123,9 +143,9 @@ def __init__(self, id, status, deps, resources={}, priority=0, family='', params self.time_running = None # Timestamp when picked up by worker self.expl = None self.priority = priority - self.resources = resources + self.resources = _get_default(resources, {}) self.family = family - self.params = params + self.params = _get_default(params, {}) self.disable_failures = disable_failures self.failures = Failures(disable_window) self.scheduler_disable_time = None @@ -142,83 +162,14 @@ def has_excessive_failures(self): def can_disable(self): return self.disable_failures is not None - def re_enable(self): - self.scheduler_disable_time = None - self.status = FAILED - self.failures.clear() - - def set_status(self, new_status, config): - # not sure why we have SUSPENDED, as it can never be set - if new_status == SUSPENDED: - new_status = PENDING - - if new_status == DISABLED and self.status == RUNNING: - return - - if self.status == DISABLED: - if new_status == DONE: - self.re_enable() - - # don't allow workers to override a scheduler disable - elif self.scheduler_disable_time is not None: - return - - if new_status == FAILED and self.can_disable(): - self.add_failure() - if self.has_excessive_failures(): - self.scheduler_disable_time = time.time() - new_status = DISABLED - notifications.send_error_email( - 'Luigi Scheduler: DISABLED {task} due to excessive failures'.format(task=self.id), - '{task} failed {failures} times in the last {window} seconds, so it is being ' - 'disabled for {persist} seconds'.format( - failures=config.disable_failures, - task=self.id, - window=config.disable_window, - persist=config.disable_persist, - )) - elif new_status == DISABLED: - self.scheduler_disable_time = None - - self.status = new_status - - def prune(self, config): - remove = False - - # Mark 
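The ``Failures`` sliding window drives the automatic disabling logic; a small sketch of its behaviour:

.. code-block:: python

    from luigi.scheduler import Failures

    f = Failures(window=60)        # count failures over the trailing 60 seconds
    f.add_failure()
    f.add_failure()
    assert f.num_failures() == 2   # both fall inside the window
    f.clear()
    assert f.num_failures() == 0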
tasks with no remaining active stakeholders for deletion - if not self.stakeholders: - if self.remove is None: - logger.info("Task %r has stakeholders %r but none remain connected -> will remove task in %s seconds", self.id, self.stakeholders, config.remove_delay) - self.remove = time.time() + config.remove_delay - - # If a running worker disconnects, tag all its jobs as FAILED and subject it to the same retry logic - if self.status == RUNNING and self.worker_running and self.worker_running not in self.stakeholders: - logger.info("Task %r is marked as running by disconnected worker %r -> marking as FAILED with retry delay of %rs", self.id, self.worker_running, config.retry_delay) - self.worker_running = None - self.set_status(FAILED, config) - self.retry = time.time() + config.retry_delay - - # Re-enable task after the disable time expires - if self.status == DISABLED and self.scheduler_disable_time: - if time.time() - fix_time(self.scheduler_disable_time) > config.disable_time: - self.re_enable() - - # Remove tasks that have no stakeholders - if self.remove and time.time() > self.remove: - logger.info("Removing task %r (no connected stakeholders)", self.id) - remove = True - - # Reset FAILED tasks to PENDING if max timeout is reached, and retry delay is >= 0 - if self.status == FAILED and config.retry_delay >= 0 and self.retry < time.time(): - self.set_status(PENDING, config) - - return remove - class Worker(object): - """ Structure for tracking worker activity and keeping their references """ - def __init__(self, id, last_active=None): - self.id = id + """ + Structure for tracking worker activity and keeping their references. + """ + + def __init__(self, worker_id, last_active=None): + self.id = worker_id self.reference = None # reference to the worker in the real world. (Currently a dict containing just the host) self.last_active = last_active # seconds since epoch self.started = time.time() # seconds since epoch @@ -242,16 +193,18 @@ def __str__(self): class SimpleTaskState(object): - ''' Keep track of the current state and handle persistance + """ + Keep track of the current state and handle persistance. The point of this class is to enable other ways to keep state, eg. by using a database These will be implemented by creating an abstract base class that this and other classes inherit from. - ''' + """ def __init__(self, state_path): self._state_path = state_path self._tasks = {} # map from id to a Task object + self._status_tasks = collections.defaultdict(dict) self._active_workers = {} # map from id to a Worker object def dump(self): @@ -276,40 +229,134 @@ def load(self): return self._tasks, self._active_workers = state + self._status_tasks = collections.defaultdict(dict) + for task in self._tasks.itervalues(): + self._status_tasks[task.status][task.id] = task # Convert from old format # TODO: this is really ugly, we need something more future-proof # Every time we add an attribute to the Worker class, this code needs to be updated for k, v in self._active_workers.iteritems(): if isinstance(v, float): - self._active_workers[k] = Worker(id=k, last_active=v) + self._active_workers[k] = Worker(worker_id=k, last_active=v) else: logger.info("No prior state file exists at %s. 
Starting with clean slate", self._state_path) - def get_active_tasks(self): - for task in self._tasks.itervalues(): - yield task + def get_active_tasks(self, status=None): + if status: + for task in self._status_tasks[status].itervalues(): + yield task + else: + for task in self._tasks.itervalues(): + yield task + + def get_running_tasks(self): + return self._status_tasks[RUNNING].itervalues() def get_pending_tasks(self): - for task in self._tasks.itervalues(): - if task.status in [PENDING, RUNNING]: - yield task + return itertools.chain.from_iterable(self._status_tasks[status].itervalues() + for status in [PENDING, RUNNING]) def get_task(self, task_id, default=None, setdefault=None): if setdefault: - return self._tasks.setdefault(task_id, setdefault) + task = self._tasks.setdefault(task_id, setdefault) + self._status_tasks[task.status][task.id] = task + return task else: return self._tasks.get(task_id, default) def has_task(self, task_id): return task_id in self._tasks + def re_enable(self, task, config=None): + task.scheduler_disable_time = None + task.failures.clear() + if config: + self.set_status(task, FAILED, config) + task.failures.clear() + + def set_status(self, task, new_status, config=None): + if new_status == FAILED: + assert config is not None + + # not sure why we have SUSPENDED, as it can never be set + if new_status == SUSPENDED: + new_status = PENDING + + if new_status == DISABLED and task.status == RUNNING: + return + + if task.status == DISABLED: + if new_status == DONE: + self.re_enable(task) + + # don't allow workers to override a scheduler disable + elif task.scheduler_disable_time is not None: + return + + if new_status == FAILED and task.can_disable(): + task.add_failure() + if task.has_excessive_failures(): + task.scheduler_disable_time = time.time() + new_status = DISABLED + notifications.send_error_email( + 'Luigi Scheduler: DISABLED {task} due to excessive failures'.format(task=task.id), + '{task} failed {failures} times in the last {window} seconds, so it is being ' + 'disabled for {persist} seconds'.format( + failures=config.disable_failures, + task=task.id, + window=config.disable_window, + persist=config.disable_persist, + )) + elif new_status == DISABLED: + task.scheduler_disable_time = None + + self._status_tasks[task.status].pop(task.id) + self._status_tasks[new_status][task.id] = task + task.status = new_status + + def prune(self, task, config): + remove = False + + # Mark tasks with no remaining active stakeholders for deletion + if not task.stakeholders: + if task.remove is None: + logger.info("Task %r has stakeholders %r but none remain connected -> will remove " + "task in %s seconds", task.id, task.stakeholders, config.remove_delay) + task.remove = time.time() + config.remove_delay + + # If a running worker disconnects, tag all its jobs as FAILED and subject it to the same retry logic + if task.status == RUNNING and task.worker_running and task.worker_running not in task.stakeholders: + logger.info("Task %r is marked as running by disconnected worker %r -> marking as " + "FAILED with retry delay of %rs", task.id, task.worker_running, + config.retry_delay) + task.worker_running = None + self.set_status(task, FAILED, config) + task.retry = time.time() + config.retry_delay + + # Re-enable task after the disable time expires + if task.status == DISABLED and task.scheduler_disable_time: + if time.time() - fix_time(task.scheduler_disable_time) > config.disable_time: + self.re_enable(task, config) + + # Remove tasks that have no stakeholders + if 
task.remove and time.time() > task.remove: + logger.info("Removing task %r (no connected stakeholders)", task.id) + remove = True + + # Reset FAILED tasks to PENDING if max timeout is reached, and retry delay is >= 0 + if task.status == FAILED and config.retry_delay >= 0 and task.retry < time.time(): + self.set_status(task, PENDING, config) + + return remove + def inactivate_tasks(self, delete_tasks): # The terminology is a bit confusing: we used to "delete" tasks when they became inactive, # but with a pluggable state storage, you might very well want to keep some history of # older tasks as well. That's why we call it "inactivate" (as in the verb) for task in delete_tasks: - self._tasks.pop(task) + task_obj = self._tasks.pop(task) + self._status_tasks[task_obj.status].pop(task) def get_active_workers(self, last_active_lt=None): for worker in self._active_workers.itervalues(): @@ -318,7 +365,7 @@ def get_active_workers(self, last_active_lt=None): yield worker def get_worker_ids(self): - return self._active_workers.keys() # only used for unit tests + return self._active_workers.keys() # only used for unit tests def get_worker(self, worker_id): return self._active_workers.setdefault(worker_id, Worker(worker_id)) @@ -335,22 +382,24 @@ def inactivate_workers(self, delete_workers): class CentralPlannerScheduler(Scheduler): - ''' Async scheduler that can handle multiple workers etc + """ + Async scheduler that can handle multiple workers, etc. Can be run locally or on a server (using RemoteScheduler + server.Server). - ''' + """ def __init__(self, retry_delay=900.0, remove_delay=600.0, worker_disconnect_delay=60.0, state_path='/var/lib/luigi-server/state.pickle', task_history=None, - resources=None, disable_persist=0, disable_window=0, disable_failures=None): - ''' + resources=None, disable_persist=0, disable_window=0, disable_failures=None, + max_shown_tasks=100000): + """ (all arguments are in seconds) Keyword Arguments: - retry_delay -- How long after a Task fails to try it again, or -1 to never retry - remove_delay -- How long after a Task finishes to remove it from the scheduler - state_path -- Path to state file (tasks and active workers) - worker_disconnect_delay -- If a worker hasn't communicated for this long, remove it from active workers - ''' + :param retry_delay: how long after a Task fails to try it again, or -1 to never retry. + :param remove_delay: how long after a Task finishes to remove it from the scheduler. + :param state_path: path to state file (tasks and active workers). + :param worker_disconnect_delay: if a worker hasn't communicated for this long, remove it from active workers. 
+ """ self._config = SchedulerConfig( retry_delay=retry_delay, remove_delay=remove_delay, @@ -358,11 +407,11 @@ def __init__(self, retry_delay=900.0, remove_delay=600.0, worker_disconnect_dela disable_failures=disable_failures, disable_window=disable_window, disable_persist=disable_persist, - disable_time=disable_persist) + disable_time=disable_persist, + max_shown_tasks=max_shown_tasks, + ) - self._task_history = task_history or history.NopHistory() self._state = SimpleTaskState(state_path) - self._task_history = task_history or history.NopHistory() self._resources = resources self._make_task = functools.partial( @@ -387,7 +436,7 @@ def prune(self): remove_tasks = [] for task in self._state.get_active_tasks(): - if task.prune(self._config): + if self._state.prune(task, self._config): remove_tasks.append(task.id) self._state.inactivate_tasks(remove_tasks) @@ -395,15 +444,18 @@ def prune(self): logger.info("Done pruning task graph") def update(self, worker_id, worker_reference=None): - """ Keep track of whenever the worker was last active """ + """ + Keep track of whenever the worker was last active. + """ worker = self._state.get_worker(worker_id) worker.update(worker_reference) def _update_priority(self, task, prio, worker): - """ Update priority of the given task + """ + Update priority of the given task. - Priority can only be increased. If the task doesn't exist, a placeholder - task is created to preserve priority when the task is later scheduled. + Priority can only be increased. + If the task doesn't exist, a placeholder task is created to preserve priority when the task is later scheduled. """ task.priority = prio = max(prio, task.priority) for dep in task.deps or []: @@ -413,25 +465,25 @@ def _update_priority(self, task, prio, worker): def add_task(self, worker, task_id, status=PENDING, runnable=True, deps=None, new_deps=None, expl=None, resources=None, - priority=0, family='', params={}): + priority=0, family='', params=None, **kwargs): """ - * Add task identified by task_id if it doesn't exist - * If deps is not None, update dependency list - * Update status of task - * Add additional workers/stakeholders - * Update priority when needed + * add task identified by task_id if it doesn't exist + * if deps is not None, update dependency list + * update status of task + * add additional workers/stakeholders + * update priority when needed """ self.update(worker) task = self._state.get_task(task_id, setdefault=self._make_task( - id=task_id, status=PENDING, deps=deps, resources=resources, - priority=priority, family=family, params=params)) + task_id=task_id, status=PENDING, deps=deps, resources=resources, + priority=priority, family=family, params=params)) # for setting priority, we'll sometimes create tasks with unset family and params if not task.family: task.family = family if not task.params: - task.params = params + task.params = _get_default(params, {}) if task.remove is not None: task.remove = None # unmark task for removal so it isn't removed after being added @@ -443,7 +495,7 @@ def add_task(self, worker, task_id, status=PENDING, runnable=True, # We also check for status == PENDING b/c that's the default value # (so checking for status != task.status woule lie) self._update_task_history(task_id, status) - task.set_status(PENDING if status == SUSPENDED else status, self._config) + self._state.set_status(task, PENDING if status == SUSPENDED else status, self._config) if status == FAILED: task.retry = time.time() + self._config.retry_delay @@ -459,7 +511,7 @@ def add_task(self, 
worker, task_id, status=PENDING, runnable=True, # Task dependencies might not exist yet. Let's create dummy tasks for them for now. # Otherwise the task dependencies might end up being pruned if scheduling takes a long time for dep in task.deps or []: - t = self._state.get_task(dep, setdefault=self._make_task(id=dep, status=UNKNOWN, deps=None, priority=priority)) + t = self._state.get_task(dep, setdefault=self._make_task(task_id=dep, status=UNKNOWN, deps=None, priority=priority)) t.stakeholders.add(worker) self._update_priority(task, priority, worker) @@ -470,7 +522,7 @@ def add_task(self, worker, task_id, status=PENDING, runnable=True, if expl is not None: task.expl = expl - def add_worker(self, worker, info): + def add_worker(self, worker, info, **kwargs): self._state.get_worker(worker).add_info(info) def update_resources(self, **resources): @@ -483,7 +535,7 @@ def _has_resources(self, needed_resources, used_resources): return True available_resources = self._resources or {} - for resource, amount in needed_resources.items(): + for resource, amount in needed_resources.iteritems(): if amount + used_resources[resource] > available_resources.get(resource, 1): return False return True @@ -493,17 +545,22 @@ def _used_resources(self): if self._resources is not None: for task in self._state.get_active_tasks(): if task.status == RUNNING and task.resources: - for resource, amount in task.resources.items(): + for resource, amount in task.resources.iteritems(): used_resources[resource] += amount return used_resources def _rank(self): - ''' Return worker's rank function for task scheduling ''' + """ + Return worker's rank function for task scheduling. + + :return: + """ dependents = collections.defaultdict(int) + def not_done(t): task = self._state.get_task(t, default=None) return task is None or task.status != DONE - for task in self._state.get_active_tasks(): + for task in self._state.get_pending_tasks(): if task.status != DONE: deps = filter(not_done, task.deps) inverse_num_deps = 1.0 / max(len(deps), 1) @@ -521,7 +578,7 @@ def _schedulable(self, task): return False return True - def get_work(self, worker, host=None): + def get_work(self, worker, host=None, **kwargs): # TODO: remove any expired nodes # Algo: iterate over all nodes, find the highest priority node no dependencies and available @@ -568,7 +625,7 @@ def get_work(self, worker, host=None): if task.status == RUNNING and task.worker_running in greedy_workers: greedy_workers[task.worker_running] -= 1 - for resource, amount in (task.resources or {}).items(): + for resource, amount in (task.resources or {}).iteritems(): greedy_resources[resource] += amount if not best_task and self._schedulable(task) and self._has_resources(task.resources, greedy_resources): @@ -582,13 +639,13 @@ def get_work(self, worker, host=None): greedy_workers[task_worker] -= 1 # keep track of the resources used in greedy scheduling - for resource, amount in (task.resources or {}).items(): + for resource, amount in (task.resources or {}).iteritems(): greedy_resources[resource] += amount break if best_task: - best_task.status = RUNNING + self._state.set_status(best_task, RUNNING, self._config) best_task.worker_running = worker best_task.time_running = time.time() self._update_task_history(best_task.id, RUNNING, host=host) @@ -598,7 +655,7 @@ def get_work(self, worker, host=None): 'task_id': best_task_id, 'running_tasks': running_tasks} - def ping(self, worker): + def ping(self, worker, **kwargs): self.update(worker) def _upstream_status(self, task_id, 
upstream_status_table): @@ -621,7 +678,7 @@ def _upstream_status(self, task_id, upstream_status_table): elif upstream_status_table[dep_id] == '' and dep.deps: # This is the postorder update step when we set the # status based on the previously calculated child elements - upstream_status = [upstream_status_table.get(id, '') for id in dep.deps] + upstream_status = [upstream_status_table.get(task_id, '') for task_id in dep.deps] upstream_status.append('') # to handle empty list status = max(upstream_status, key=UPSTREAM_SEVERITY_KEY) upstream_status_table[dep_id] = status @@ -645,7 +702,7 @@ def _serialize_task(self, task_id, include_deps=True): ret['deps'] = list(task.deps) return ret - def graph(self): + def graph(self, **kwargs): self.prune() serialized = {} for task in self._state.get_active_tasks(): @@ -678,27 +735,30 @@ def _recurse_deps(self, task_id, serialized): for dep in task.deps: self._recurse_deps(dep, serialized) - def dep_graph(self, task_id): + def dep_graph(self, task_id, **kwargs): self.prune() serialized = {} if self._state.has_task(task_id): self._recurse_deps(task_id, serialized) return serialized - def task_list(self, status, upstream_status): - ''' query for a subset of tasks by status ''' + def task_list(self, status, upstream_status, limit=True, **kwargs): + """ + Query for a subset of tasks by status. + """ self.prune() result = {} upstream_status_table = {} # used to memoize upstream status - for task in self._state.get_active_tasks(): - if not status or task.status == status: - if (task.status != PENDING or not upstream_status or + for task in self._state.get_active_tasks(status): + if (task.status != PENDING or not upstream_status or upstream_status == self._upstream_status(task.id, upstream_status_table)): - serialized = self._serialize_task(task.id, False) - result[task.id] = serialized + serialized = self._serialize_task(task.id, False) + result[task.id] = serialized + if limit and len(result) > self._config.max_shown_tasks: + return {'num_tasks': len(result)} return result - def worker_list(self, include_running=True): + def worker_list(self, include_running=True, **kwargs): self.prune() workers = [ dict( @@ -728,7 +788,7 @@ def worker_list(self, include_running=True): worker['running'] = tasks return workers - def inverse_dependencies(self, task_id): + def inverse_dependencies(self, task_id, **kwargs): self.prune() serialized = {} if self._state.has_task(task_id): @@ -748,8 +808,13 @@ def _traverse_inverse_deps(self, task_id, serialized): serialized[task.id]["deps"] = [] stack.append(task.id) - def task_search(self, task_str): - ''' query for a subset of tasks by task_id ''' + def task_search(self, task_str, **kwargs): + """ + Query for a subset of tasks by task_id. + + :param task_str: + :return: + """ self.prune() result = collections.defaultdict(dict) for task in self._state.get_active_tasks(): @@ -762,11 +827,11 @@ def re_enable_task(self, task_id): serialized = {} task = self._state.get_task(task_id) if task and task.status == DISABLED and task.scheduler_disable_time: - task.re_enable() + self._state.re_enable(task, self._config) serialized = self._serialize_task(task_id) return serialized - def fetch_error(self, task_id): + def fetch_error(self, task_id, **kwargs): if self._state.has_task(task_id): return {"taskId": task_id, "error": self._state.get_task(task_id).expl} else: diff --git a/luigi/server.py b/luigi/server.py index cddbfa71cd..430737deda 100644 --- a/luigi/server.py +++ b/luigi/server.py @@ -13,23 +13,25 @@ # the License. 
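Editor's note on the scheduler changes above: SimpleTaskState now keeps a secondary index, _status_tasks, mapping each status to the tasks currently in it, which is what lets get_active_tasks(status), get_pending_tasks() and task_list() avoid scanning every task. The standalone sketch below is illustrative only; StatusIndex, _Task and by_status are made-up names modelling that bookkeeping, not code from the patch.

import collections


class _Task(object):
    def __init__(self, task_id, status):
        self.id, self.status = task_id, status


class StatusIndex(object):
    """Toy model of the _status_tasks bookkeeping added to SimpleTaskState."""

    def __init__(self):
        self._tasks = {}                                    # task_id -> task
        self._status_tasks = collections.defaultdict(dict)  # status -> {task_id: task}

    def add(self, task):
        self._tasks[task.id] = task
        self._status_tasks[task.status][task.id] = task

    def set_status(self, task, new_status):
        # both structures must be updated together, as in SimpleTaskState.set_status()
        self._status_tasks[task.status].pop(task.id)
        self._status_tasks[new_status][task.id] = task
        task.status = new_status

    def by_status(self, status):
        # O(tasks in that status) instead of a scan over self._tasks
        return self._status_tasks[status].values()


index = StatusIndex()
t = _Task('MyTask(date=2015-01-01)', 'PENDING')
index.add(t)
index.set_status(t, 'RUNNING')
assert [task.id for task in index.by_status('RUNNING')] == [t.id]

Relatedly, task_list() now caps its response: when more tasks match than the new max_shown_tasks setting, it returns only {'num_tasks': N} so the visualiser is not handed an unbounded payload.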
# Simple REST server that takes commands in a JSON payload -import json -import os import atexit +import json +import logging import mimetypes +import os import posixpath +import signal + +import pkg_resources +import tornado.httpclient +import tornado.httpserver import tornado.ioloop import tornado.netutil import tornado.web -import tornado.httpclient -import tornado.httpserver + import configuration import scheduler -import pkg_resources -import signal -from rpc import RemoteSchedulerResponder import task_history -import logging + logger = logging.getLogger("luigi.server") @@ -45,6 +47,7 @@ def _create_scheduler(): disable_window = config.getint('scheduler', 'disable-window-seconds', 3600) disable_failures = config.getint('scheduler', 'disable-num-failures', None) disable_persist = config.getint('scheduler', 'disable-persist-seconds', 86400) + max_shown_tasks = config.getint('scheduler', 'max-shown-tasks', 100000) resources = config.getintdict('resources') if config.getboolean('scheduler', 'record_task_history', False): @@ -54,21 +57,28 @@ def _create_scheduler(): task_history_impl = task_history.NopHistory() return scheduler.CentralPlannerScheduler( retry_delay, remove_delay, worker_disconnect_delay, state_path, task_history_impl, - resources, disable_persist, disable_window, disable_failures) + resources, disable_persist, disable_window, disable_failures, max_shown_tasks, + ) class RPCHandler(tornado.web.RequestHandler): - """ Handle remote scheduling calls using rpc.RemoteSchedulerResponder""" + """ + Handle remote scheduling calls using rpc.RemoteSchedulerResponder. + """ - def initialize(self, api): - self._api = api + def initialize(self, scheduler): + self._scheduler = scheduler def get(self, method): payload = self.get_argument('data', default="{}") arguments = json.loads(payload) - if hasattr(self._api, method): - result = getattr(self._api, method)(**arguments) + # TODO: we should probably denote all methods on the scheduler that are "API-level" + # versus internal methods. Right now you can do a REST method call to any method + # defined on the scheduler, which is pretty bad from a security point of view. 
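The TODO in RPCHandler.get above notes that the handler will dispatch any method name it receives straight onto the scheduler object. Purely as an illustration of the allow-list approach that comment hints at (API_METHODS and RestrictedRPCHandler are hypothetical names, not part of this patch), the dispatch could be limited to known RPC endpoints:

import json

import tornado.web

# Names taken from the scheduler methods exposed in this patch; the set itself is illustrative.
API_METHODS = frozenset([
    'add_task', 'add_worker', 'get_work', 'ping', 'graph', 'dep_graph',
    'task_list', 'worker_list', 'inverse_dependencies', 'task_search',
    're_enable_task', 'fetch_error', 'update_resources',
])


class RestrictedRPCHandler(tornado.web.RequestHandler):

    def initialize(self, scheduler):
        self._scheduler = scheduler

    def get(self, method):
        payload = self.get_argument('data', default="{}")
        arguments = json.loads(payload)
        if method in API_METHODS and hasattr(self._scheduler, method):
            result = getattr(self._scheduler, method)(**arguments)
            self.write({"response": result})  # same response envelope as RPCHandler
        else:
            self.send_error(404)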
+ + if hasattr(self._scheduler, method): + result = getattr(self._scheduler, method)(**arguments) self.write({"response": result}) # wrap all json response in a dictionary else: self.send_error(404) @@ -77,40 +87,46 @@ def get(self, method): class BaseTaskHistoryHandler(tornado.web.RequestHandler): - def initialize(self, api): - self._api = api + + def initialize(self, scheduler): + self._scheduler = scheduler def get_template_path(self): return pkg_resources.resource_filename(__name__, 'templates') class RecentRunHandler(BaseTaskHistoryHandler): + def get(self): - tasks = self._api.task_history.find_latest_runs() + tasks = self._scheduler.task_history.find_latest_runs() self.render("recent.html", tasks=tasks) class ByNameHandler(BaseTaskHistoryHandler): + def get(self, name): - tasks = self._api.task_history.find_all_by_name(name) + tasks = self._scheduler.task_history.find_all_by_name(name) self.render("recent.html", tasks=tasks) class ByIdHandler(BaseTaskHistoryHandler): + def get(self, id): - task = self._api.task_history.find_task_by_id(id) + task = self._scheduler.task_history.find_task_by_id(id) self.render("show.html", task=task) class ByParamsHandler(BaseTaskHistoryHandler): + def get(self, name): payload = self.get_argument('data', default="{}") arguments = json.loads(payload) - tasks = self._api.task_history.find_all_by_parameters(name, session=None, **arguments) + tasks = self._scheduler.task_history.find_all_by_parameters(name, session=None, **arguments) self.render("recent.html", tasks=tasks) class StaticFileHandler(tornado.web.RequestHandler): + def get(self, path): # Path checking taken from Flask's safe_join function: # https://github.com/mitsuhiko/flask/blob/1d55b8983/flask/helpers.py#L563-L587 @@ -126,27 +142,29 @@ def get(self, path): class RootPathHandler(tornado.web.RequestHandler): + def get(self): self.redirect("/static/visualiser/index.html") -def app(api): +def app(scheduler): handlers = [ - (r'/api/(.*)', RPCHandler, {"api": api}), + (r'/api/(.*)', RPCHandler, {"scheduler": scheduler}), (r'/static/(.*)', StaticFileHandler), (r'/', RootPathHandler), - (r'/history', RecentRunHandler, {'api': api}), - (r'/history/by_name/(.*?)', ByNameHandler, {'api': api}), - (r'/history/by_id/(.*?)', ByIdHandler, {'api': api}), - (r'/history/by_params/(.*?)', ByParamsHandler, {'api': api}) + (r'/history', RecentRunHandler, {'scheduler': scheduler}), + (r'/history/by_name/(.*?)', ByNameHandler, {'scheduler': scheduler}), + (r'/history/by_id/(.*?)', ByIdHandler, {'scheduler': scheduler}), + (r'/history/by_params/(.*?)', ByParamsHandler, {'scheduler': scheduler}) ] api_app = tornado.web.Application(handlers) return api_app -def _init_api(sched, responder, api_port, address): - api = responder or RemoteSchedulerResponder(sched) - api_app = app(api) +def _init_api(sched, responder=None, api_port=None, address=None): + if responder: + raise Exception('The "responder" argument is no longer supported') + api_app = app(sched) api_sockets = tornado.netutil.bind_sockets(api_port, address=address) server = tornado.httpserver.HTTPServer(api_app) server.add_sockets(api_sockets) @@ -156,7 +174,9 @@ def _init_api(sched, responder, api_port, address): def run(api_port=8082, address=None, scheduler=None, responder=None): - """ Runs one instance of the API server """ + """ + Runs one instance of the API server. 
+ """ sched = scheduler or _create_scheduler() # load scheduler state sched.load() @@ -175,9 +195,9 @@ def shutdown_handler(foo=None, bar=None): signal.signal(signal.SIGINT, shutdown_handler) signal.signal(signal.SIGTERM, shutdown_handler) if os.name == 'nt': - signal.signal(signal.SIGBREAK, shutdown_handler) + signal.signal(signal.SIGBREAK, shutdown_handler) else: - signal.signal(signal.SIGQUIT, shutdown_handler) + signal.signal(signal.SIGQUIT, shutdown_handler) atexit.register(shutdown_handler) logger.info("Scheduler starting up") @@ -186,10 +206,17 @@ def shutdown_handler(foo=None, bar=None): def run_api_threaded(api_port=8082, address=None): - ''' For integration tests''' + """ + For integration tests. + + :param api_port: + :param address: + :return: + """ sock_names = _init_api(_create_scheduler(), None, api_port, address) import threading + def scheduler_thread(): # this is wrapped in a function so we get the instance # from the scheduler thread and not from the main thread diff --git a/luigi/static/visualiser/index.html b/luigi/static/visualiser/index.html index f479865c6a..151ac630b5 100644 --- a/luigi/static/visualiser/index.html +++ b/luigi/static/visualiser/index.html @@ -82,6 +82,9 @@
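One editorial aside on a pattern used throughout the scheduler changes earlier in this patch: Task.__init__ and add_task switch from mutable default arguments (resources={}, params={}) to None plus the _get_default helper. The self-contained snippet below is illustrative only (risky and safe are made-up names); it shows the pitfall that change avoids, namely that a mutable default is created once at definition time and shared across calls.

def risky(resources={}):
    # the default dict is created once and reused for every call
    resources['used'] = resources.get('used', 0) + 1
    return resources


def safe(resources=None):
    # mirrors the _get_default(resources, {}) pattern from the patch
    resources = resources if resources is not None else {}
    resources['used'] = resources.get('used', 0) + 1
    return resources


first, second = risky(), risky()
assert first is second and first == {'used': 2}   # state leaked between calls

a, b = safe(), safe()
assert a is not b and a == b == {'used': 1}       # each call gets a fresh dict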

{{/tasks}} +