diff --git a/docs/source/child_modules.rst b/docs/source/child_modules.rst index afa1e0afbcd7c7..c50075e9d4b50a 100644 --- a/docs/source/child_modules.rst +++ b/docs/source/child_modules.rst @@ -66,7 +66,9 @@ that change in the `Autoencoder` model are the init, forward, training, validati x_hat = self(representation) loss = F.nll_loss(logits, y) - return {f'{prefix}_loss': loss} + result = pl.EvalResult() + result.log(f'{prefix}_loss', loss) + return result and we can train this using the same trainer diff --git a/docs/source/hyperparameters.rst b/docs/source/hyperparameters.rst index 26d38679fd847d..4af29f35859625 100644 --- a/docs/source/hyperparameters.rst +++ b/docs/source/hyperparameters.rst @@ -42,6 +42,8 @@ It is best practice to layer your arguments in three sections. 2. Model specific arguments (layer_dim, num_layers, learning_rate, etc...) 3. Program arguments (data_path, cluster_email, etc...) +| + We can do this as follows. First, in your LightningModule, define the arguments specific to that module. Remember that data splits or data paths may also be specific to a module (ie: if your project has a model that trains on Imagenet and another on CIFAR-10). diff --git a/docs/source/introduction_guide.rst b/docs/source/introduction_guide.rst index abca6333cba119..440fe2536f2d88 100644 --- a/docs/source/introduction_guide.rst +++ b/docs/source/introduction_guide.rst @@ -320,6 +320,8 @@ When your models need to know about the data, it's best to process the data befo 1. use `prepare_data` to download and process the dataset. 2. use `setup` to do splits, and build your model internals +| + .. testcode:: class LitMNIST(LightningModule): @@ -391,11 +393,11 @@ In the case of MNIST we do the following for epoch in epochs: for batch in data: - # TRAINING STEP START + # ------ TRAINING STEP START ------ x, y = batch logits = model(x) loss = F.nll_loss(logits, y) - # TRAINING STEP END + # ------ TRAINING STEP END ------ loss.backward() optimizer.step() @@ -419,12 +421,13 @@ This code is not restricted which means it can be as complicated as a full seq-2 TrainResult ^^^^^^^^^^^ -Whenever you'd like more control over the outputs of the `training_step` use a `TrainResult` object which can: +Whenever you'd like to log, or sync values across GPUs use `TrainResult`. - log to Tensorboard or the other logger of your choice. - log to the progress-bar. - log on every step. - log aggregate epoch metrics. +- average values across GPUs/TPU cores .. code-block:: python @@ -441,6 +444,13 @@ Whenever you'd like more control over the outputs of the `training_step` use a ` # equivalent result.log('train_loss', loss, on_step=True, on_epoch=False, prog_bar=False, logger=True, reduce_fx=torch.mean) +When training across accelerators (GPUs/TPUs) you can sync a metric if needed. + +.. code-block:: python + + # sync across GPUs / TPUs, etc... + result.log('train_loss', loss, sync_dist=True) + If you are only using a training_loop (`training_step`) without a validation or test loop (`validation_step`, `test_step`), you can still use EarlyStopping or automatic checkpointing @@ -460,6 +470,8 @@ So far we defined 4 key ingredients in pure PyTorch but organized the code with 3. Optimizer. 4. What happens in the training loop. +| + For clarity, we'll recall that the full LightningModule now looks like this. .. code-block:: python @@ -533,6 +545,9 @@ Which will generate automatic tensorboard logs. .. 
figure:: /_images/mnist_imgs/mnist_tb.png :alt: mnist CPU bar + :width: 500 + +| But you can also use any of the `number of other loggers `_ we support. @@ -585,13 +600,20 @@ First, change the runtime to TPU (and reinstall lightning). .. figure:: /_images/mnist_imgs/runtime_tpu.png :alt: mnist GPU bar + :width: 400 .. figure:: /_images/mnist_imgs/restart_runtime.png :alt: mnist GPU bar + :width: 400 + +| Next, install the required xla library (adds support for PyTorch on TPUs) +.. code-block:: shell + !curl https://raw.githubusercontent.com/pytorch/xla/master/contrib/scripts/env-setup.py -o pytorch-xla-env-setup.py + !python pytorch-xla-env-setup.py --version nightly --apt-packages libomp5 libopenblas-dev In distributed training (multiple GPUs and multiple TPU cores) each GPU or TPU core will run a copy @@ -607,6 +629,10 @@ In this method we do all the preparation we need to do once (instead of on every .. code-block:: python class MNISTDataModule(LightningDataModule): + def __init__(self, batch_size=64): + super().__init__() + self.batch_size = batch_size + def prepare_data(self): # download only MNIST(os.getcwd(), train=True, download=True, transform=transforms.ToTensor()) @@ -614,7 +640,7 @@ In this method we do all the preparation we need to do once (instead of on every def setup(self, stage): # transform - transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]) + transform=transforms.Compose([transforms.ToTensor()]) MNIST(os.getcwd(), train=True, download=False, transform=transform) MNIST(os.getcwd(), train=False, download=False, transform=transform) @@ -627,13 +653,13 @@ In this method we do all the preparation we need to do once (instead of on every self.test_dataset = mnist_test def train_dataloader(self): - return DataLoader(self.train_dataset, batch_size=64) + return DataLoader(self.train_dataset, batch_size=self.batch_size) def val_dataloader(self): - return DataLoader(self.val_dataset, batch_size=64) + return DataLoader(self.val_dataset, batch_size=self.batch_size) def test_dataloader(self): - return DataLoader(self.test_dataset, batch_size=64) + return DataLoader(self.test_dataset, batch_size=self.batch_size) The `prepare_data` method is also a good place to do any data processing that needs to be done only once (ie: download or tokenize, etc...). @@ -653,11 +679,13 @@ You'll now see the TPU cores booting up. .. figure:: /_images/mnist_imgs/tpu_start.png :alt: TPU start + :width: 400 Notice the epoch is MUCH faster! .. figure:: /_images/mnist_imgs/tpu_fast.png :alt: TPU speed + :width: 600 ---------------- @@ -737,12 +765,13 @@ If you still need even more fine-grain control, define the other optional method .. code-block:: python def validation_step(self, batch, batch_idx): - val_step_output = {'step_output': x} - return val_step_output + result = pl.EvalResult() + result.prediction = some_prediction + return result def validation_epoch_end(self, val_step_outputs): - for val_step_output in val_step_outputs: - # each object here is what you passed back at each validation_step + # do something with all the predictions from each validation_step + all_predictions = val_step_outputs.prediction ---------------- diff --git a/docs/source/lightning-module.rst b/docs/source/lightning-module.rst index 3e329bec3a4e82..bf202129a65c7c 100644 --- a/docs/source/lightning-module.rst +++ b/docs/source/lightning-module.rst @@ -3,10 +3,1233 @@ LightningModule =============== +A :class:`~LightningModule` organizes your PyTorch code into 5 sections -.. 
automodule:: pytorch_lightning.core - :noindex: - :exclude-members: - _abc_impl, - summarize, +- Computations (init). +- Train loop (training_step) +- Validation loop (validation_step) +- Test loop (test_step) +- Optimizers (configure_optimizers) +| + +.. figure:: https://pl-bolts-doc-images.s3.us-east-2.amazonaws.com/pl_mod_small.gif + :alt: Convert from PyTorch to Lightning + +| + +Notice a few things. + +1. It's the SAME code. +2. The PyTorch code IS NOT abstracted - just organized. +3. All the other code that's not in the :class:`~LightningModule` + has been automated for you by the trainer. + +| + + .. code-block:: python + + net = Net() + trainer = Trainer() + trainer.fit(net) + +4. There are no .cuda() or .to() calls... Lightning does these for you. + +| + + .. code-block:: python + + # don't do in lightning + x = torch.Tensor(2, 3) + x = x.cuda() + x = x.to(device) + + # do this instead + x = x # leave it alone! + + # or to init a new tensor + new_x = torch.Tensor(2, 3) + new_x = new_x.type_as(x.type()) + +5. There are no samplers for distributed, Lightning also does this for you. + +| + + .. code-block:: python + + # Don't do in Lightning... + data = MNIST(...) + sampler = DistributedSampler(data) + DataLoader(data, sampler=sampler) + + # do this instead + data = MNIST(...) + DataLoader(data) + +6. A :class:`~LightningModule` is a :class:`torch.nn.Module` but with added functionality. Use it as such! + +| + + .. code-block:: python + + net = Net.load_from_checkpoint(PATH) + net.freeze() + out = net(x) + +Thus, to use Lightning, you just need to organize your code which takes about 30 minutes, +(and let's be real, you probably should do anyhow). + +------------ + +Minimal Example +--------------- + +Here are the only required methods. + +.. code-block:: python + + >>> import pytorch_lightning as pl + >>> class LitModel(pl.LightningModule): + ... + ... def __init__(self): + ... super().__init__() + ... self.l1 = torch.nn.Linear(28 * 28, 10) + ... + ... def forward(self, x): + ... return torch.relu(self.l1(x.view(x.size(0), -1))) + ... + ... def training_step(self, batch, batch_idx): + ... x, y = batch + ... y_hat = self(x) + ... loss = F.cross_entropy(y_hat, y) + ... return pl.TrainResult(loss) + ... + ... def configure_optimizers(self): + ... return torch.optim.Adam(self.parameters(), lr=0.02) + +Which you can train by doing: + +.. code-block:: python + + train_loader = DataLoader(MNIST(os.getcwd(), download=True, transform=transforms.ToTensor())) + trainer = pl.Trainer() + model = LitModel() + + trainer.fit(model, train_loader) + +---------- + +LightningModule for research +---------------------------- +For research, LightningModules are best structured as systems. + +A model (colloquially) refers to something like a resnet or RNN. A system, may be a collection of models. Here +are examples of systems: + +- GAN (generator, discriminator) +- RL (policy, actor, critic) +- Autoencoders (encoder, decoder) +- Seq2Seq (encoder, attention, decoder) +- etc... + +A LightningModule is best used to define a complex system: + +.. 
code-block:: python + + import pytorch_lightning as pl + import torch + from torch import nn + + class Autoencoder(pl.LightningModule): + + def __init__(self, latent_dim=2): + super().__init__() + self.encoder = nn.Sequential(nn.Linear(28 * 28, 256), nn.ReLU(), nn.Linear(256, latent_dim)) + self.decoder = nn.Sequential(nn.Linear(latent_dim, 256), nn.ReLU(), nn.Linear(256, 28 * 28)) + + def training_step(self, batch, batch_idx): + x, _ = batch + + # encode + x = x.view(x.size(0), -1) + z = self.encoder(x) + + # decode + recons = self.decoder(z) + + # reconstruction + reconstruction_loss = nn.functional.mse_loss(recons, x) + return pl.TrainResult(reconstruction_loss) + + def validation_step(self, batch, batch_idx): + x, _ = batch + x = x.view(x.size(0), -1) + z = self.encoder(x) + recons = self.decoder(z) + reconstruction_loss = nn.functional.mse_loss(recons, x) + + result = pl.EvalResult(checkpoint_on=reconstruction_loss) + return result + + def configure_optimizers(self): + return torch.optim.Adam(self.parameters(), lr=0.0002) + +Which can be trained like this: + +.. code-block:: python + + autoencoder = Autoencoder() + trainer = pl.Trainer(gpus=1) + trainer.fit(autoencoder, train_dataloader, val_dataloader) + +This simple model generates examples that look like this (the encoders and decoders are too weak) + +.. figure:: https://pl-bolts-doc-images.s3.us-east-2.amazonaws.com/pl_docs/ae_docs.png + :width: 300 + +The methods above are part of the lightning interface: + +- training_step +- validation_step +- test_step +- configure_optimizers + +Note that in this case, the train loop and val loop are exactly the same. We can of course reuse this code. + +.. code-block:: python + + class Autoencoder(pl.LightningModule): + + def __init__(self, latent_dim=2): + super().__init__() + self.encoder = nn.Sequential(nn.Linear(28 * 28, 256), nn.ReLU(), nn.Linear(256, latent_dim)) + self.decoder = nn.Sequential(nn.Linear(latent_dim, 256), nn.ReLU(), nn.Linear(256, 28 * 28)) + + def training_step(self, batch, batch_idx): + loss = self.shared_step(batch) + return pl.TrainResult(loss) + + def validation_step(self, batch, batch_idx): + loss = self.shared_step(batch) + result = pl.EvalResult(checkpoint_on=loss) + return result + + def shared_step(self, batch): + x, _ = batch + + # encode + x = x.view(x.size(0), -1) + z = self.encoder(x) + + # decode + recons = self.decoder(z) + + # loss + return nn.functional.mse_loss(recons, x) + + def configure_optimizers(self): + return torch.optim.Adam(self.parameters(), lr=0.0002) + +We create a new method called `shared_step` that all loops can use. This method name is arbitrary and NOT reserved. + +Inference in Research +^^^^^^^^^^^^^^^^^^^^^ +In the case where we want to perform inference with the system we can add a `forward` method to the LightningModule. + +.. code-block:: python + + class Autoencoder(pl.LightningModule): + def forward(self, x): + return self.decoder(x) + +The advantage of adding a forward is that in complex systems, you can do a much more involved inference procedure, +such as text generation: + +.. code-block:: python + + class Seq2Seq(pl.LightningModule): + + def forward(self, x): + embeddings = self(x) + hidden_states = self.encoder(embeddings) + for h in hidden_states: + # decode + ... + return decoded + +--------------------- + +LightningModule for production +------------------------------ +For cases like production, you might want to iterate different models inside a LightningModule. + +.. 
code-block:: python + + import pytorch_lightning as pl + from pytorch_lightning.metrics import functional as FM + + class ClassificationTask(pl.LightningModule): + + def __init__(self, model): + super().__init__() + self.model = model + + def training_step(self, batch, batch_idx): + x, y = batch + y_hat = self.model(x) + loss = F.cross_entropy(y_hat, y) + return pl.TrainResult(loss) + + def validation_step(self, batch, batch_idx): + x, y = batch + y_hat = self.model(x) + loss = F.cross_entropy(y_hat, y) + acc = FM.accuracy(y_hat, y) + result = pl.EvalResult(checkpoint_on=loss) + result.log_dict({'val_acc': acc, 'val_loss': loss}) + return result + + def test_step(self, batch, batch_idx): + result = self.validation_step(batch, batch_idx) + result.rename_keys({'val_acc': 'test_acc', 'val_loss': 'test_loss'}) + return result + + def configure_optimizers(self): + return torch.optim.Adam(self.model.parameters(), lr=0.02) + +Then pass in any arbitrary model to be fit with this task + +.. code-block:: python + + for model in [resnet50(), vgg16(), BidirectionalRNN()]: + task = ClassificationTask(model) + + trainer = Trainer(gpus=2) + trainer.fit(task, train_dataloader, val_dataloader) + +Tasks can be arbitrarily complex such as implementing GAN training, self-supervised or even RL. + +.. code-block:: python + + class GANTask(pl.LightningModule): + + def __init__(self, generator, discriminator): + super().__init__() + self.generator = generator + self.discriminator = discriminator + ... + +Inference in production +^^^^^^^^^^^^^^^^^^^^^^^ +When used like this, the model can be separated from the Task and thus used in production without needing to keep it in +a `LightningModule`. + +- You can export to onnx. +- Or trace using Jit. +- or run in the python runtime. + +.. code-block:: python + + task = ClassificationTask(model) + + trainer = Trainer(gpus=2) + trainer.fit(task, train_dataloader, val_dataloader) + + # use model after training or load weights and drop into the production system + model.eval() + y_hat = model(x) + + +Training loop +------------- +To add a training loop use the `training_step` method + +.. code-block:: python + + class LitClassifier(pl.LightningModule): + + def __init__(self, model): + super().__init__() + self.model = model + + def training_step(self, batch, batch_idx): + x, y = batch + y_hat = self.model(x) + loss = F.cross_entropy(y_hat, y) + return pl.TrainResult(loss) + +Under the hood, Lightning does the following (pseudocode): + +.. code-block:: python + + # put model in train mode + model.train() + torch.set_grad_enabled(True) + + outs = [] + for batch in train_dataloader: + # forward + out = training_step(val_batch) + + # backward + loss.backward() + + # apply and clear grads + optimizer.step() + optimizer.zero_grad() + +Training epoch-level metrics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +If you want to calculate epoch-level metrics and log them, use the `TrainResult.log` method + +.. code-block:: python + + def training_step(self, batch, batch_idx): + x, y = batch + y_hat = self.model(x) + loss = F.cross_entropy(y_hat, y) + result = pl.TrainResult(loss) + + # logs metrics for each training_step, and the average across the epoch, to the progress bar and logger + result.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True) + return result + +The `TrainResult.log` object automatically reduces the requested metrics across the full epoch. +Here's the pseudocode of what it does under the hood: + +.. 
code-block:: python + + outs = [] + for batch in train_dataloader: + # forward + out = training_step(val_batch) + + # backward + loss.backward() + + # apply and clear grads + optimizer.step() + optimizer.zero_grad() + + epoch_metric = torch.mean(torch.stack([x['train_loss'] for x in outs])) + +Train epoch-level operations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +If you need to do something with all the outputs of each `training_step`, override `training_epoch_end` yourself. + +.. code-block:: python + + def training_step(self, batch, batch_idx): + x, y = batch + y_hat = self.model(x) + loss = F.cross_entropy(y_hat, y) + result = pl.TrainResult(loss) + result.prediction = some_prediction + + def training_epoch_end(self, training_step_outputs): + all_predictions = training_step_outputs.prediction + ... + return result + +The matching pseudocode is: + +.. code-block:: python + + outs = [] + for batch in train_dataloader: + # forward + out = training_step(val_batch) + + # backward + loss.backward() + + # apply and clear grads + optimizer.step() + optimizer.zero_grad() + + epoch_out = training_epoch_end(outs) + +Training with DataParallel +^^^^^^^^^^^^^^^^^^^^^^^^^^ +When training using a `distributed_backend` that splits data from each batch across GPUs, sometimes you might +need to aggregate them on the master GPU for processing (dp, or ddp2). + +In this case, implement the `training_step_end` method + +.. code-block:: python + + def training_step(self, batch, batch_idx): + x, y = batch + y_hat = self.model(x) + loss = F.cross_entropy(y_hat, y) + result = pl.TrainResult(loss) + result.prediction = some_prediction + + def training_step_end(self, batch_parts): + gpu_0_prediction = batch_parts.prediction[0] + gpu_1_prediction = batch_parts.prediction[1] + + # do something with both outputs + return result + + def training_epoch_end(self, training_step_outputs): + all_predictions = training_step_outputs.prediction + ... + return result + +The full pseudocode that lighting does under the hood is: + +.. code-block:: python + + outs = [] + for train_batch in train_dataloader: + batches = split_batch(train_batch) + dp_outs = [] + for sub_batch in batches: + # 1 + dp_out = training_step(sub_batch) + dp_outs.append(dp_out) + + # 2 + out = training_step_end(dp_outs) + outs.append(out) + + # do something with the outputs for all batches + # 3 + training_epoch_end(outs) + +------------------ + +Validation loop +--------------- +To add a validation loop, override the `validation_step` method of the :class:`~LightningModule`: + +.. code-block:: python + + class LitModel(pl.LightningModule): + def validation_step(self, batch, batch_idx): + x, y = batch + y_hat = self.model(x) + loss = F.cross_entropy(y_hat, y) + result = pl.EvalResult(checkpoint_on=loss) + return result + +Under the hood, Lightning does the following: + +.. code-block:: python + + # ... + for batch in train_dataloader: + loss = model.training_step() + loss.backward() + # ... + + if validate_at_some_point: + # disable grads + batchnorm + dropout + torch.set_grad_enabled(False) + model.eval() + + # ----------------- VAL LOOP --------------- + for val_batch in model.val_dataloader: + val_out = model.validation_step(val_batch) + # ----------------- VAL LOOP --------------- + + # enable grads + batchnorm + dropout + torch.set_grad_enabled(True) + model.train() + +Validation epoch-level metrics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +If you need to do something with all the outputs of each `validation_step`, override `validation_epoch_end`. + +.. 
code-block:: python + + def validation_step(self, batch, batch_idx): + x, y = batch + y_hat = self.model(x) + loss = F.cross_entropy(y_hat, y) + result = pl.EvalResult(loss) + result.prediction = some_prediction + + def validation_epoch_end(self, validation_step_outputs): + all_predictions = validation_step_outputs.prediction + ... + return result + +Validating with DataParallel +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +When training using a `distributed_backend` that splits data from each batch across GPUs, sometimes you might +need to aggregate them on the master GPU for processing (dp, or ddp2). + +In this case, implement the `validation_step_end` method + +.. code-block:: python + + def validation_step(self, batch, batch_idx): + x, y = batch + y_hat = self.model(x) + loss = F.cross_entropy(y_hat, y) + result = pl.EvalResult(loss) + result.prediction = some_prediction + + def validation_step_end(self, batch_parts): + gpu_0_prediction = batch_parts.prediction[0] + gpu_1_prediction = batch_parts.prediction[1] + + # do something with both outputs + return result + + def validation_epoch_end(self, validation_step_outputs): + all_predictions = validation_step_outputs.prediction + ... + return result + +The full pseudocode that lighting does under the hood is: + +.. code-block:: python + + outs = [] + for batch in dataloader: + batches = split_batch(batch) + dp_outs = [] + for sub_batch in batches: + # 1 + dp_out = validation_step(sub_batch) + dp_outs.append(dp_out) + + # 2 + out = validation_step_end(dp_outs) + outs.append(out) + + # do something with the outputs for all batches + # 3 + validation_epoch_end(outs) + +---------------- + +Test loop +--------- +The process for adding a test loop is the same as the process for adding a validation loop. Please refer to +the section above for details. + +The only difference is that the test loop is only called when `.test()` is used: + +.. code-block:: python + + model = Model() + trainer = Trainer() + trainer.fit() + + # automatically loads the best weights for you + trainer.test(model) + +There are two ways to call `test()`: + +.. code-block:: python + + # call after training + trainer = Trainer() + trainer.fit(model) + + # automatically auto-loads the best weights + trainer.test(test_dataloaders=test_dataloader) + + # or call with pretrained model + model = MyLightningModule.load_from_checkpoint(PATH) + trainer = Trainer() + trainer.test(model, test_dataloaders=test_dataloader) + +---------- + +Live demo +--------- +Check out this +`COLAB `_ +for a live demo. + +----------- + +LightningModule API +------------------- + +Training loop methods +^^^^^^^^^^^^^^^^^^^^^ + +training_step +~~~~~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.lightning.LightningModule.training_step + :noindex: + +training_step_end +~~~~~~~~~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.lightning.LightningModule.training_step_end + :noindex: + +training_epoch_end +~~~~~~~~~~~~~~~~~~ +.. autofunction:: pytorch_lightning.core.lightning.LightningModule.training_epoch_end + :noindex: + +--------------- + +Validation loop methods +^^^^^^^^^^^^^^^^^^^^^^^ + +validation_step +~~~~~~~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.lightning.LightningModule.validation_step + :noindex: + +validation_step_end +~~~~~~~~~~~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.lightning.LightningModule.validation_step_end + :noindex: + +validation_epoch_end +~~~~~~~~~~~~~~~~~~~~ + +.. 
autofunction:: pytorch_lightning.core.lightning.LightningModule.validation_epoch_end + :noindex: + +---------------- + +test loop methods +^^^^^^^^^^^^^^^^^ + +test_step +~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.lightning.LightningModule.test_step + :noindex: + +test_step_end +~~~~~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.lightning.LightningModule.test_step_end + :noindex: + +test_epoch_end +~~~~~~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.lightning.LightningModule.test_epoch_end + :noindex: + +-------------- + +configure_optimizers +^^^^^^^^^^^^^^^^^^^^ + +.. autofunction:: pytorch_lightning.core.lightning.LightningModule.configure_optimizers + :noindex: + +-------------- + +Convenience methods +^^^^^^^^^^^^^^^^^^^ +Use these methods for convenience + +print +~~~~~ + +.. autofunction:: pytorch_lightning.core.lightning.LightningModule.print + :noindex: + +save_hyperparameters +~~~~~~~~~~~~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.lightning.LightningModule.save_hyperparameters + :noindex: + +------------ + +Inference methods +^^^^^^^^^^^^^^^^^ +Use these hooks for inference with a lightning module + +forward +~~~~~~~ + +.. autofunction:: pytorch_lightning.core.lightning.LightningModule.forward + :noindex: + +freeze +~~~~~~ + +.. autofunction:: pytorch_lightning.core.lightning.LightningModule.freeze + :noindex: + +to_onnx +~~~~~~~ + +.. autofunction:: pytorch_lightning.core.lightning.LightningModule.to_onnx + :noindex: + +unfreeze +~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.lightning.LightningModule.unfreeze + :noindex: + +------------ + +Properties +^^^^^^^^^^ +These are properties available in a LightningModule. + +----------- + +current_epoch +~~~~~~~~~~~~~ +The current epoch + +.. code-block:: python + + def training_step(...): + if self.current_epoch == 0: + +------------- + +device +~~~~~~ +The device the module is on. Use it to keep your code device agnostic + +.. code-block:: python + + def training_step(...): + z = torch.rand(2, 3, device=self.device) + +------------- + +global_rank +~~~~~~~~~~~ +The global_rank of this LightningModule. Lightning saves logs, weights etc only from global_rank = 0. You +normally do not need to use this property + +Global rank refers to the index of that GPU across ALL GPUs. For example, if using 10 machines, each with 4 GPUs, +the 4th GPU on the 10th machine has global_rank = 39 + +------------- + +global_step +~~~~~~~~~~~ +The current step (does not reset each epoch) + +.. code-block:: python + + def training_step(...): + self.logger.experiment.log_image(..., step=self.global_step) + +------------- + +hparams +~~~~~~~ +After calling `save_hyperparameters` anything passed to init() is available via hparams. + +.. code-block:: python + + def __init__(self, learning_rate): + self.save_hyperparameters() + + def configure_optimizers(self): + return Adam(self.parameters(), lr=self.hparams.learning_rate) + +-------------- + +logger +~~~~~~ +The current logger being used (tensorboard or other supported logger) + +.. code-block:: python + + def training_step(...): + # the generic logger (same no matter if tensorboard or other supported logger) + self.logger + + # the particular logger + tensorboard_logger = self.logger.experiment + +-------------- + +local_rank +~~~~~~~~~~~ +The local_rank of this LightningModule. Lightning saves logs, weights etc only from global_rank = 0. You +normally do not need to use this property + +Local rank refers to the rank on that machine. 
For example, if using 10 machines, the GPU at index 0 on each machine +has local_rank = 0. + + +----------- + +precision +~~~~~~~~~ +The type of precision used: + +.. code-block:: python + + def training_step(...): + if self.precision == 16: + +------------ + +trainer +~~~~~~~ +Pointer to the trainer + +.. code-block:: python + + def training_step(...): + max_steps = self.trainer.max_steps + any_flag = self.trainer.any_flag + +------------ + +use_ddp +~~~~~~~ +True if using ddp + +------------ + +use_ddp2 +~~~~~~~~ +True if using ddp2 + +------------ + +use_dp +~~~~~~ +True if using dp + +------------ + +use_tpu +~~~~~~~ +True if using TPUs + +-------------- + +Hooks +----- + +Hook lifecycle pseudocode +^^^^^^^^^^^^^^^^^^^^^^^^^ +This is the pseudocode to describe how all the hooks are called during a call to `.fit()` + +.. code-block:: python + + def fit(...): + on_fit_start() + + if global_rank == 0: + # prepare data is called on GLOBAL_ZERO only + prepare_data() + + for gpu/tpu in gpu/tpus: + train_on_device(model.copy()) + + on_fit_end() + + def train_on_device(model): + # setup is called PER DEVICE + setup() + configure_optimizers() + on_pretrain_routine_start() + + for epoch in epochs: + train_loop() + + teardown() + + def train_loop(): + on_train_epoch_start() + train_outs = [] + for train_batch in train_dataloader(): + on_train_batch_start() + + # ----- train_step methods ------- + out = training_step(batch) + train_outs.append(out) + + loss = out.loss + + backward() + on_after_backward() + optimizer_step() + on_before_zero_grad() + optimizer_zero_grad() + + on_train_batch_end() + + if should_check_val: + val_loop() + + # end training epoch + logs = training_epoch_end(outs) + + def val_loop(): + model.eval() + torch.set_grad_enabled(False) + + on_validation_epoch_start() + val_outs = [] + for val_batch in val_dataloader(): + on_validation_batch_start() + + # -------- val step methods ------- + out = validation_step(val_batch) + val_outs.append(out) + + on_validation_batch_end() + + validation_epoch_end(val_outs) + on_validation_epoch_end() + + # set up for train + model.train() + torch.set_grad_enabled(True) + + +Advanced hooks +^^^^^^^^^^^^^^ +Use these hooks to modify advanced functionality + +configure_apex +~~~~~~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.lightning.LightningModule.configure_apex + :noindex: + +configure_ddp +~~~~~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.lightning.LightningModule.configure_ddp + :noindex: + +configure_sync_batchnorm +~~~~~~~~~~~~~~~~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.lightning.LightningModule.configure_ddp + :noindex: + +get_progress_bar_dict +~~~~~~~~~~~~~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.lightning.LightningModule.get_progress_bar_dict + :noindex: + +init_ddp_connection +~~~~~~~~~~~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.lightning.LightningModule.init_ddp_connection + :noindex: + +tbptt_split_batch +~~~~~~~~~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.lightning.LightningModule.tbptt_split_batch + :noindex: + +Checkpoint hooks +^^^^^^^^^^^^^^^^ +These hooks allow you to modify checkpoints + +on_load_checkpoint +~~~~~~~~~~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.lightning.LightningModule.on_load_checkpoint + :noindex: + +on_save_checkpoint +~~~~~~~~~~~~~~~~~~ + +.. 
autofunction:: pytorch_lightning.core.lightning.LightningModule.on_save_checkpoint + :noindex: + +------------- + +Data hooks +^^^^^^^^^^ +Use these hooks if you want to couple a LightningModule to a dataset. + +.. note:: The same collection of hooks is available in a DataModule class to decouple the data from the model. + +train_dataloader +~~~~~~~~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.lightning.LightningModule.train_dataloader + :noindex: + +val_dataloader +~~~~~~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.lightning.LightningModule.val_dataloader + :noindex: + +test_dataloader +~~~~~~~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.lightning.LightningModule.test_dataloader + :noindex: + +prepare_data +~~~~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.lightning.LightningModule.prepare_data + :noindex: + +------------ + +Optimization hooks +^^^^^^^^^^^^^^^^^^ +These are hooks related to the optimization procedure. + +backward +~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.lightning.LightningModule.backward + :noindex: + +on_after_backward +~~~~~~~~~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.lightning.LightningModule.on_after_backward + :noindex: + +on_before_zero_grad +~~~~~~~~~~~~~~~~~~~ +.. autofunction:: pytorch_lightning.core.lightning.LightningModule.on_before_zero_grad + :noindex: + +optimizer_step +~~~~~~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.lightning.LightningModule.optimizer_step + :noindex: + +optimizer_zero_grad +~~~~~~~~~~~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.lightning.LightningModule.optimizer_zero_grad + :noindex: + +Training lifecycle hooks +^^^^^^^^^^^^^^^^^^^^^^^^^ +These hooks are called during training + +on_fit_start +~~~~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.hooks.ModelHooks.on_fit_start + :noindex: + +on_fit_end +~~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.hooks.ModelHooks.on_fit_end + :noindex: + +on_pretrain_routine_start +~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.hooks.ModelHooks.on_pretrain_routine_start + :noindex: + +on_pretrain_routine_end +~~~~~~~~~~~~~~~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.hooks.ModelHooks.on_pretrain_routine_end + :noindex: + +on_test_epoch_start +~~~~~~~~~~~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.hooks.ModelHooks.on_test_epoch_start + :noindex: + +on_test_epoch_end +~~~~~~~~~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.hooks.ModelHooks.on_test_epoch_end + :noindex: + +on_test_batch_start +~~~~~~~~~~~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.hooks.ModelHooks.on_test_batch_start + :noindex: + +on_test_batch_end +~~~~~~~~~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.hooks.ModelHooks.on_test_batch_end + :noindex: + +on_train_batch_start +~~~~~~~~~~~~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.hooks.ModelHooks.on_train_batch_start + :noindex: + +on_train_batch_end +~~~~~~~~~~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.hooks.ModelHooks.on_train_batch_end + :noindex: + +on_train_epoch_start +~~~~~~~~~~~~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.hooks.ModelHooks.on_train_epoch_start + :noindex: + +on_train_epoch_end +~~~~~~~~~~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.hooks.ModelHooks.on_train_epoch_end + :noindex: + +on_validation_batch_start +~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. 
autofunction:: pytorch_lightning.core.hooks.ModelHooks.on_validation_batch_start + :noindex: + +on_validation_batch_end +~~~~~~~~~~~~~~~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.hooks.ModelHooks.on_validation_batch_end + :noindex: + +on_validation_epoch_start +~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.hooks.ModelHooks.on_validation_epoch_start + :noindex: + +on_validation_epoch_end +~~~~~~~~~~~~~~~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.hooks.ModelHooks.on_validation_epoch_end + :noindex: + +setup +~~~~~ + +.. autofunction:: pytorch_lightning.core.hooks.ModelHooks.setup + :noindex: + +teardown +~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.hooks.ModelHooks.teardown + :noindex: + +transfer_batch_to_device +~~~~~~~~~~~~~~~~~~~~~~~~ + +.. autofunction:: pytorch_lightning.core.hooks.ModelHooks.transfer_batch_to_device + :noindex: diff --git a/docs/source/new-project.rst b/docs/source/new-project.rst index 166750bb652b09..e3aebefc788126 100644 --- a/docs/source/new-project.rst +++ b/docs/source/new-project.rst @@ -12,6 +12,7 @@ Quick Start =========== PyTorch Lightning is nothing more than organized PyTorch code. + Once you've organized it into a LightningModule, it automates most of the training for you. To illustrate, here's the typical PyTorch project structure organized in a LightningModule. @@ -107,7 +108,7 @@ All of it 100% rigorously tested and benchmarked Training loop under the hood ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Under the hood, lightning does (in high-level pseudocode): +Under the hood, lightning does the following (in high-level pseudocode): .. code-block:: python @@ -156,7 +157,12 @@ To add an (optional) validation loop add the following function x, y = batch y_hat = self(x) loss = F.cross_entropy(y_hat, y) - return {'val_loss': loss, 'log': {'val_loss': loss}} + + result = pl.EvalResult(checkpoint_on=loss) + result.log('val_loss', loss) + return result + +.. note:: EvalResult is a plain Dict, with convenience functions for logging And now the trainer will call the validation loop automatically @@ -216,7 +222,10 @@ You might also need an optional test loop x, y = batch y_hat = self(x) loss = F.cross_entropy(y_hat, y) - return {'test_loss': loss, 'log': {'test_loss': loss}} + + result = pl.EvalResult() + result.log('test_loss', loss) + return result However, this time you need to specifically call test (this is done so you don't use the test set by mistake) @@ -426,33 +435,18 @@ Lightning has built-in logging to any of the supported loggers or progress bar. Log in train loop ^^^^^^^^^^^^^^^^^ -To log from the training loop use the `log` reserved key. - -.. code-block:: python - - def training_step(self, batch, batch_idx): - loss = ... - return {'loss': loss, 'log': {'train_loss': loss}} - - -However, for more fine-grain control use the `TrainResult` object. -These are equivalent: +To log from the training loop use the `log` method in the `TrainResult`. .. code-block:: python def training_step(self, batch, batch_idx): loss = ... - return {'loss': loss, 'log': {'train_loss': loss}} - - # equivalent - def training_step(self, batch, batch_idx): - loss = ... - result = pl.TrainResult(minimize=loss) result.log('train_loss', loss) return result -But the TrainResult gives you error-checking and greater flexibility: +The `TrainResult` gives you options for logging on every step and/or at the end of the epoch. +It also allows logging to the progress bar. .. 
code-block:: python @@ -471,42 +465,16 @@ Then boot up your logger or tensorboard instance to view training logs Log in Val/Test loop ^^^^^^^^^^^^^^^^^^^^ -To log from the validation or test loop use a similar approach +To log from the validation or test loop use the `EvalResult`. .. code-block:: python def validation_step(self, batch, batch_idx): loss = ... - acc = ... - val_output = {'loss': loss, 'acc': acc} - return val_output - - def validation_epoch_end(self, validation_step_outputs): - # this step allows you to aggregate whatever you passed in from every val step - val_epoch_loss = torch.stack([x['loss'] for x in val_output]).mean() - val_epoch_acc = torch.stack([x['acc'] for x in val_output]).mean() - return { - 'val_loss': val_epoch_loss, - 'log': {'avg_val_loss': val_epoch_loss, 'avg_val_acc': val_epoch_acc} - } - -The recommended equivalent version in case you don't need to do anything special -with all the outputs of the validation step: - -.. code-block:: python - - def validation_step(self, batch, batch_idx): - loss = ... - acc = ... - - result = pl.EvalResult(checkpoint_on=loss) - result.log('val_loss', loss) - result.log('val_acc', acc) + result = pl.EvalResult() + result.log_dict({'val_loss': loss, 'val_acc': acc}) return result -.. note:: Only use `validation_epoch_end` if you need fine-grain control over aggreating all step outputs - - Log to the progress bar ^^^^^^^^^^^^^^^^^^^^^^^ | @@ -518,25 +486,47 @@ Log to the progress bar | -In addition to visual logging, you can log to the progress bar by using the keyword `progress_bar`: +In addition to visual logging, you can log to the progress bar by setting `prog_bar` to True .. code-block:: python def training_step(self, batch, batch_idx): loss = ... - return {'loss': loss, 'progress_bar': {'train_loss': loss}} + result = pl.TrainResult(loss) + result.log('train_loss', loss, prog_bar=True) + +----------------- -Or simply set `prog_bar=True` in either of the `EvalResult` or `TrainResult` +Advanced loop aggregation +------------------------- +For certain train/val/test loops, you may wish to do more than just logging. In this case, +you can also implement `__epoch_end` which gives you the output for each step + +Here's the motivating Pytorch example: .. code-block:: python - def training_step(self, batch, batch_idx): - result = TrainResult(loss) - result.log('train_loss', loss, prog_bar=True) - return result + validation_step_outputs = [] + for batch_idx, batch in val_dataloader(): + out = validation_step(batch, batch_idx) + validation_step_outputs.append(out) + validation_epoch_end(validation_step_outputs) ------------------ +And the lightning equivalent + +.. code-block:: python + + def validation_step(self, batch, batch_idx): + loss = ... + predictions = ... + result = pl.EvalResult(checkpoint_on=loss) + result.log('val_loss', loss) + result.predictions = predictions + + def validation_epoch_end(self, validation_step_outputs): + all_val_losses = validation_step_outputs.val_loss + all_predictions = validation_step_outputs.predictions Why do you need Lightning? -------------------------- @@ -544,12 +534,19 @@ The MAIN teakeaway points are: - Lightning is for professional AI researchers/production teams. - Lightning is organized PyTorch. It is not an abstraction. +- You STILL keep pure PyTorch. +- You DON't lose any flexibility. +- You can get rid of all of your boilerplate. +- You make your code generalizable to any hardware. 
+- Your code is now readable and easier to reproduce (ie: you help with the reproducibility crisis). +- Your LightningModule is still just a pure PyTorch module. Lightning is for you if ^^^^^^^^^^^^^^^^^^^^^^^ - You're a professional researcher/ml engineer working on non-trivial deep learning. - You already know PyTorch and are not a beginner. +- You want to iterate through research much faster. - You want to put models into production much faster. - You need full control of all the details but don't need the boilerplate. - You want to leverage code written by hundreds of AI researchers, research engs and PhDs from the world's top AI labs. @@ -617,13 +614,12 @@ would normally do. --------------- -Summary -------- -In short, by refactoring your PyTorch code: +Masterclass +----------- +You can learn Lightning in-depth by watching our Masterclass. -1. You STILL keep pure PyTorch. -2. You DON't lose any flexibility. -3. You can get rid of all of your boilerplate. -4. You make your code generalizable to any hardware. -5. Your code is now readable and easier to reproduce (ie: you help with the reproducibility crisis). -6. Your LightningModule is still just a pure PyTorch module. +.. image:: _images/general/PTL101_youtube_thumbnail.jpg + :width: 500 + :align: center + :alt: Masterclass + :target: https://www.youtube.com/playlist?list=PLaMu-SDt_RB5NUm67hU2pdE75j6KaIOv2 diff --git a/docs/source/results.rst b/docs/source/results.rst index 960cda2bcf399f..ed583ded863977 100644 --- a/docs/source/results.rst +++ b/docs/source/results.rst @@ -2,33 +2,17 @@ Result ====== Lightning has two results objects `TrainResult` and `EvalResult`. -When your `_step_end` or `_epoch_end` does nothing but aggregate metrics to log, you can delete those -methods and use a Result object instead. +Use these to control: -However, if you need fine-grain control to do more than logging or a complex aggregation, then keep -the loops as they are and do not use the `EvalResult` or `TrainResult` objects. - -.. note:: These objects are optional and should only be used if you don't need full control of the loops. +- When to log (each step and/or epoch aggregate). +- Where to log (progress bar or a logger). +- How to sync across accelerators. ------------------ Training loop example --------------------- -We can simplify the following multi-method training loop: - -.. code-block:: python - - def training_step(self, batch, batch_idx): - return {'loss': loss} - - def training_epoch_end(self, training_step_outputs): - epoch_loss = torch.stack([x['loss'] for x in training_step_outputs]).mean() - return { - 'log': {'epoch_loss': epoch_loss}, - 'progress_bar': {'epoch_loss': epoch_loss} - } - -using the equivalent syntax via the `TrainResult` object: +Return a `TrainResult` from the Training loop. .. code-block:: python @@ -38,34 +22,54 @@ using the equivalent syntax via the `TrainResult` object: result.log('train_loss', loss, prog_bar=True) return result +If you'd like to do something special with the outputs other than logging, implement `__epoch_end`. + +.. 
code-block:: python + + def training_step(self, batch, batch_idx): + result = pl.TrainResult(loss) + result.some_prediction = some_prediction + return result + + def training_epoch_end(self, training_step_output_result): + all_train_predictions = training_step_output_result.some_prediction + + training_step_output_result.some_new_prediction = some_new_prediction + return training_step_output_result + -------------------- Validation/Test loop example ----------------------------- -We can replace the following validation/test loop: +Return a `EvalResult` object from a validation/test loop .. code-block:: python def validation_step(self, batch, batch_idx): - return {'some_metric': some_metric} - - def validation_epoch_end(self, validation_step_outputs): - some_metric_mean = torch.stack([x['some_metric'] for x in validation_step_outputs]).mean() - return { - 'log': {'some_metric_mean': some_metric_mean}, - 'progress_bar': {'some_metric_mean': some_metric_mean} - } + some_metric = ... + result = pl.EvalResult(checkpoint_on=some_metric) + result.log('some_metric', some_metric, prog_bar=True) + return result -With the equivalent using the `EvalResult` syntax +If you'd like to do something special with the outputs other than logging, implement `__epoch_end`. .. code-block:: python def validation_step(self, batch, batch_idx): - some_metric = ... result = pl.EvalResult(checkpoint_on=some_metric) - result.log('some_metric', some_metric, prog_bar=True) + result.a_prediction = some_prediction return result + def validation_epoch_end(self, validation_step_output_result): + all_validation_step_predictions = validation_step_output_result.a_prediction + # do something with the predictions from all validation_steps + + return validation_step_output_result + + +With the equivalent using the `EvalResult` syntax + + ------------------ TrainResult @@ -161,7 +165,6 @@ Finally, you can use your own reduction function instead: Finally, you may need more esoteric logging such as something specific to your logger like images: - .. code-block:: python def training_step(...): @@ -171,6 +174,14 @@ Finally, you may need more esoteric logging such as something specific to your l # also log images (if tensorboard for example) self.logger.experiment.log_figure(...) +Sync across devices +^^^^^^^^^^^^^^^^^^^ +When training on multiple GPUs/CPUs/TPU cores, calculate the global mean of a logged metric as follows: + +.. code-block:: python + + result.log('train_loss', loss, sync_dist=True) + TrainResult API ^^^^^^^^^^^^^^^ @@ -226,6 +237,14 @@ Val/Test loop ^^^^^^^^^^^^^ Eval result can be used in both `test_step` and `validation_step`. +Sync across devices (v) +^^^^^^^^^^^^^^^^^^^^^^^ +When training on multiple GPUs/CPUs/TPU cores, calculate the global mean of a logged metric as follows: + +.. 
code-block:: python + + result.log('val_loss', loss, sync_dist=True) + EvalResult API ^^^^^^^^^^^^^^^ diff --git a/pytorch_lightning/callbacks/early_stopping.py b/pytorch_lightning/callbacks/early_stopping.py index 3b4aed8566bb65..8f4ef9da4087e9 100644 --- a/pytorch_lightning/callbacks/early_stopping.py +++ b/pytorch_lightning/callbacks/early_stopping.py @@ -89,15 +89,6 @@ def __init__(self, monitor: str = 'val_loss', min_delta: float = 0.0, patience: self.best_score = torch_inf if self.monitor_op == torch.lt else -torch_inf def _validate_condition_metric(self, logs): - """ - Checks that the condition metric for early stopping is good - - Args: - logs: callback metrics from validation output - - Return: - True if specified metric is available - """ monitor_val = logs.get(self.monitor) error_msg = (f'Early stopping conditioned on metric `{self.monitor}`' f' which is not available. Either add `{self.monitor}` to the return of ' diff --git a/pytorch_lightning/callbacks/lr_logger.py b/pytorch_lightning/callbacks/lr_logger.py index a4f2ddc75789ca..0188be550bd11f 100755 --- a/pytorch_lightning/callbacks/lr_logger.py +++ b/pytorch_lightning/callbacks/lr_logger.py @@ -94,8 +94,6 @@ def on_epoch_start(self, trainer, pl_module): trainer.logger.log_metrics(latest_stat, step=trainer.current_epoch) def _extract_lr(self, trainer, interval): - """ Extracts learning rates for lr schedulers and saves information - into dict structure. """ latest_stat = {} for name, scheduler in zip(self.lr_sch_names, trainer.lr_schedulers): diff --git a/pytorch_lightning/core/__init__.py b/pytorch_lightning/core/__init__.py index c29fea079f650d..7116da179afa8f 100644 --- a/pytorch_lightning/core/__init__.py +++ b/pytorch_lightning/core/__init__.py @@ -1,362 +1,3 @@ -""" -A :class:`~LightningModule` organizes your PyTorch code into the following sections: - -.. figure:: /_images/lightning_module/pt_to_pl.png - :alt: Convert from PyTorch to Lightning - - -Notice a few things. - -1. It's the SAME code. -2. The PyTorch code IS NOT abstracted - just organized. -3. All the other code that's not in the :class:`~LightningModule` - has been automated for you by the trainer. - - .. code-block:: python - - net = Net() - trainer = Trainer() - trainer.fit(net) - -4. There are no .cuda() or .to() calls... Lightning does these for you. - - .. code-block:: python - - # don't do in lightning - x = torch.Tensor(2, 3) - x = x.cuda() - x = x.to(device) - - # do this instead - x = x # leave it alone! - - # or to init a new tensor - new_x = torch.Tensor(2, 3) - new_x = new_x.type_as(x.type()) - -5. There are no samplers for distributed, Lightning also does this for you. - - .. code-block:: python - - # Don't do in Lightning... - data = MNIST(...) - sampler = DistributedSampler(data) - DataLoader(data, sampler=sampler) - - # do this instead - data = MNIST(...) - DataLoader(data) - -6. A :class:`~LightningModule` is a :class:`torch.nn.Module` but with added functionality. Use it as such! - - .. code-block:: python - - net = Net.load_from_checkpoint(PATH) - net.freeze() - out = net(x) - -Thus, to use Lightning, you just need to organize your code which takes about 30 minutes, -(and let's be real, you probably should do anyhow). - ------------- - -Minimal Example ---------------- - -Here are the only required methods. - -.. code-block:: python - - >>> import pytorch_lightning as pl - >>> class LitModel(pl.LightningModule): - ... - ... def __init__(self): - ... super().__init__() - ... self.l1 = torch.nn.Linear(28 * 28, 10) - ... - ... 
def forward(self, x): - ... return torch.relu(self.l1(x.view(x.size(0), -1))) - ... - ... def training_step(self, batch, batch_idx): - ... x, y = batch - ... y_hat = self(x) - ... loss = F.cross_entropy(y_hat, y) - ... return pl.TrainResult(loss) - ... - ... def configure_optimizers(self): - ... return torch.optim.Adam(self.parameters(), lr=0.02) - -Which you can train by doing: - -.. code-block:: python - - train_loader = DataLoader(MNIST(os.getcwd(), train=True, download=True, transform=transforms.ToTensor())) - trainer = pl.Trainer() - model = LitModel() - - trainer.fit(model, train_loader) - ----------- - -Training loop structure ------------------------ - -The general pattern is that each loop has a single method to worry about - -- ``___step`` - -If you need more control, there are two optional methods. - -- ``___step_end`` -- ``___epoch_end`` - -To show how Lightning calls these, let's use the validation loop as an example: - -.. code-block:: python - - # put model in prediction mode - model.eval() - torch.set_grad_enabled(False) - - val_outs = [] - for val_batch in val_data: - # do something with each batch - out = validation_step(val_batch) - val_outs.append(out) - - # do something with the outputs for all batches - # like calculate validation set accuracy or loss - validation_epoch_end(val_outs) - - # put model back in train mode - model.train() - torch.set_grad_enabled(True) - -If we use dp or ddp2 mode, we can also define the ``XXX_step_end`` method to operate -on all parts of the batch:: - - val_outs = [] - for val_batch in val_data: - batches = split_batch(val_batch) - dp_outs = [] - for sub_batch in batches: - dp_out = validation_step(sub_batch) - dp_outs.append(dp_out) - - out = validation_step_end(dp_outs) - val_outs.append(out) - - # do something with the outputs for all batches - # like calculate validation set accuracy or loss - validation_epoch_end(val_outs) - - -Add validation loop -^^^^^^^^^^^^^^^^^^^ - -Thus, if we wanted to add a validation loop you would add this to your -:class:`~LightningModule`: - - >>> import pytorch_lightning as pl - >>> class LitModel(pl.LightningModule): - ... def validation_step(self, batch, batch_idx): - ... x, y = batch - ... y_hat = self(x) - ... loss = F.cross_entropy(y_hat, y) - ... result = pl.EvalResult(checkpoint_on=loss) - ... result.log('val_loss', loss) - ... return result - -The equivalent expanded version (which you normally wouldn't need to use) is the following: - - >>> import pytorch_lightning as pl - >>> class LitModel(pl.LightningModule): - ... def validation_step(self, batch, batch_idx): - ... x, y = batch - ... y_hat = self(x) - ... return {'val_loss': F.cross_entropy(y_hat, y)} - ... - ... def validation_epoch_end(self, outputs): - ... val_loss_mean = torch.stack([x['val_loss'] for x in outputs]).mean() - ... return {'val_loss': val_loss_mean} - ... - ... def val_dataloader(self): - ... # can also return a list of val dataloaders - ... return DataLoader(...) - -Add test loop -^^^^^^^^^^^^^ - - >>> import pytorch_lightning as pl - >>> class LitModel(pl.LightningModule): - ... def test_step(self, batch, batch_idx): - ... x, y = batch - ... y_hat = self(x) - ... loss = F.cross_entropy(y_hat, y) - ... result = pl.EvalResult(checkpoint_on=loss) - ... result.log('test_loss', loss) - ... return result - -However, the test loop won't ever be called automatically to make sure you -don't run your test data by accident. Instead you have to explicitly call: - -.. 
code-block:: python - - # call after training - trainer = Trainer() - trainer.fit(model) - trainer.test(test_dataloaders=test_dataloader) - - # or call with pretrained model - model = MyLightningModule.load_from_checkpoint(PATH) - trainer = Trainer() - trainer.test(model, test_dataloaders=test_dataloader) - -------------- - -TrainResult -^^^^^^^^^^^ -When you are using the `_step_end` and `_epoch_end` only for aggregating metrics and then logging, -consider using either a `EvalResult` or `TrainResult` instead. - -Here's a training loop structure - -.. code-block:: python - - def training_step(self, batch, batch_idx): - return {'loss': loss} - - def training_epoch_end(self, training_step_outputs): - epoch_loss = torch.stack([x['loss'] for x in training_step_outputs]).mean() - return { - 'log': {'epoch_loss': epoch_loss}, - 'progress_bar': {'epoch_loss': epoch_loss} - } - -using the equivalent syntax via the `TrainResult` object: - -.. code-block:: python - - def training_step(self, batch_subset, batch_idx): - loss = ... - result = pl.TrainResult(minimize=loss) - result.log('train_loss', loss, prog_bar=True) - return result - -EvalResult -^^^^^^^^^^ -Same for val/test loop - -.. code-block:: python - - def validation_step(self, batch, batch_idx): - return {'some_metric': some_metric} - - def validation_epoch_end(self, validation_step_outputs): - some_metric_mean = torch.stack([x['some_metric'] for x in validation_step_outputs]).mean() - return { - 'log': {'some_metric_mean': some_metric_mean}, - 'progress_bar': {'some_metric_mean': some_metric_mean} - } - -With the equivalent using the `EvalResult` syntax - -.. code-block:: python - - def validation_step(self, batch, batch_idx): - some_metric = ... - result = pl.EvalResult(checkpoint_on=some_metric) - result.log('some_metric', some_metric, prog_bar=True) - return result - ----------- - -Training_step_end method ------------------------- -When using :class:`~pytorch_lightning.overrides.data_parallel.LightningDataParallel` or -:class:`~pytorch_lightning.overrides.data_parallel.LightningDistributedDataParallel`, the -:meth:`~LightningModule.training_step` -will be operating on a portion of the batch. This is normally okay but in special -cases like calculating NCE loss using negative samples, we might want to -perform a softmax across all samples in the batch. - -For these types of situations, each loop has an additional ``__step_end`` method -which allows you to operate on the pieces of the batch: - -.. code-block:: python - - training_outs = [] - for train_batch in train_data: - # dp, ddp2 splits the batch - sub_batches = split_batches_for_dp(batch) - - # run training_step on each piece of the batch - batch_parts_outputs = [training_step(sub_batch) for sub_batch in sub_batches] - - # do softmax with all pieces - out = training_step_end(batch_parts_outputs) - training_outs.append(out) - - # do something with the outputs for all batches - # like calculate validation set accuracy or loss - training_epoch_end(val_outs) - ----------- - -Remove cuda calls ------------------ -In a :class:`~LightningModule`, all calls to ``.cuda()`` -and ``.to(device)`` should be removed. Lightning will do these -automatically. This will allow your code to work on CPUs, TPUs and GPUs. - -When you init a new tensor in your code, just use :meth:`~torch.Tensor.type_as`: - -.. 
code-block:: python - - def training_step(self, batch, batch_idx): - x, y = batch - - # put the z on the appropriate gpu or tpu core - z = sample_noise() - z = z.type_as(x) - ----------- - -Lifecycle ---------- -The methods in the :class:`~LightningModule` are called in this order: - -1. :meth:`~LightningModule.__init__` -2. :meth:`~LightningModule.prepare_data` -3. :meth:`~LightningModule.configure_optimizers` -4. :meth:`~LightningModule.train_dataloader` - -If you define a validation loop then - -5. :meth:`~LightningModule.val_dataloader` - -And if you define a test loop: - -6. :meth:`~LightningModule.test_dataloader` - -Note: - :meth:`~LightningModule.test_dataloader` is only called with ``.test()`` - -In every epoch, the loop methods are called in this frequency: - -1. :meth:`~LightningModule.validation_step` called every batch -2. :meth:`~LightningModule.validation_epoch_end` called every epoch - -Live demo ---------- -Check out this -`COLAB `_ -for a live demo. - -LightningModule Class ---------------------- - -""" from pytorch_lightning.core.datamodule import LightningDataModule from pytorch_lightning.core.lightning import LightningModule diff --git a/pytorch_lightning/core/hooks.py b/pytorch_lightning/core/hooks.py index 2bf9c18cf7593b..c594b2f04bb8fc 100644 --- a/pytorch_lightning/core/hooks.py +++ b/pytorch_lightning/core/hooks.py @@ -121,7 +121,51 @@ def on_train_batch_end(self, batch: Any, batch_idx: int, dataloader_idx: int) -> batch_idx: the index of the batch dataloader_idx: the index of the dataloader """ - # do something when the batch end + # do something when the batch ends + + def on_validation_batch_start(self, batch: Any, batch_idx: int, dataloader_idx: int) -> None: + """ + Called in the validation loop before anything happens for that batch. + + Args: + batch: The batched data as it is returned by the training DataLoader. + batch_idx: the index of the batch + dataloader_idx: the index of the dataloader + """ + # do something when the batch starts + + def on_validation_batch_end(self, batch: Any, batch_idx: int, dataloader_idx: int) -> None: + """ + Called in the validation loop after the batch. + + Args: + batch: The batched data as it is returned by the training DataLoader. + batch_idx: the index of the batch + dataloader_idx: the index of the dataloader + """ + # do something when the batch ends + + def on_test_batch_start(self, batch: Any, batch_idx: int, dataloader_idx: int) -> None: + """ + Called in the test loop before anything happens for that batch. + + Args: + batch: The batched data as it is returned by the training DataLoader. + batch_idx: the index of the batch + dataloader_idx: the index of the dataloader + """ + # do something when the batch starts + + def on_test_batch_end(self, batch: Any, batch_idx: int, dataloader_idx: int) -> None: + """ + Called in the test loop after the batch. + + Args: + batch: The batched data as it is returned by the training DataLoader. 
+ batch_idx: the index of the batch + dataloader_idx: the index of the dataloader + """ + # do something when the batch ends def on_batch_start(self, batch: Any) -> None: """ diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index d23cde63f450eb..66d067a5146b62 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -24,6 +24,7 @@ from pytorch_lightning.utilities import rank_zero_warn from pytorch_lightning.utilities.device_dtype_mixin import DeviceDtypeModuleMixin from pytorch_lightning.utilities.parsing import AttributeDict, collect_init_args, get_init_args +from pytorch_lightning.core.step_result import TrainResult, EvalResult try: import torch_xla.core.xla_model as xm @@ -68,6 +69,9 @@ def __init__(self, *args, **kwargs): #: True if using amp self.use_amp = False + #: The precision used + self.precision = 32 + # optionally can be set by user self._example_input_array = None self._datamodule = None @@ -115,7 +119,6 @@ def forward(self, x): if self.trainer.is_global_zero: print(*args, **kwargs) - @abstractmethod def forward(self, *args, **kwargs): r""" Same as :meth:`torch.nn.Module.forward()`, however in Lightning you want this to define @@ -168,7 +171,7 @@ def forward(self, batch): """ - def training_step(self, *args, **kwargs) -> Union[int, Dict[str, Union[Tensor, Dict[str, Union[float, Tensor]]]]]: + def training_step(self, *args, **kwargs): r""" Here you compute and return the training loss and some additional metrics for e.g. the progress bar or logger. @@ -182,69 +185,72 @@ def training_step(self, *args, **kwargs) -> Union[int, Dict[str, Union[Tensor, D :paramref:`~pytorch_lightning.trainer.trainer.Trainer.truncated_bptt_steps` > 0. Return: - Dict with loss key and optional log or progress bar keys. - When implementing :meth:`training_step`, return whatever you need in that step: + :class:`~pytorch_lightning.core.step_result.TrainResult` - - loss -> tensor scalar **REQUIRED** - - progress_bar -> Dict for progress bar display. Must have either scalar tensors or Python scalars - - log -> Dict of metrics to add to logger. Must have either scalar tensors or Python scalars (no images, etc) + .. note:: :class:`~pytorch_lightning.core.step_result.TrainResult` is simply a Dict with convenient + functions for logging, distributed sync and error checking. In this step you'd normally do the forward pass and calculate the loss for a batch. You can also do fancier things like multiple forward passes or something model specific. - Examples: - .. code-block:: python + Example:: - def training_step(self, batch, batch_idx): - x, y, z = batch + def training_step(self, batch, batch_idx): + x, y, z = batch - # implement your own - out = self(x) - loss = self.loss(out, x) + # implement your own + out = self(x) + loss = self.loss(out, x) - logger_logs = {'training_loss': loss} # optional + # TrainResult auto-detaches the loss after the optimization steps are complete + result = pl.TrainResult(minimize=loss) - # if using TestTubeLogger or TensorBoardLogger you can nest scalars - logger_logs = {'losses': logger_logs} # optional + The return object :class:`~pytorch_lightning.core.step_result.TrainResult` controls where to log, + when to log (step or epoch) and syncing with multiple GPUs. - output = { - 'loss': loss, # required - 'progress_bar': {'training_loss': loss}, # optional - 'log': logger_logs - } + .. 
code-block:: python - # return a dict - return output + # log to progress bar and logger + result.log('train_loss', loss, prog_bar=True, logger=True) - If you define multiple optimizers, this step will be called with an additional - ``optimizer_idx`` parameter. + # sync metric value across GPUs in distributed training + result.log('train_loss_2', loss, sync_dist=True) - .. code-block:: python + # log to progress bar as well + result.log('train_loss_2', loss, prog_bar=True) - # Multiple optimizers (e.g.: GANs) - def training_step(self, batch, batch_idx, optimizer_idx): - if optimizer_idx == 0: - # do training_step with encoder - if optimizer_idx == 1: - # do training_step with decoder + # assign arbitrary values + result.predictions = predictions + result.some_value = 'some_value' + If you define multiple optimizers, this step will be called with an additional + ``optimizer_idx`` parameter. - If you add truncated back propagation through time you will also get an additional - argument with the hidden states of the previous step. + .. code-block:: python - .. code-block:: python + # Multiple optimizers (e.g.: GANs) + def training_step(self, batch, batch_idx, optimizer_idx): + if optimizer_idx == 0: + # do training_step with encoder + if optimizer_idx == 1: + # do training_step with decoder + + + If you add truncated back propagation through time you will also get an additional + argument with the hidden states of the previous step. + + .. code-block:: python - # Truncated back-propagation through time - def training_step(self, batch, batch_idx, hiddens): - # hiddens are the hidden states from the previous truncated backprop step - ... - out, hiddens = self.lstm(data, hiddens) - ... + # Truncated back-propagation through time + def training_step(self, batch, batch_idx, hiddens): + # hiddens are the hidden states from the previous truncated backprop step + ... + out, hiddens = self.lstm(data, hiddens) + ... - return { - "loss": ..., - "hiddens": hiddens # remember to detach() this - } + # TrainResult auto-detaches hiddens + result = pl.TrainResult(minimize=loss, hiddens=hiddens) + return result Notes: The loss value shown in the progress bar is smoothed (averaged) over the last values, @@ -258,145 +264,122 @@ def training_end(self, *args, **kwargs): Deprecated in v0.7.0. Use :meth:`training_step_end` instead. """ - def training_epoch_end( - self, outputs: Union[List[Dict[str, Tensor]], List[List[Dict[str, Union[float, Tensor]]]]] - ) -> Dict[str, Dict[str, Union[float, Tensor]]]: - """Called at the end of the training epoch with the outputs of all training steps. + def training_step_end(self, *args, **kwargs): + """ + Use this when training with dp or ddp2 because :meth:`training_step` + will operate on only part of the batch. However, this is still optional + and only needed for things like softmax or NCE loss. + + Note: + If you later switch to ddp or some other mode, this will still be called + so that you don't have to change your code .. code-block:: python - # the pseudocode for these calls - train_outs = [] - for train_batch in train_data: - out = training_step(train_batch) - train_outs.append(out) - training_epoch_end(train_outs) + # pseudocode + sub_batches = split_batches_for_dp(batch) + batch_parts_outputs = [training_step(sub_batch) for sub_batch in sub_batches] + training_step_end(batch_parts_outputs) Args: - outputs: List of outputs you defined in :meth:`training_step`, or if there are - multiple dataloaders, a list containing a list of outputs for each dataloader. 
+ batch_parts_outputs: What you return in `training_step` for each batch part. Return: - Dict or OrderedDict. - May contain the following optional keys: + :class:`~pytorch_lightning.core.step_result.TrainResult` - - log (metrics to be added to the logger; only tensors) - - progress_bar (dict for progress bar display) - - any metric used in a callback (e.g. early stopping). + .. note:: :class:`~pytorch_lightning.core.step_result.TrainResult` is simply a Dict with convenient + functions for logging, distributed sync and error checking. - Note: - If this method is not overridden, this won't be called. + When using dp/ddp2 distributed backends, only a portion of the batch is inside the training_step: - - The outputs here are strictly for logging or progress bar. - - If you don't need to display anything, don't return anything. - - If you want to manually set current step, you can specify the 'step' key in the 'log' dict. + .. code-block:: python - Examples: - With a single dataloader: + def training_step(self, batch, batch_idx): + # batch is 1/num_gpus big + x, y = batch - .. code-block:: python + out = self(x) - def training_epoch_end(self, outputs): - train_acc_mean = 0 - for output in outputs: - train_acc_mean += output['train_acc'] + # softmax uses only a portion of the batch in the denominator + loss = self.softmax(out) + loss = nce_loss(loss) + return pl.TrainResult(loss) - train_acc_mean /= len(outputs) + If you wish to do something with all the parts of the batch, then use this method to do it: - # log training accuracy at the end of an epoch - results = { - 'log': {'train_acc': train_acc_mean.item()}, - 'progress_bar': {'train_acc': train_acc_mean}, - } - return results + .. code-block:: python - With multiple dataloaders, ``outputs`` will be a list of lists. The outer list contains - one entry per dataloader, while the inner list contains the individual outputs of - each training step for that dataloader. + def training_step(self, batch, batch_idx): + # batch is 1/num_gpus big + x, y = batch - .. code-block:: python + out = self(x) + result = pl.TrainResult() + result.out = out - def training_epoch_end(self, outputs): - train_acc_mean = 0 - i = 0 - for dataloader_outputs in outputs: - for output in dataloader_outputs: - train_acc_mean += output['train_acc'] - i += 1 - - train_acc_mean /= i - - # log training accuracy at the end of an epoch - results = { - 'log': {'train_acc': train_acc_mean.item(), 'step': self.current_epoch} - 'progress_bar': {'train_acc': train_acc_mean}, - } - return results - """ + def training_step_end(self, training_step_outputs): + # this out is now the full size of the batch + all_outs = training_step_outputs.out + + # this softmax now uses the full batch + loss = nce_loss(all_outs) + result = pl.TrainResult(loss) + return result - def training_step_end(self, *args, **kwargs) -> Dict[str, Union[Tensor, Dict[str, Union[float, Tensor]]]]: + See Also: + See the :ref:`multi-gpu-training` guide for more details. """ - Use this when training with dp or ddp2 because :meth:`training_step` - will operate on only part of the batch. However, this is still optional - and only needed for things like softmax or NCE loss. - Note: - If you later switch to ddp or some other mode, this will still be called - so that you don't have to change your code + def training_epoch_end( + self, outputs: Union[TrainResult, List[TrainResult]] + ): + """ + Called at the end of the training epoch with the outputs of all training steps. 
+ Use this in case you need to do something with all the outputs for every training_step. .. code-block:: python - # pseudocode - sub_batches = split_batches_for_dp(batch) - batch_parts_outputs = [training_step(sub_batch) for sub_batch in sub_batches] - training_step_end(batch_parts_outputs) + # the pseudocode for these calls + train_outs = [] + for train_batch in train_data: + out = training_step(train_batch) + train_outs.append(out) + training_epoch_end(train_outs) Args: - batch_parts_outputs: What you return in `training_step` for each batch part. + outputs: List of outputs you defined in :meth:`training_step`, or if there are + multiple dataloaders, a list containing a list of outputs for each dataloader. Return: - Dict with loss key and optional log or progress bar keys. + :class:`~pytorch_lightning.core.step_result.TrainResult` - - loss -> tensor scalar **REQUIRED** - - progress_bar -> Dict for progress bar display. Must have either scalar tensors or Python scalars - - log -> Dict of metrics to add to logger. Must have either scalar tensors or Python scalars (no images, etc) + .. note:: :class:`~pytorch_lightning.core.step_result.TrainResult` is simply a Dict with convenient + functions for logging, distributed sync and error checking. - Examples: - .. code-block:: python - - # WITHOUT training_step_end - # if used in DP or DDP2, this batch is 1/num_gpus large - def training_step(self, batch, batch_idx): - # batch is 1/num_gpus big - x, y = batch - - out = self(x) - loss = self.softmax(out) - loss = nce_loss(loss) - return {'loss': loss} + Note: + If this method is not overridden, this won't be called. - # -------------- - # with training_step_end to do softmax over the full batch - def training_step(self, batch, batch_idx): - # batch is 1/num_gpus big - x, y = batch + Example:: - out = self(x) - return {'out': out} + def training_epoch_end(self, training_step_outputs): + # do something with all training_step outputs + return result - def training_step_end(self, outputs): - # this out is now the full size of the batch - out = outputs['out'] + With multiple dataloaders, ``outputs`` will be a list of lists. The outer list contains + one entry per dataloader, while the inner list contains the individual outputs of + each training step for that dataloader. - # this softmax now uses the full batch size - loss = nce_loss(loss) - return {'loss': loss} + .. code-block:: python - See Also: - See the :ref:`multi-gpu-training` guide for more details. + def training_epoch_end(self, outputs): + epoch_result = pl.TrainResult() + for train_result in outputs: + all_losses = train_result.minimize + # do something with all losses + return results """ - def validation_step(self, *args, **kwargs) -> Dict[str, Union[float, Tensor]]: + def validation_step(self, *args, **kwargs) -> EvalResult: r""" Operates on a single batch of data from the validation set. In this step you'd might generate examples or calculate anything of interest like accuracy. @@ -418,8 +401,7 @@ def validation_step(self, *args, **kwargs) -> Dict[str, Union[float, Tensor]]: (only if multiple val datasets used) Return: - Dict or OrderedDict - passed to :meth:`validation_epoch_end`. - If you defined :meth:`validation_step_end` it will go to that first. + :class:`~pytorch_lightning.core.step_result.TrainResult` .. code-block:: python @@ -459,15 +441,10 @@ def validation_step(self, batch, batch_idx): labels_hat = torch.argmax(out, dim=1) val_acc = torch.sum(y == labels_hat).item() / (len(y) * 1.0) - # all optional... 
- # return whatever you need for the collation function validation_epoch_end - output = OrderedDict({ - 'val_loss': loss_val, - 'val_acc': torch.tensor(val_acc), # everything must be a tensor - }) - - # return an optional dict - return output + # log the outputs! + result = pl.EvalResult(checkpoint_on=loss) + result.log_dict({'val_loss': loss, 'val_acc': val_acc}) + return result If you pass in multiple val datasets, validation_step will have an additional argument. @@ -486,7 +463,7 @@ def validation_step(self, batch, batch_idx, dataloader_idx): the model goes back to training mode and gradients are enabled. """ - def validation_step_end(self, *args, **kwargs) -> Dict[str, Union[float, Tensor]]: + def validation_step_end(self, *args, **kwargs) -> EvalResult: """ Use this when validating with dp or ddp2 because :meth:`validation_step` will operate on only part of the batch. However, this is still optional @@ -508,38 +485,42 @@ def validation_step_end(self, *args, **kwargs) -> Dict[str, Union[float, Tensor] for each batch part. Return: - Dict or OrderedDict - passed to the :meth:`validation_epoch_end` method. - - Examples: - .. code-block:: python - - # WITHOUT validation_step_end - # if used in DP or DDP2, this batch is 1/num_gpus large - def validation_step(self, batch, batch_idx): - # batch is 1/num_gpus big - x, y = batch - - out = self(x) - loss = self.softmax(out) - loss = nce_loss(loss) - return {'loss': loss} + :class:`~pytorch_lightning.core.step_result.TrainResult` - # -------------- - # with validation_step_end to do softmax over the full batch - def validation_step(self, batch, batch_idx): - # batch is 1/num_gpus big - x, y = batch - - out = self(x) - return {'out': out} - - def validation_epoch_end(self, outputs): - # this out is now the full size of the batch - out = outputs['out'] + .. code-block:: python - # this softmax now uses the full batch size - loss = nce_loss(loss) - return {'loss': loss} + # WITHOUT validation_step_end + # if used in DP or DDP2, this batch is 1/num_gpus large + def validation_step(self, batch, batch_idx): + # batch is 1/num_gpus big + x, y = batch + + out = self(x) + loss = self.softmax(out) + loss = nce_loss(loss) + result = pl.EvalResult() + result.log('val_loss', loss) + return result + + # -------------- + # with validation_step_end to do softmax over the full batch + def validation_step(self, batch, batch_idx): + # batch is 1/num_gpus big + x, y = batch + + out = self(x) + result = pl.EvalResult() + result.out = out + return result + + def validation_epoch_end(self, output_results): + # this out is now the full size of the batch + all_val_step_outs = output_results.out + loss = nce_loss(all_val_step_outs) + + result = pl.EvalResult(checkpoint_on=loss) + result.log('val_loss', loss) + return result See Also: See the :ref:`multi-gpu-training` guide for more details. @@ -553,8 +534,8 @@ def validation_end(self, outputs): """ def validation_epoch_end( - self, outputs: Union[List[Dict[str, Union[float, Tensor]]], List[List[Dict[str, Union[float, Tensor]]]]] - ) -> Dict[str, Dict[str, Union[float, Tensor]]]: + self, outputs: Union[EvalResult, List[EvalResult]] + ) -> EvalResult: """ Called at the end of the validation epoch with the outputs of all validation steps. @@ -572,38 +553,25 @@ def validation_epoch_end( are multiple dataloaders, a list containing a list of outputs for each dataloader. Return: - Dict or OrderedDict. 
- May have the following optional keys: - - - progress_bar (dict for progress bar display; either scalar tensors or Python scalars) - - log (dict of metrics to add to logger; either scalar tensors or Python scalars). + :class:`~pytorch_lightning.core.step_result.TrainResult` Note: If you didn't define a :meth:`validation_step`, this won't be called. - The outputs here are strictly for logging or progress bar. - If you don't need to display anything, don't return anything. - - If you want to manually set current step, you can specify the 'step' key in the 'log' dict. Examples: With a single dataloader: .. code-block:: python - def validation_epoch_end(self, outputs): - val_acc_mean = 0 - for output in outputs: - val_acc_mean += output['val_acc'] - - val_acc_mean /= len(outputs) - tqdm_dict = {'val_acc': val_acc_mean.item()} - - # show val_acc in progress bar but only log val_loss - results = { - 'progress_bar': tqdm_dict, - 'log': {'val_acc': val_acc_mean.item()} - } - return results + def validation_epoch_end(self, val_step_outputs): + # do something with the outputs of all val batches + all_val_preds = val_step_outputs.predictions + + val_step_outputs.some_result = calc_all_results(all_val_preds) + return val_step_outputs With multiple dataloaders, `outputs` will be a list of lists. The outer list contains one entry per dataloader, while the inner list contains the individual outputs of @@ -612,25 +580,15 @@ def validation_epoch_end(self, outputs): .. code-block:: python def validation_epoch_end(self, outputs): - val_acc_mean = 0 - i = 0 - for dataloader_outputs in outputs: - for output in dataloader_outputs: - val_acc_mean += output['val_acc'] - i += 1 - - val_acc_mean /= i - tqdm_dict = {'val_acc': val_acc_mean.item()} - - # show val_loss and val_acc in progress bar but only log val_loss - results = { - 'progress_bar': tqdm_dict, - 'log': {'val_acc': val_acc_mean.item(), 'step': self.current_epoch} - } - return results + for dataloader_output_result in outputs: + dataloader_outs = dataloader_output_result.dataloader_i_outputs + + result = pl.EvalResult() + result.log('final_metric', final_value) + return result - def test_step(self, *args, **kwargs) -> Dict[str, Union[float, Tensor]]: + def test_step(self, *args, **kwargs) -> EvalResult: r""" Operates on a single batch of data from the test set. In this step you'd normally generate examples or calculate anything of interest @@ -653,8 +611,7 @@ def test_step(self, *args, **kwargs) -> Dict[str, Union[float, Tensor]]: (only if multiple test datasets used). Return: - Dict or OrderedDict - passed to the :meth:`test_epoch_end` method. - If you defined :meth:`test_step_end` it will go to that first. + :class:`~pytorch_lightning.core.step_result.TrainResult` .. code-block:: python @@ -683,17 +640,12 @@ def test_step(self, batch, batch_idx): # calculate acc labels_hat = torch.argmax(out, dim=1) - val_acc = torch.sum(y == labels_hat).item() / (len(y) * 1.0) - - # all optional... - # return whatever you need for the collation function test_epoch_end - output = OrderedDict({ - 'val_loss': loss_val, - 'val_acc': torch.tensor(val_acc), # everything must be a tensor - }) + test_acc = torch.sum(y == labels_hat).item() / (len(y) * 1.0) - # return an optional dict - return output + # log the outputs! + result = pl.EvalResult(checkpoint_on=loss) + result.log_dict({'test_loss': loss, 'test_acc': test_acc}) + return result If you pass in multiple validation datasets, :meth:`test_step` will have an additional argument. 
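To make the logging pattern the docstring above describes concrete, here is a minimal, self-contained sketch of a ``test_step`` that logs a loss and an accuracy with ``EvalResult.log_dict``. It is an illustration only: the ``LitClassifier`` name and its tiny layer are hypothetical, and it assumes ``pytorch_lightning`` is imported as ``pl`` with the ``EvalResult`` API introduced in this change.

.. code-block:: python

    import torch
    import torch.nn.functional as F
    import pytorch_lightning as pl


    class LitClassifier(pl.LightningModule):  # hypothetical example model
        def __init__(self):
            super().__init__()
            self.layer = torch.nn.Linear(28 * 28, 10)

        def forward(self, x):
            return self.layer(x.view(x.size(0), -1))

        def test_step(self, batch, batch_idx):
            x, y = batch
            logits = self(x)
            loss = F.cross_entropy(logits, y)

            # accuracy computed inline so the sketch stays self-contained
            preds = torch.argmax(logits, dim=1)
            test_acc = (preds == y).float().mean()

            # EvalResult gathers the metrics; log_dict logs several at once
            result = pl.EvalResult()
            result.log_dict({'test_loss': loss, 'test_acc': test_acc})
            return result
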
@@ -713,7 +665,7 @@ def test_step(self, batch, batch_idx, dataloader_idx): to training mode and gradients are enabled. """ - def test_step_end(self, *args, **kwargs) -> Dict[str, Union[float, Tensor]]: + def test_step_end(self, *args, **kwargs) -> EvalResult: """ Use this when testing with dp or ddp2 because :meth:`test_step` will operate on only part of the batch. However, this is still optional @@ -734,38 +686,42 @@ def test_step_end(self, *args, **kwargs) -> Dict[str, Union[float, Tensor]]: batch_parts_outputs: What you return in :meth:`test_step` for each batch part. Return: - Dict or OrderedDict - passed to the :meth:`test_epoch_end`. - - Examples: - .. code-block:: python - - # WITHOUT test_step_end - # if used in DP or DDP2, this batch is 1/num_gpus large - def test_step(self, batch, batch_idx): - # batch is 1/num_gpus big - x, y = batch - - out = self(x) - loss = self.softmax(out) - loss = nce_loss(loss) - return {'loss': loss} - - # -------------- - # with test_step_end to do softmax over the full batch - def test_step(self, batch, batch_idx): - # batch is 1/num_gpus big - x, y = batch + :class:`~pytorch_lightning.core.step_result.TrainResult` - out = self(x) - return {'out': out} - - def test_step_end(self, outputs): - # this out is now the full size of the batch - out = outputs['out'] + .. code-block:: python - # this softmax now uses the full batch size - loss = nce_loss(loss) - return {'loss': loss} + # WITHOUT test_step_end + # if used in DP or DDP2, this batch is 1/num_gpus large + def test_step(self, batch, batch_idx): + # batch is 1/num_gpus big + x, y = batch + + out = self(x) + loss = self.softmax(out) + loss = nce_loss(loss) + result = pl.EvalResult() + result.log('test_loss', loss) + return result + + # -------------- + # with test_step_end to do softmax over the full batch + def test_step(self, batch, batch_idx): + # batch is 1/num_gpus big + x, y = batch + + out = self(x) + result = pl.EvalResult() + result.out = out + return result + + def test_epoch_end(self, output_results): + # this out is now the full size of the batch + all_test_step_outs = output_results.out + loss = nce_loss(all_test_step_outs) + + result = pl.EvalResult(checkpoint_on=loss) + result.log('test_loss', loss) + return result See Also: See the :ref:`multi-gpu-training` guide for more details. @@ -779,8 +735,9 @@ def test_end(self, outputs): """ def test_epoch_end( - self, outputs: Union[List[Dict[str, Union[float, Tensor]]], List[List[Dict[str, Union[float, Tensor]]]]] - ) -> Dict[str, Dict[str, Union[float, Tensor]]]: + self, outputs: Union[EvalResult, List[EvalResult]] + ) -> EvalResult: + """ Called at the end of a test epoch with the output of all test steps. @@ -798,17 +755,13 @@ def test_epoch_end( are multiple dataloaders, a list containing a list of outputs for each dataloader Return: - Dict or OrderedDict: Dict has the following optional keys: - - - progress_bar -> Dict for progress bar display. Must have either scalar tensors or Python scalars. - - log -> Dict of metrics to add to logger. Must have either scalar tensors or Python scalars (no images, etc). + :class:`~pytorch_lightning.core.step_result.TrainResult` Note: If you didn't define a :meth:`test_step`, this won't be called. - The outputs here are strictly for logging or progress bar. - If you don't need to display anything, don't return anything. - - If you want to manually set current step, specify it with the 'step' key in the 'log' Dict Examples: With a single dataloader: @@ -816,19 +769,11 @@ def test_epoch_end( .. 
code-block:: python def test_epoch_end(self, outputs): - test_acc_mean = 0 - for output in outputs: - test_acc_mean += output['test_acc'] - - test_acc_mean /= len(outputs) - tqdm_dict = {'test_acc': test_acc_mean.item()} - - # show test_loss and test_acc in progress bar but only log test_loss - results = { - 'progress_bar': tqdm_dict, - 'log': {'test_acc': test_acc_mean.item()} - } - return results + # do something with the outputs of all test batches + all_test_preds = test_step_outputs.predictions + + test_step_outputs.some_result = calc_all_results(all_test_preds) + return test_step_outputs With multiple dataloaders, `outputs` will be a list of lists. The outer list contains one entry per dataloader, while the inner list contains the individual outputs of @@ -837,21 +782,11 @@ def test_epoch_end(self, outputs): .. code-block:: python def test_epoch_end(self, outputs): - test_acc_mean = 0 - i = 0 - for dataloader_outputs in outputs: - for output in dataloader_outputs: - test_acc_mean += output['test_acc'] - i += 1 - - test_acc_mean /= i - tqdm_dict = {'test_acc': test_acc_mean.item()} - - # show test_loss and test_acc in progress bar but only log test_loss - results = { - 'progress_bar': tqdm_dict, - 'log': {'test_acc': test_acc_mean.item(), 'step': self.current_epoch} - } + for dataloader_output_result in outputs: + dataloader_outs = dataloader_output_result.dataloader_i_outputs + + result = pl.EvalResult() + result.log('final_metric', final_value) return results """ @@ -889,6 +824,7 @@ def configure_ddp(self, model, device_ids): return model def _init_slurm_connection(self) -> None: + """""" """ Sets up environment variables necessary for pytorch distributed communications based on slurm environment. @@ -957,6 +893,10 @@ def init_ddp_connection(self, global_rank: int, world_size: int, is_slurm_managi log.info(f"initializing ddp: GLOBAL_RANK: {global_rank}, MEMBER: {global_rank+1}/{world_size}") torch_distrib.init_process_group(torch_backend, rank=global_rank, world_size=world_size) + """ + configure_sync_batchnorm + ^^^^^^^^^^^^^^^^^^^^^^^^ + """ def configure_sync_batchnorm(self, model: 'LightningModule') -> 'LightningModule': """ Add global batchnorm for a model spread across multiple GPUs and nodes. @@ -1632,6 +1572,7 @@ def get_tqdm_dict(self) -> Dict[str, Union[int, str]]: @classmethod def _auto_collect_arguments(cls, frame=None) -> Tuple[Dict, Dict]: + """""" """ Collect all module arguments in the current constructor and all child constructors. 
The child constructors are all the ``__init__`` methods that reach the current class through @@ -1792,6 +1733,7 @@ def hparams(self, hp: Union[dict, Namespace, Any]): self._set_hparams(hp) def __get_hparams_assignment_variable(self): + """""" """ looks at the code of the class to figure out what the user named self.hparams this only happens when the user explicitly sets self.hparams diff --git a/pytorch_lightning/core/step_result.py b/pytorch_lightning/core/step_result.py index 8b482f04361015..ea62fdab2e9960 100644 --- a/pytorch_lightning/core/step_result.py +++ b/pytorch_lightning/core/step_result.py @@ -25,7 +25,7 @@ def __init__( if checkpoint_on is not None and checkpoint_on: self.checkpoint_on = checkpoint_on if hiddens is not None: - self.hiddens = hiddens + self.hiddens = hiddens.detach() if minimize is not None: err = 'Minimize can only be used in training_step, training_step_end, training_epoch_end' self._assert_grad_tensor_metric('minimize', minimize, err) @@ -59,7 +59,7 @@ def __getattr__(self, key: str) -> Any: def __setattr__(self, key: str, val: Union[Tensor, Any]): # ensure reserve keys are tensors and detached - if key in {'hiddens', 'checkpoint_on', 'early_stop_on'}: + if key in {'checkpoint_on', 'early_stop_on'}: self._assert_tensor_metric(key, val) if val is not None and isinstance(val, torch.Tensor): val = val.detach() @@ -95,17 +95,17 @@ def log( tbptt_reduce_fx: Callable = torch.mean, tbptt_pad_token: int = 0, enable_graph: bool = False, - sync_ddp: bool = False, - sync_ddp_op: Union[Any, str] = 'mean', - sync_ddp_group: Optional[Any] = None + sync_dist: bool = False, + sync_dist_op: Union[Any, str] = 'mean', + sync_dist_group: Optional[Any] = None ): # no metrics should be logged with graphs if not enable_graph and isinstance(value, torch.Tensor): value = value.detach() # sync across ddp - if sync_ddp and isinstance(value, (torch.Tensor, numbers.Number)): - value = _sync_ddp_if_available(value, group=sync_ddp_group, reduce_op=sync_ddp_op) + if sync_dist and isinstance(value, (torch.Tensor, numbers.Number)): + value = _sync_ddp_if_available(value, group=sync_dist_group, reduce_op=sync_dist_op) if 'meta' not in self: self.__setitem__('meta', {}) @@ -450,9 +450,9 @@ def log( tbptt_reduce_fx: Callable = torch.mean, tbptt_pad_token: int = 0, enable_graph: bool = False, - sync_ddp: bool = False, - sync_ddp_op: Union[Any, str] = 'mean', - sync_ddp_group: Optional[Any] = None + sync_dist: bool = False, + sync_dist_op: Union[Any, str] = 'mean', + sync_dist_group: Optional[Any] = None ): """ Log a key, value @@ -485,9 +485,9 @@ def log( tbptt_reduce_fx: function to reduce on truncated back prop tbptt_pad_token: token to use for padding enable_graph: if True, will not auto detach the graph - sync_ddp: if True, reduces the metric across GPUs/TPUs - sync_ddp_op: the op to sync across - sync_ddp_group: the ddp group + sync_dist: if True, reduces the metric across GPUs/TPUs + sync_dist_op: the op to sync across + sync_dist_group: the ddp group """ super().log(name=name, value=value, @@ -497,9 +497,9 @@ def log( on_epoch=on_epoch, reduce_fx=reduce_fx, enable_graph=enable_graph, - sync_ddp=sync_ddp, - sync_ddp_group=sync_ddp_group, - sync_ddp_op=sync_ddp_op, + sync_dist=sync_dist, + sync_dist_group=sync_dist_group, + sync_dist_op=sync_dist_op, tbptt_pad_token=tbptt_pad_token, tbptt_reduce_fx=tbptt_reduce_fx) @@ -514,9 +514,9 @@ def log_dict( tbptt_reduce_fx: Callable = torch.mean, tbptt_pad_token: int = 0, enable_graph: bool = False, - sync_ddp: bool = False, - sync_ddp_op: 
Union[Any, str] = 'mean', - sync_ddp_group: Optional[Any] = None + sync_dist: bool = False, + sync_dist_op: Union[Any, str] = 'mean', + sync_dist_group: Optional[Any] = None ): """ Log a dictonary of values at once @@ -536,9 +536,9 @@ def log_dict( tbptt_reduce_fx: function to reduce on truncated back prop tbptt_pad_token: token to use for padding enable_graph: if True, will not auto detach the graph - sync_ddp: if True, reduces the metric across GPUs/TPUs - sync_ddp_op: the op to sync across - sync_ddp_group: the ddp group: + sync_dist: if True, reduces the metric across GPUs/TPUs + sync_dist_op: the op to sync across + sync_dist_group: the ddp group: """ for k, v in dictionary.items(): self.log(name=k, @@ -549,9 +549,9 @@ def log_dict( on_epoch=on_epoch, reduce_fx=reduce_fx, enable_graph=enable_graph, - sync_ddp=sync_ddp, - sync_ddp_group=sync_ddp_group, - sync_ddp_op=sync_ddp_op, + sync_dist=sync_dist, + sync_dist_group=sync_dist_group, + sync_dist_op=sync_dist_op, tbptt_pad_token=tbptt_pad_token, tbptt_reduce_fx=tbptt_reduce_fx) @@ -602,9 +602,9 @@ def log( tbptt_reduce_fx: Callable = torch.mean, tbptt_pad_token: int = 0, enable_graph: bool = False, - sync_ddp: bool = False, - sync_ddp_op: Union[Any, str] = 'mean', - sync_ddp_group: Optional[Any] = None + sync_dist: bool = False, + sync_dist_op: Union[Any, str] = 'mean', + sync_dist_group: Optional[Any] = None ): """ Log a key, value @@ -636,9 +636,9 @@ def log( tbptt_reduce_fx: function to reduce on truncated back prop tbptt_pad_token: token to use for padding enable_graph: if True, will not auto detach the graph - sync_ddp: if True, reduces the metric across GPUs/TPUs - sync_ddp_op: the op to sync across - sync_ddp_group: the ddp group + sync_dist: if True, reduces the metric across GPUs/TPUs + sync_dist_op: the op to sync across + sync_dist_group: the ddp group """ super().log(name=name, value=value, @@ -648,9 +648,9 @@ def log( on_epoch=on_epoch, reduce_fx=reduce_fx, enable_graph=enable_graph, - sync_ddp=sync_ddp, - sync_ddp_group=sync_ddp_group, - sync_ddp_op=sync_ddp_op, + sync_dist=sync_dist, + sync_dist_group=sync_dist_group, + sync_dist_op=sync_dist_op, tbptt_pad_token=tbptt_pad_token, tbptt_reduce_fx=tbptt_reduce_fx) @@ -665,9 +665,9 @@ def log_dict( tbptt_reduce_fx: Callable = torch.mean, tbptt_pad_token: int = 0, enable_graph: bool = False, - sync_ddp: bool = False, - sync_ddp_op: Union[Any, str] = 'mean', - sync_ddp_group: Optional[Any] = None + sync_dist: bool = False, + sync_dist_op: Union[Any, str] = 'mean', + sync_dist_group: Optional[Any] = None ): """ Log a dictonary of values at once @@ -687,9 +687,9 @@ def log_dict( tbptt_reduce_fx: function to reduce on truncated back prop tbptt_pad_token: token to use for padding enable_graph: if True, will not auto detach the graph - sync_ddp: if True, reduces the metric across GPUs/TPUs - sync_ddp_op: the op to sync across - sync_ddp_group: the ddp group + sync_dist: if True, reduces the metric across GPUs/TPUs + sync_dist_op: the op to sync across + sync_dist_group: the ddp group """ for k, v in dictionary.items(): self.log(name=k, @@ -700,9 +700,9 @@ def log_dict( on_epoch=on_epoch, reduce_fx=reduce_fx, enable_graph=enable_graph, - sync_ddp=sync_ddp, - sync_ddp_group=sync_ddp_group, - sync_ddp_op=sync_ddp_op, + sync_dist=sync_dist, + sync_dist_group=sync_dist_group, + sync_dist_op=sync_dist_op, tbptt_pad_token=tbptt_pad_token, tbptt_reduce_fx=tbptt_reduce_fx) diff --git a/pytorch_lightning/trainer/distrib_parts.py b/pytorch_lightning/trainer/distrib_parts.py index 
f76c6f1b008dea..1d22d1de45cb0a 100644 --- a/pytorch_lightning/trainer/distrib_parts.py +++ b/pytorch_lightning/trainer/distrib_parts.py @@ -71,6 +71,7 @@ class TrainerDPMixin(ABC): amp_level: str precision: ... global_rank: int + local_rank: int tpu_local_core_rank: int tpu_global_core_rank: int use_tpu: bool @@ -129,6 +130,9 @@ def copy_trainer_model_properties(self, model): m.use_tpu = self.use_tpu m.tpu_local_core_rank = self.tpu_local_core_rank m.tpu_global_core_rank = self.tpu_global_core_rank + m.precision = self.precision + m.global_rank = self.global_rank + m.local_rank = self.local_rank def transfer_batch_to_tpu(self, batch: Any, tpu_id: Optional[int] = None): """ diff --git a/pytorch_lightning/trainer/evaluation_loop.py b/pytorch_lightning/trainer/evaluation_loop.py index 5ce7b7718c2a7e..433ea970877db6 100644 --- a/pytorch_lightning/trainer/evaluation_loop.py +++ b/pytorch_lightning/trainer/evaluation_loop.py @@ -312,9 +312,16 @@ def _evaluate( # callbacks if test_mode: self.on_test_batch_start(batch, batch_idx, dataloader_idx) + if self.is_overridden('on_test_batch_start'): + model_ref = self.get_model() + with self.profiler.profile('on_test_batch_start'): + model_ref.on_test_batch_start(batch, batch_idx, dataloader_idx) else: self.on_validation_batch_start(batch, batch_idx, dataloader_idx) - + if self.is_overridden('on_validation_batch_start'): + model_ref = self.get_model() + with self.profiler.profile('on_validation_batch_start'): + model_ref.on_validation_batch_start(batch, batch_idx, dataloader_idx) # ----------------- # RUN EVALUATION STEP # ----------------- @@ -335,13 +342,25 @@ def _evaluate( model_ref = self.get_model() with self.profiler.profile('test_step_end'): output = model_ref.test_step_end(output) - self.on_test_batch_end(batch, batch_idx, dataloader_idx) else: if self.is_overridden('validation_step_end'): model_ref = self.get_model() with self.profiler.profile('validation_step_end'): output = model_ref.validation_step_end(output) + + # callbacks (on __batch_end) + if test_mode: + self.on_test_batch_end(batch, batch_idx, dataloader_idx) + if self.is_overridden('on_test_batch_end'): + model_ref = self.get_model() + with self.profiler.profile('on_test_batch_end'): + model_ref.on_test_batch_end(batch, batch_idx, dataloader_idx) + else: self.on_validation_batch_end(batch, batch_idx, dataloader_idx) + if self.is_overridden('on_validation_batch_end'): + model_ref = self.get_model() + with self.profiler.profile('on_validation_batch_end'): + model_ref.on_validation_batch_end(batch, batch_idx, dataloader_idx) # track outputs for collation if output is not None: diff --git a/tests/core/test_results.py b/tests/core/test_results.py index 743a6d89153436..0630838a871ef0 100644 --- a/tests/core/test_results.py +++ b/tests/core/test_results.py @@ -21,7 +21,7 @@ def _ddp_test_fn(rank, worldsize, result_cls: Result): tensor = torch.tensor([1.0]) res = result_cls() - res.log("test_tensor", tensor, sync_ddp=True, sync_ddp_op=torch.distributed.ReduceOp.SUM) + res.log("test_tensor", tensor, sync_dist=True, sync_dist_op=torch.distributed.ReduceOp.SUM) assert res["test_tensor"].item() == dist.get_world_size(), "Result-Log does not work properly with DDP and Tensors"
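The renamed ``sync_dist`` flags exercised in the test above are the same ones user code would pass from a ``LightningModule``. As a rough sketch only (the ``LitModel`` name and its layer are hypothetical, and it assumes ``pytorch_lightning`` imported as ``pl`` with the ``TrainResult`` API from this change), a training step that wants its logged loss reduced across all GPU/TPU processes could look like this:

.. code-block:: python

    import torch
    import torch.nn.functional as F
    import pytorch_lightning as pl


    class LitModel(pl.LightningModule):  # hypothetical example model
        def __init__(self):
            super().__init__()
            self.layer = torch.nn.Linear(32, 2)

        def forward(self, x):
            return self.layer(x)

        def training_step(self, batch, batch_idx):
            x, y = batch
            loss = F.cross_entropy(self(x), y)

            result = pl.TrainResult(minimize=loss)
            # sync_dist replaces the old sync_ddp flag; with the default
            # sync_dist_op='mean' the logged value is averaged across processes
            result.log('train_loss', loss, sync_dist=True)
            return result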