diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9303655..faffb71 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,30 @@ All notable changes to this project will be documented in this file.
 
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 
+## [0.4.0] - 2021-02-21
+
+- Increased minimum versions of dependencies (`transformers` to 4.3.0, `pytorch-lightning` to 1.2.0)
+  - Removed the dependency on `tokenizers`, as `transformers` pins it.
+- Made Fast tokenizers the default (as they are the default in `transformers` 4.0.0)
+- Made serialized tokenizers the default for custom tokenizers, and added support for loading them in both `aitextgen` and `TokenDataset`
+- Added gradient checkpointing for GPT-2, and made it the default when training the 355M and 774M models.
+- Added layer freezing to freeze the first `n` layers of GPT-2 while training. This allows the 1.5B GPT-2 to be trained with a high `n`.
+- Added schema-based generation for specified `schema_tokens` (which can be encoded in the Transformers config), for use with an appropriately annotated dataset.
+- Switched the TensorFlow weight download URL from GCP to Azure (as OpenAI removed the weights from GCP)
+- Fixed an issue where the prompt's character length, rather than its token length, was used in the too-long-prompt assertion (#90)
+- Worked around a breaking issue in Transformers 4.3.0 by moving special-token stripping into aitextgen instead of the tokenizer (#90)
+- Added an `lstrip` param to generation, which strips all whitespace at the beginning of generated text (related to the point above)
+
+## [0.3.0] - 2020-11-30
+
+- Increased minimum versions of dependencies (`transformers` to 4.0.0, `pytorch-lightning` to 1.0.8, PyTorch to 1.6)
+- Fixed imports to account for the new Transformers file architecture
+- Fixed training to account for the new transformers/pytorch-lightning minimums
+- Fully removed TorchScript code (the ONNX implementation will supersede it)
+- Made prompt specification for generation more canonical with Transformers
+- Set the default `vocab` size for new tokenizers to `1000`
+- Began work on serializing tokenizers in accordance with the new `tokenizers` approach
+
 ## [0.2.1] - 2020-06-28
 
 ### Added
diff --git a/README.md b/README.md
index 6ef08bc..da242da 100644
--- a/README.md
+++ b/README.md
@@ -5,8 +5,8 @@ A robust Python tool for text-based AI training and generation using [OpenAI's](
 aitextgen is a Python package that leverages [PyTorch](https://pytorch.org), [Hugging Face Transformers](https://github.com/huggingface/transformers) and [pytorch-lightning](https://github.com/PyTorchLightning/pytorch-lightning) with specific optimizations for text generation using GPT-2, plus _many_ added features. It is the successor to [textgenrnn](https://github.com/minimaxir/textgenrnn) and [gpt-2-simple](https://github.com/minimaxir/gpt-2-simple), taking the best of both packages:
 
 - Finetunes on a pretrained 124M GPT-2 model from OpenAI...or create your own GPT-2 model + tokenizer and train from scratch!
-- Generates text faster than gpt-2-simple and with better memory efficiency! (even [from the 1.5B GPT-2 model](https://docs.aitextgen.io/tutorials/generate_1_5b/)!)
-- With Transformers, aitextgen preserves compatibility with the base package, allowing you to use the model for other NLP tasks, download custom GPT-2 models from the Hugging Face model repository, and upload your own models! Also, it uses the included `generate()` function to allow a massive amount of control over the generated text.
+- Generates text faster than gpt-2-simple and with better memory efficiency! +- With Transformers, aitextgen preserves compatibility with the base package, allowing you to use the model for other NLP tasks, download custom GPT-2 models from the HuggingFace model repository, and upload your own models! Also, it uses the included `generate()` function to allow a massive amount of control over the generated text. - With pytorch-lightning, aitextgen trains models not just on CPUs and GPUs, but also _multiple_ GPUs and (eventually) TPUs! It also includes a pretty training progress bar, with the ability to add optional loggers. - The input dataset is its own object, allowing you to not only easily encode megabytes of data in seconds, cache, and compress it on a local computer before transporting to a remote server, but you are able to _merge_ datasets without biasing the resulting dataset, or _cross-train_ on multiple datasets to create blended output. @@ -54,7 +54,7 @@ aitextgen generate aitextgen generate --prompt "I believe in unicorns because" --to_file False ``` -Want to train your own mini GPT-2 model on your own computer? Download this [text file of Shakespeare's plays](https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt), cd to that directory in a Terminal, open up a `python3` console and go: +Want to train your own mini GPT-2 model on your own computer? You can follow along [in this Jupyter Notebook](/notebooks/training_hello_world.ipynb) or, download this [text file of Shakespeare's plays](https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt), cd to that directory in a Terminal, open up a `python3` console and go: ```python from aitextgen.TokenDataset import TokenDataset @@ -66,29 +66,36 @@ from aitextgen import aitextgen file_name = "input.txt" # Train a custom BPE Tokenizer on the downloaded text -# This will save two files: aitextgen-vocab.json and aitextgen-merges.txt, -# which are needed to rebuild the tokenizer. +# This will save one file: `aitextgen.tokenizer.json`, which contains the +# information needed to rebuild the tokenizer. train_tokenizer(file_name) -vocab_file = "aitextgen-vocab.json" -merges_file = "aitextgen-merges.txt" +tokenizer_file = "aitextgen.tokenizer.json" # GPT2ConfigCPU is a mini variant of GPT-2 optimized for CPU-training # e.g. the # of input tokens here is 64 vs. 1024 for base GPT-2. config = GPT2ConfigCPU() # Instantiate aitextgen using the created tokenizer and config -ai = aitextgen(vocab_file=vocab_file, merges_file=merges_file, config=config) +ai = aitextgen(tokenizer_file=tokenizer_file, config=config) # You can build datasets for training by creating TokenDatasets, # which automatically processes the dataset with the appropriate size. -data = TokenDataset(file_name, vocab_file=vocab_file, merges_file=merges_file, block_size=64) +data = TokenDataset(file_name, tokenizer_file=tokenizer_file, block_size=64) # Train the model! It will save pytorch_model.bin periodically and after completion. -# On a 2016 MacBook Pro, this took ~25 minutes to run. -ai.train(data, batch_size=16, num_steps=5000) +# On a 2020 8-core iMac, this took ~25 minutes to run. +ai.train(data, batch_size=8, num_steps=50000, generate_every=5000, save_every=5000) # Generate text from it! ai.generate(10, prompt="ROMEO:") + +# With your trained model, you can reload the model at any time by +# providing the pytorch_model.bin model weights, the config, and the tokenizer. 
+ai2 = aitextgen(model="trained_model/pytorch_model.bin", + tokenizer_file="aitextgen.tokenizer.json", + config="trained_model/config.json") + +ai2.generate(10, prompt="ROMEO:") ``` Want to run aitextgen and finetune GPT-2? Use the Colab notebooks in the Demos section, or [follow the documentation](https://docs.aitextgen.io/) to get more information and learn some helpful tips! @@ -102,7 +109,7 @@ Want to run aitextgen and finetune GPT-2? Use the Colab notebooks in the Demos s ## Upcoming Features -The current release (v0.2.X) of aitextgen **is considered to be a beta**, targeting the most common use cases. The Notebooks and examples written so far are tested to work, but more fleshing out of the docs/use cases will be done over the next few months in addition to fixing the known issues noted above. +The current release (v0.4.X) of aitextgen **is considered to be a beta**, targeting the most common use cases. The Notebooks and examples written so far are tested to work, but more fleshing out of the docs/use cases will be done over the next few months in addition to fixing the known issues noted above. The next versions of aitextgen (and one of the reasons I made this package in the first place) will have native support for _schema-based generation_. (See [this repo](https://github.com/minimaxir/gpt-2-keyword-generation) for a rough proof-of-concept.) diff --git a/aitextgen/TokenDataset.py b/aitextgen/TokenDataset.py index 3aa9b4d..7c4fcc8 100644 --- a/aitextgen/TokenDataset.py +++ b/aitextgen/TokenDataset.py @@ -56,6 +56,8 @@ def __init__( file_path: str = None, vocab_file: str = os.path.join(STATIC_PATH, "gpt2_vocab.json"), merges_file: str = os.path.join(STATIC_PATH, "gpt2_merges.txt"), + tokenizer: GPT2TokenizerFast = None, + tokenizer_file: str = None, texts: List[str] = None, line_by_line: bool = False, from_cache: bool = False, @@ -70,7 +72,7 @@ def __init__( eos_token: str = "<|endoftext|>", unk_token: str = "<|endoftext|>", pad_token: str = "<|endoftext|>", - progress_bar_refresh_rate: int = 10, + progress_bar_refresh_rate: int = 20, **kwargs, ) -> None: @@ -85,14 +87,27 @@ def __init__( assert any([texts, file_path]), "texts or file_path must be specified." - tokenizer = GPT2TokenizerFast( - vocab_file=vocab_file, - merges_file=merges_file, - bos_token=bos_token, - eos_token=eos_token, - unk_token=unk_token, - pad_token=pad_token, - ) + if not tokenizer: + if tokenizer_file: + # load the custom GPT-2 tokenizer from a serialized tokenizer + tokenizer = GPT2TokenizerFast( + vocab_file=None, + merges_file=None, + tokenizer_file=tokenizer_file, + bos_token=bos_token, + eos_token=eos_token, + unk_token=unk_token, + pad_token=pad_token, + ) + else: + tokenizer = GPT2TokenizerFast( + vocab_file=vocab_file, + merges_file=merges_file, + bos_token=bos_token, + eos_token=eos_token, + unk_token=unk_token, + pad_token=pad_token, + ) # If a cache path is provided, load it. 
if from_cache: @@ -248,7 +263,7 @@ def encode_tokens_from_file( tokenizer: GPT2TokenizerFast, newline: str, header: bool = True, - progress_bar_refresh_rate: int = 10, + progress_bar_refresh_rate: int = 20, batch_size: int = 1024, ) -> List[int]: """ @@ -299,7 +314,7 @@ def encode_tokens_from_file( if not batch: break - encoded_texts = tokenizer.batch_encode_plus( + encoded_texts = tokenizer( batch, add_special_tokens=False, return_token_type_ids=False, @@ -340,7 +355,7 @@ def encode_tokens_from_list( texts: List[str], eos_token: str, tokenizer: GPT2TokenizerFast, - progress_bar_refresh_rate: int = 10, + progress_bar_refresh_rate: int = 20, batch_size: int = 1024, ) -> List[int]: """ @@ -367,7 +382,7 @@ def encode_tokens_from_list( ] ] - encoded_texts = tokenizer.batch_encode_plus( + encoded_texts = tokenizer( batch, add_special_tokens=False, return_token_type_ids=False, diff --git a/aitextgen/aitextgen.py b/aitextgen/aitextgen.py index c1fe644..f30dffa 100644 --- a/aitextgen/aitextgen.py +++ b/aitextgen/aitextgen.py @@ -1,6 +1,6 @@ from transformers import ( GPT2LMHeadModel, - GPT2Tokenizer, + GPT2TokenizerFast, GPT2Config, AutoConfig, ) @@ -22,16 +22,19 @@ download_gpt2, set_seed, reset_seed, + find_index_of_subset, + skip_special_tokens, ) from .train import ATGTransformer, ATGProgressBar from .colab import create_gdrive_folder from typing import Union, Optional, List from pkg_resources import resource_filename import shutil +import re try: - import torch_xla.core.xla_model as xm -except ImportError: + import torch_xla.core.xla_model as xm # noqa +except ModuleNotFoundError: pass logger = logging.getLogger("aitextgen") @@ -63,7 +66,7 @@ class aitextgen: :param unk_token: String to override the unknown token """ - torchscript = False + openai_tf_gpt2 = None # default values for GPT2Tokenizer tokenizer = None @@ -80,11 +83,15 @@ def __init__( config: Union[str, GPT2Config] = None, vocab_file: str = None, merges_file: str = None, + tokenizer_file: str = None, + schema_tokens: List[str] = None, + schema_return: List[str] = None, cache_dir: str = "aitextgen", tf_gpt2: str = None, to_gpu: bool = False, to_fp16: bool = False, verbose: bool = False, + gradient_checkpointing: bool = False, bos_token: str = None, eos_token: str = None, unk_token: str = None, @@ -103,6 +110,8 @@ def __init__( logging.getLogger("transformers.modeling_utils").setLevel(logging.ERROR) if tf_gpt2: + self.openai_tf_gpt2 = tf_gpt2 + # Download + convert the TF weights if a PyTorch model has not been created if not os.path.isfile( os.path.join(cache_dir, f"pytorch_model_{tf_gpt2}.bin") @@ -172,11 +181,22 @@ def __init__( ) if model and "gpt2" not in model: logger.info(f"Using the tokenizer for {model}.") - self.tokenizer = GPT2Tokenizer.from_pretrained( + self.tokenizer = GPT2TokenizerFast.from_pretrained( model, cache_dir=cache_dir, ) + if gradient_checkpointing or tf_gpt2 in ["355M", "774M", "1558M"]: + logger.info("Gradient checkpointing enabled for model training.") + setattr(self.model.config, "gradient_checkpointing", True) + setattr(self.model.config, "use_cache", False) + + if schema_tokens: + setattr(self.model.config, "schema_tokens", schema_tokens) + + if schema_tokens: + setattr(self.model.config, "schema_return", schema_return) + if self.tokenizer is None: # Update tokenizer settings (if not set already) args = locals() @@ -197,14 +217,26 @@ def __init__( else: logger.info("Using the default GPT-2 Tokenizer.") - self.tokenizer = GPT2Tokenizer( - vocab_file=self.vocab_file, - merges_file=self.merges_file, - 
bos_token=self.bos_token, - eos_token=self.eos_token, - unk_token=self.unk_token, - pad_token=self.pad_token, - ) + if tokenizer_file: + # load the custom GPT-2 tokenizer from a serialized tokenizer + self.tokenizer = GPT2TokenizerFast( + vocab_file=None, + merges_file=None, + tokenizer_file=tokenizer_file, + bos_token=self.bos_token, + eos_token=self.eos_token, + unk_token=self.unk_token, + pad_token=self.pad_token, + ) + else: + self.tokenizer = GPT2TokenizerFast( + vocab_file=self.vocab_file, + merges_file=self.merges_file, + bos_token=self.bos_token, + eos_token=self.eos_token, + unk_token=self.unk_token, + pad_token=self.pad_token, + ) self.tokenizer.padding_side = "left" @@ -228,6 +260,10 @@ def generate( return_as_list: bool = False, seed: int = None, pad_token_id: str = None, + schema: str = False, + normalize_key: bool = True, + use_cache: bool = True, + lstrip: bool = True, **kwargs, ) -> Optional[str]: """ @@ -249,16 +285,17 @@ def generate( and model. """ - if prompt: - assert ( - len(prompt) < self.model.config.n_positions - ), "The prompt is too large for the model." - prompt_text = prompt prompt_tensors = self.tokenizer(text=prompt, return_tensors="pt") + if prompt: + prompt_num_tokens = list(prompt_tensors["input_ids"].shape)[1] + assert ( + prompt_num_tokens < self.model.config.n_positions + ), f"The prompt is too large for the model. ({prompt_num_tokens} tokens)" + input_ids = ( - prompt_tensors["input_ids"].to(self.model.device) if prompt else None + prompt_tensors["input_ids"].to(self.get_device()) if prompt else None ) if seed: @@ -278,6 +315,7 @@ def generate( do_sample=do_sample, num_return_sequences=n, pad_token_id=pad_token_id, + use_cache=use_cache, **kwargs, ) @@ -285,25 +323,99 @@ def generate( if seed: reset_seed() - if n > 1: - gen_texts = [ - self.tokenizer.decode(output, skip_special_tokens=True) - for output in outputs - ] - else: - gen_texts = [self.tokenizer.decode(outputs[0], skip_special_tokens=True)] - - if not return_as_list: - if prompt is not None: - # Bold the prompt if printing to console - gen_texts = [ - text.replace(prompt_text, f"\033[1m{prompt_text}\033[0m", 1) - for text in gen_texts + # Schema token handling + if schema: + schema_tokens = getattr(self.model.config, "schema_tokens") + schema_return = getattr(self.model.config, "schema_return") + schema_tokens_enc = self.tokenizer(text=schema_tokens)["input_ids"] + + nonalphanum_pattern = re.compile(r"[\W_]+", re.UNICODE) + + outputs = outputs.tolist() + gen_texts = [] + for output in outputs: + gen_text_dict = {} + + # Get indices of each schema token within the text + schema_token_indices = [ + (schema_tokens[i], find_index_of_subset(output, token_enc)) + for i, token_enc in enumerate(schema_tokens_enc) ] + schema_token_indices.sort(key=lambda x: x[1]) + + for i, token_tuple in enumerate(schema_token_indices): + start_index = token_tuple[1] + end_index = ( + schema_token_indices[i + 1][1] - 1 + if i + 1 < len(schema_token_indices) + else None + ) + key = ( + nonalphanum_pattern.sub("", token_tuple[0]) + if normalize_key + else token_tuple[0] + ) + + gen_text = skip_special_tokens( + output[start_index:end_index], + self.get_device(), + [self.tokenizer.bos_token_id, self.tokenizer.eos_token_id], + ) + + gen_text_dict[key] = self.tokenizer.decode(gen_text) + + # remove fields not in schema_return + if schema_return: + if len(schema_return) == 1: + gen_text_dict = gen_text_dict[schema_return[0]] + for key in gen_text_dict.keys(): + if key not in schema_return: + gen_text_dict.pop(key, None) + 
+ gen_texts.append(gen_text_dict) + + if not return_as_list: + print(*gen_texts, sep="\n" + "=" * 10 + "\n") + else: + if n > 1: + return gen_texts + else: + return gen_texts[0] - print(*gen_texts, sep="\n" + "=" * 10 + "\n") + # Typical use case else: - return gen_texts + # Handle special token stripping at the PyTorch level + gen_texts = [ + skip_special_tokens( + text, + self.get_device(), + [self.tokenizer.bos_token_id, self.tokenizer.eos_token_id], + ) + for text in outputs + ] + if n > 1: + gen_texts = self.tokenizer.batch_decode(gen_texts) + else: + gen_texts = [self.tokenizer.decode(gen_texts[0])] + + # Handle stripping tokenization spaces w/ regex + if lstrip: + gen_texts = [re.sub(r"^\W+", "", text) for text in gen_texts] + + if not return_as_list: + if prompt: + # Bold the prompt if printing to console + gen_texts = [ + text.replace(prompt_text, f"\033[1m{prompt_text}\033[0m", 1) + for text in gen_texts + ] + + if n > 1: + print(*gen_texts, sep="\n" + "=" * 10 + "\n") + else: + print(gen_texts[0]) + else: + return gen_texts def generate_one(self, **kwargs) -> None: """ @@ -423,6 +535,9 @@ def train( save_gdrive: bool = False, run_id: str = f"ATG_{datetime.utcnow():%Y%m%d_%H%M%S}", progress_bar_refresh_rate: int = 20, + freeze_layers: bool = False, + num_layers_freeze: int = None, + use_deepspeed: bool = True, **kwargs, ) -> None: """ @@ -458,8 +573,6 @@ def train( the progress bar while training. """ - assert not self.torchscript, "You cannot train a traced TorchScript model." - if not os.path.exists(output_dir): os.makedirs(output_dir) @@ -474,8 +587,7 @@ def train( if isinstance(train_data, str): train_data = TokenDataset( - vocab_file=self.vocab_file, - merges_file=self.merges_file, + tokenizer=self.tokenizer, bos_token=self.bos_token, eos_token=self.eos_token, unk_token=self.unk_token, @@ -484,11 +596,22 @@ def train( **kwargs, ) - if num_workers is None and tpu_cores == 0: + if freeze_layers or self.openai_tf_gpt2 == "1558M": + logger.info("Layer freezing enabled for model training.") + freeze_layers = True + if num_layers_freeze: + assert ( + num_layers_freeze < self.model.config.n_layer + ), "You are freezing more Transformer layers than in the model." + + if num_workers is None: # Use all CPU cores as workers if not training on CPU # Can overload 2x w/o diminishing returns if is_gpu_used: num_workers = os.cpu_count() * 2 + # TPUs want same amount of workers as CPUs + elif tpu_cores > 0: + num_workers = os.cpu_count() # If training on the CPU, use half the CPUs else: num_workers = int(os.cpu_count() / 2) @@ -500,10 +623,11 @@ def train( warmup_steps=warmup_steps, batch_size=batch_size, num_steps=num_steps, - pin_memory=True if is_gpu_used else False, + pin_memory=is_gpu_used, num_workers=num_workers, save_every=save_every, generate_every=generate_every, + use_tpu=tpu_cores > 0, ) # Wrap the model in a pytorch-lightning module @@ -522,6 +646,19 @@ def train( if not is_gpu_used: n_gpu = 0 + # use the deepseed plugin if installed and specified + deepspeed_plugin = None + # if is_gpu_used and use_deepspeed: + # deepspeed_config = gen_deepspeed_config( + # self.get_device(), learning_rate, weight_decay + # ) + # deepspeed_plugin = DeepSpeedPlugin(deepseed_config) + # logger.info("Using DeepSpeed training.") + # logger.warning( + # "deepspeed was attempted to be used, but was not installed. " + # + "Using normal training behavior." 
+ # ) + train_params = dict( accumulate_grad_batches=gradient_accumulation_steps, gpus=n_gpu, @@ -530,6 +667,7 @@ def train( checkpoint_callback=False, logger=loggers if loggers else False, weights_summary=None, + progress_bar_refresh_rate=progress_bar_refresh_rate, # ignored callbacks=[ ATGProgressBar( save_every, @@ -541,8 +679,11 @@ def train( run_id, save_gdrive, progress_bar_refresh_rate, + freeze_layers, + num_layers_freeze, ) ], + plugins=deepspeed_plugin, ) if fp16: @@ -556,7 +697,7 @@ def train( # benchmark gives a boost for GPUs if input size is constant, # which will always be the case with aitextgen training - if n_gpu != 0 and benchmark: + if is_gpu_used and benchmark: train_params["benchmark"] = True if n_gpu > 1: diff --git a/aitextgen/tokenizers.py b/aitextgen/tokenizers.py index c3ba725..b7ed3db 100644 --- a/aitextgen/tokenizers.py +++ b/aitextgen/tokenizers.py @@ -1,8 +1,5 @@ -from tokenizers import Tokenizer, trainers, models, ByteLevelBPETokenizer +from tokenizers import ByteLevelBPETokenizer from typing import Union, List -import logging - -logger = logging.getLogger(__name__) def train_tokenizer( @@ -10,12 +7,14 @@ def train_tokenizer( dropout: float = None, vocab_size: int = 1000, min_frequency: int = 2, + prefix: str = "aitextgen", save_path: str = "", added_tokens: List[str] = [], bos_token: str = "<|endoftext|>", eos_token: str = "<|endoftext|>", unk_token: str = "<|endoftext|>", - serialize: bool = False, + serialize: bool = True, + trim_offsets: bool = True, ) -> None: """ Tokenizes the text(s) as a tokenizer, wrapping the tokenizer package. @@ -27,6 +26,7 @@ def train_tokenizer( :param dropout: Training dropout :param vocab_size: Final vocabulary size :param min_frequency: Minimum number of occurences to add to vocab + :param prefix: File name prefix of the final tokenizer :param save_path: Where to save the final tokenizer :param added_tokens: List of tokens to add to the tokenizer (currently not working) :param bos_token: Beginning-of-string special token @@ -43,28 +43,16 @@ def train_tokenizer( if isinstance(files, str): files = [files] - tokenizer = ByteLevelBPETokenizer(dropout=dropout) + tokenizer = ByteLevelBPETokenizer(dropout=dropout, trim_offsets=trim_offsets) tokenizer.train( files=files, - vocab_size=vocab_size - len(added_tokens), + vocab_size=vocab_size, min_frequency=min_frequency, - special_tokens=[bos_token, eos_token, unk_token], + special_tokens=[bos_token, eos_token, unk_token] + added_tokens, ) - tokenizer.add_tokens(added_tokens) - - PREFIX = "aitextgen" - save_path_str = "the current directory" if save_path == "" else save_path if serialize: - logger.info( - f"Saving {PREFIX}.tokenizer.json to {save_path_str}. " - + "You will need this file to build the GPT2Tokenizer." - ) - tokenizer.save(f"{PREFIX}.tokenizer.json") + tokenizer.save(f"{prefix}.tokenizer.json") else: - logger.info( - f"Saving {PREFIX}-vocab.json and {PREFIX}-merges.txt to {save_path_str}. " - + "You will need both files to build the GPT2Tokenizer." 
- ) - tokenizer.save_model(save_path, PREFIX) + tokenizer.save_model(save_path, prefix) diff --git a/aitextgen/train.py b/aitextgen/train.py index 440d583..f7f6fc4 100644 --- a/aitextgen/train.py +++ b/aitextgen/train.py @@ -1,10 +1,10 @@ -from torch.utils.data import DataLoader import pytorch_lightning as pl from pytorch_lightning.callbacks.progress import ProgressBarBase from tqdm.auto import tqdm import sys import torch from torch.optim import AdamW +from torch.utils.data import DataLoader from transformers import get_linear_schedule_with_warmup import os import shutil @@ -29,16 +29,12 @@ def forward(self, inputs): return self.model(**inputs, return_dict=False) def training_step(self, batch, batch_num): - "Compute loss and log." - outputs = self({"input_ids": batch, "labels": batch}) loss = outputs[0] return {"loss": loss} def train_dataloader(self): - "Load datasets. Called after prepare data." - return DataLoader( self.dataset, batch_size=self.hparams["batch_size"], @@ -98,6 +94,8 @@ def __init__( run_id, save_gdrive, progress_bar_refresh_rate, + train_transformers_only, + num_layers_freeze, ): super().__init__() self.enabled = True @@ -112,6 +110,8 @@ def __init__( self.run_id = run_id self.save_gdrive = save_gdrive self.progress_bar_refresh_rate = progress_bar_refresh_rate + self.train_transformers_only = train_transformers_only + self.num_layers_freeze = num_layers_freeze def enabled(self): self.enabled = True @@ -129,6 +129,11 @@ def on_train_start(self, trainer, pl_module): dynamic_ncols=True, file=sys.stdout, ) + self.freeze_layers(pl_module) + + def on_train_end(self, trainer, pl_module): + self.main_progress_bar.close() + self.unfreeze_layers(pl_module) def on_batch_end(self, trainer, pl_module): super().on_batch_end(trainer, pl_module) @@ -159,9 +164,8 @@ def on_batch_end(self, trainer, pl_module): "--format=csv,nounits,noheader", ], encoding="utf-8", - # capture_output=True, # valid for python version >=3.7 stdout=subprocess.PIPE, - stderr=subprocess.PIPE, # for backward compatibility with python version 3.6 + stderr=subprocess.PIPE, check=True, ) gpu_memory = result.stdout.strip().split(os.linesep)[0] @@ -170,12 +174,19 @@ def on_batch_end(self, trainer, pl_module): self.main_progress_bar.set_description(desc) if self.enabled: - + did_unfreeze = False if self.save_every > 0 and self.steps % self.save_every == 0: + self.unfreeze_layers(pl_module) self.save_pytorch_model(trainer, pl_module) + did_unfreeze = True if self.generate_every > 0 and self.steps % self.generate_every == 0: + self.unfreeze_layers(pl_module) self.generate_sample_text(trainer, pl_module) + did_unfreeze = True + + if did_unfreeze: + self.freeze_layers(pl_module) def generate_sample_text(self, trainer, pl_module): self.main_progress_bar.write( @@ -185,16 +196,32 @@ def generate_sample_text(self, trainer, pl_module): gen_length = min(pl_module.model.config.n_positions, 256) outputs = pl_module.model.generate( + input_ids=None, max_length=gen_length, do_sample=True, num_return_sequences=self.n_generate, temperature=0.7, pad_token_id=pl_module.tokenizer.pad_token_id, ) - gen_texts = [ - pl_module.tokenizer.decode(output, skip_special_tokens=True) + + special_token_id_tensor = torch.unique( + torch.as_tensor( + [pl_module.tokenizer.bos_token_id, pl_module.tokenizer.eos_token_id] + ) + ).to(pl_module.model.device.type) + + outputs = [ + output[ + ~output.unsqueeze(1).eq(special_token_id_tensor.unsqueeze(1)).any(1) + ].tolist() for output in outputs ] + + if self.n_generate > 1: + gen_texts = 
pl_module.tokenizer.batch_decode(outputs) + else: + gen_texts = [pl_module.tokenizer.decode(outputs[0])] + for text in gen_texts: self.main_progress_bar.write("=" * 10) self.main_progress_bar.write(text) @@ -219,3 +246,20 @@ def average_loss(self, current_loss, prev_avg_loss, smoothing): return current_loss else: return (smoothing * current_loss) + (1 - smoothing) * prev_avg_loss + + def modify_layers(self, pl_module, unfreeze): + if self.train_transformers_only: + for name, param in pl_module.model.named_parameters(): + if self.num_layers_freeze: + layer_num = int(name.split(".")[2]) if ".h." in name else None + to_freeze = layer_num and layer_num < self.num_layers_freeze + else: + to_freeze = False + if name == "transformer.wte.weight" or to_freeze: + param.requires_grad = unfreeze + + def freeze_layers(self, pl_module): + self.modify_layers(pl_module, False) + + def unfreeze_layers(self, pl_module): + self.modify_layers(pl_module, True) diff --git a/aitextgen/utils.py b/aitextgen/utils.py index b06bed3..aebd582 100644 --- a/aitextgen/utils.py +++ b/aitextgen/utils.py @@ -35,7 +35,7 @@ def download_gpt2(model_dir: str = "tf_model", model_name: str = "124M") -> None ]: if not os.path.isfile(os.path.join(sub_dir, file_name)): download_file_with_progress( - url_base="https://storage.googleapis.com/gpt-2", + url_base="https://openaipublic.blob.core.windows.net/gpt-2", sub_dir=sub_dir, model_name=model_name, file_name=file_name, @@ -136,3 +136,83 @@ def GPT2ConfigCPU( eos_token_id=eos_token_id, **kwargs, ) + + +def find_index_of_subset(large_list, small_list): + """ + Returns the index after the small_list within the large list, + Returns None if not present. + + Adapted from https://stackoverflow.com/a/45819222 which shows that it is + performant for input lengths < 1000, which is the common case for this function. + """ + length_small_list = len(small_list) + firstneedle = small_list[0] + for idx, item in enumerate(large_list): + if item == firstneedle: + if large_list[idx : idx + length_small_list] == small_list: + return idx + length_small_list + return None + + +def skip_special_tokens(tensor, device, special_token_ids): + """Filters out special tokens by ids in the given 1D tensor. + + Adapted from https://stackoverflow.com/a/62588955 + + Args: + tensor (tensor): PyTorch Tensor + device (str): Device, usually "cpu" or "cuda:0" + token_ids (set): List of Token IDs + """ + special_token_id_tensor = torch.unique(torch.as_tensor(special_token_ids)).to( + device + ) + return tensor[ + ~tensor.unsqueeze(1).eq(special_token_id_tensor.unsqueeze(1)).any(1) + ].tolist() + + +def gen_deepspeed_config(device, lr, weight_decay): + """Deepspeed OneBitAdam config. 
+ + Adapted from https://pytorch-lightning.readthedocs.io/en/stable/advanced/multi_gpu.html#deepspeed + + Args: + device ([type]): Device for training + lr ([type]): Learning rate + weight_decay ([type]): Weight decay + """ + + deepspeed_config = { + "zero_allow_untested_optimizer": True, + "optimizer": { + "type": "OneBitAdam", + "params": { + "lr": lr, + "betas": [0.998, 0.999], + "eps": 1e-5, + "weight_decay": weight_decay, + "cuda_aware": "cuda" in device, + }, + }, + "scheduler": { + "type": "WarmupLR", + "params": { + "last_batch_iteration": -1, + "warmup_min_lr": 0, + "warmup_max_lr": 3e-5, + "warmup_num_steps": 100, + }, + }, + "zero_optimization": { + "stage": 2, # Enable Stage 2 ZeRO (Optimizer/Gradient state partitioning) + "cpu_offload": True, # Enable Offloading optimizer state/calculation to the host CPU + "contiguous_gradients": True, # Reduce gradient fragmentation. + "overlap_comm": True, # Overlap reduce/backward operation of gradients for speed. + "allgather_bucket_size": 2e8, # Number of elements to all gather at once. + "reduce_bucket_size": 2e8, # Number of elements we reduce/allreduce at once. + }, + } + + return deepspeed_config diff --git a/notebooks/training_hello_world.ipynb b/notebooks/training_hello_world.ipynb new file mode 100644 index 0000000..05f2f18 --- /dev/null +++ b/notebooks/training_hello_world.ipynb @@ -0,0 +1,395 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# aitextgen Training Hello World\n", + "\n", + "_Last Updated: Feb 21, 2021 (v.0.4.0)_\n", + "\n", + "by Max Woolf\n", + "\n", + "A \"Hello World\" Tutorial to show how training works with aitextgen, even on a CPU!" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from aitextgen.TokenDataset import TokenDataset\n", + "from aitextgen.tokenizers import train_tokenizer\n", + "from aitextgen.utils import GPT2ConfigCPU\n", + "from aitextgen import aitextgen" + ] + }, + { + "source": [ + "First, download this [text file of Shakespeare's plays](https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt), to the folder with this notebook, then put the name of the downloaded Shakespeare text for training into the cell below." + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "file_name = \"input.txt\"" + ] + }, + { + "source": [ + "You can now train a custom Byte Pair Encoding Tokenizer on the downloaded text!\n", + "\n", + "This will save one file: `aitextgen.tokenizer.json`, which contains the information needed to rebuild the tokenizer." + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "train_tokenizer(file_name)\n", + "tokenizer_file = \"aitextgen.tokenizer.json\"" + ] + }, + { + "source": [ + "`GPT2ConfigCPU()` is a mini variant of GPT-2 optimized for CPU-training.\n", + "\n", + "e.g. the # of input tokens here is 64 vs. 1024 for base GPT-2. This dramatically speeds training up." 
+ ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "config = GPT2ConfigCPU()" + ] + }, + { + "source": [ + "Instantiate aitextgen using the created tokenizer and config" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "ai = aitextgen(tokenizer_file=tokenizer_file, config=config)" + ] + }, + { + "source": [ + "You can build datasets for training by creating TokenDatasets, which automatically processes the dataset with the appropriate size." + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "100%|██████████| 40000/40000 [00:00<00:00, 86712.61it/s]\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "TokenDataset containing 462,820 subsets loaded from file at input.txt." + ] + }, + "metadata": {}, + "execution_count": 6 + } + ], + "source": [ + "data = TokenDataset(file_name, tokenizer_file=tokenizer_file, block_size=64)\n", + "data" + ] + }, + { + "source": [ + "Train the model! It will save pytorch_model.bin periodically and after completion. On a 2020 8-core iMac, this took ~25 minutes to run.\n", + "\n", + "The configuration below processes 400,000 subsets of tokens (8 * 50000), which is about just one pass through all the data (1 epoch). Ideally you'll want multiple passes through the data and a training loss less than `2.0` for coherent output; when training a model from scratch, that's more difficult, but with long enough training you can get there!" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "pytorch_model.bin already exists in /trained_model and will be overwritten!\n", + "GPU available: False, used: False\n", + "TPU available: None, using: 0 TPU cores\n", + "\u001b[1m5,000 steps reached: saving model to /trained_model\u001b[0m\n", + "\u001b[1m5,000 steps reached: generating sample texts.\u001b[0m\n", + "==========\n", + "'s dead;\n", + "But is no winted in his northeritiff\n", + "Tave passage, and eleve your hours.\n", + "\n", + "PETRUCHIO:\n", + "What is this I does, I will, sir;\n", + "That, you have, nor tolding we\n", + "==========\n", + "\u001b[1m10,000 steps reached: saving model to /trained_model\u001b[0m\n", + "\u001b[1m10,000 steps reached: generating sample texts.\u001b[0m\n", + "==========\n", + ".\n", + "\n", + "QUEEN ELIZABETH:\n", + "I know, to, fair beat, to my soul is wonder'd intend.\n", + "\n", + "KING RICHARD III:\n", + "Hold, and threaten, my lord, and my shame!\n", + "\n", + "QUEEN ELIZAB\n", + "==========\n", + "\u001b[1m15,000 steps reached: saving model to /trained_model\u001b[0m\n", + "\u001b[1m15,000 steps reached: generating sample texts.\u001b[0m\n", + "==========\n", + "s of capitcts!\n", + "\n", + "EDWARD:\n", + "Gardener, what is this hour will not say.\n", + "What, shall the joint, I pray, if they\n", + "Harry, let bid me as he would readness so.\n", + "\n", + "B\n", + "==========\n", + "\u001b[1m20,000 steps reached: saving model to /trained_model\u001b[0m\n", + "\u001b[1m20,000 steps reached: generating sample texts.\u001b[0m\n", + "==========\n", + " for.\n", + "\n", + "ROMEO:\n", + "Fair to the 
iercing wide's fretch,\n", + "And happy talk of the master,\n", + "And waste their justice with the feet and punning,\n", + "And therefore be ben\n", + "==========\n", + "\u001b[1m25,000 steps reached: saving model to /trained_model\u001b[0m\n", + "\u001b[1m25,000 steps reached: generating sample texts.\u001b[0m\n", + "==========\n", + ",\n", + "That we we will have not lose such.\n", + "\n", + "See, to the kingdom of our virtue,\n", + "You banish'd our purpose, for our own ignorse,\n", + "Dispon I remain, and seem'd in\n", + "==========\n", + "\u001b[1m30,000 steps reached: saving model to /trained_model\u001b[0m\n", + "\u001b[1m30,000 steps reached: generating sample texts.\u001b[0m\n", + "==========\n", + ".\n", + "\n", + "BENVOLIO:\n", + "O, she's dead!\n", + "\n", + "CAMILLO:\n", + "No, my lord;\n", + "These accession will be hous.\n", + "\n", + "DERBY:\n", + "No, my lord.\n", + "\n", + "GLOUCESTER:\n", + "What is the\n", + "==========\n", + "\u001b[1m35,000 steps reached: saving model to /trained_model\u001b[0m\n", + "\u001b[1m35,000 steps reached: generating sample texts.\u001b[0m\n", + "==========\n", + ",\n", + "And whiles it is but the castle,\n", + "That stavin'd in the gods of men.\n", + "\n", + "COMFEY:\n", + "What, then?\n", + "\n", + "ELBOW:\n", + "Peace, my lord,\n", + "And weat your greats\n", + "==========\n", + "\u001b[1m40,000 steps reached: saving model to /trained_model\u001b[0m\n", + "\u001b[1m40,000 steps reached: generating sample texts.\u001b[0m\n", + "==========\n", + "\n", + "The white mercy of the sun upon my past,\n", + "Of my father's son be first, thy sake,\n", + "His son's chief son, and my includy;\n", + "And if thy brother's loss, thy thrief,\n", + "\n", + "==========\n", + "\u001b[1m45,000 steps reached: saving model to /trained_model\u001b[0m\n", + "\u001b[1m45,000 steps reached: generating sample texts.\u001b[0m\n", + "==========\n", + " to the crown,\n", + "Or I'll privy I have.\n", + "\n", + "POLIXENES:\n", + "I have been a stir.\n", + "\n", + "LEONTES:\n", + "The worshiped, the benefition of the crown.\n", + "\n", + "His somet\n", + "==========\n", + "\u001b[1m50,000 steps reached: saving model to /trained_model\u001b[0m\n", + "\u001b[1m50,000 steps reached: generating sample texts.\u001b[0m\n", + "==========\n", + ":\n", + "Catesby, girls, and make avoides;\n", + "But, welcome a far\n", + "That ever home, like a villain, and behold\n", + "Canusy not passing nonquial at the g\n", + "==========\n", + "Loss: 2.940 — Avg: 2.884: 100%|██████████| 50000/50000 [31:39<00:00, 26.32it/s]\n" + ] + } + ], + "source": [ + "ai.train(data, batch_size=8, num_steps=50000, generate_every=5000, save_every=5000)" + ] + }, + { + "source": [ + "Generate text from your trained model!" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[1mROMEO:\u001b[0m\nAbook, ho! forthing me, gentle Earl's royal king,\nAnd this, I, with that I do not beseech you\nTo visit the battle, that I should believe you,\nWhich I would never\n==========\n\u001b[1mROMEO:\u001b[0m\nConfound is gone, thou art a maid into the widow;\nPut up my life and make me no harmony\nAnd make thee I know uncle,\nUnconted and curses: therefore in my\n==========\n\u001b[1mROMEO:\u001b[0m\nGod push! but what days to see\nThe giving bleedom's heart I do? Therefore,\nAnd most unless I had rather. 
He saddle\nTake your cold shack down; and so far I\n==========\n\u001b[1mROMEO:\u001b[0m\nPersetain'd up the earth of mercy,\nAnd never yet, the sun to make him all the\nMore than my battle.\n\nROMEO:\nI warrant him, to know, we'll not do't, but hate me\n==========\n\u001b[1mROMEO:\u001b[0m\nMethinks I am a mile, and trench one\nThy winded makes, in faults and cast\nWith one to meether, of twenty days,\nThat in my waters, that f\n==========\n\u001b[1mROMEO:\u001b[0m\nO, here is such a woman guilty.\n\nROMEO:\nI do not think it; I should be renowned\nThat I am in that which can controy\nA bawd I take it to the purpose.\n\nJU\n==========\n\u001b[1mROMEO:\u001b[0m\nI know not what I am.\n\nFLORIZEL:\nAy, as I did,\nI would be adverpite of the homely treason\nFrom the doubled in the farm of his bed.\nTa\n==========\n\u001b[1mROMEO:\u001b[0m\nI pray you, he would have taken to him but,\nAnd freely mark his into a fine of it,\nSpeak to the second to our cheek;\nAnd every day, and sanctious cover\n==========\n\u001b[1mROMEO:\u001b[0m\nI had left me--born to be drawn.\n\nJULIET:\nMy husbour, I will have thee here:\nAnd, I have found to seek thyself.\n\nJULIET:\nI will be not b\n==========\n\u001b[1mROMEO:\u001b[0m\nThat is a hour,\nThe castard is, I'll not buy, or indeeding.\n\nNurse:\nLADY CAPULET:\nThe matter, that ta'en as I may find thee.\n\n" + ] + } + ], + "source": [ + "ai.generate(10, prompt=\"ROMEO:\")" + ] + }, + { + "source": [ + "With your trained model, you can reload the model at any time by providing the `pytorch_model.bin` model weights, the `config`, and the `tokenizer`." + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "ai2 = aitextgen(model=\"trained_model/pytorch_model.bin\",\n", + " tokenizer_file=\"aitextgen.tokenizer.json\",\n", + " config=\"trained_model/config.json\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[1mROMEO:\u001b[0m\nBoy, unreacher, unhallupony, in Padua,\nUntimely fall till I be learn'd.\n\nROMEO:\nFie, good friar, be quick, for I am,\nI'll\n==========\n\u001b[1mROMEO:\u001b[0m\nI'll be plain, I am a tail of blessed wounds;\nFor I am dead, I have not borne to make\nA couple of her fortune, but that I'll bear,\nAnd say 'Ay, chur\n==========\n\u001b[1mROMEO:\u001b[0m\nAnd yet I am a resolution of my dear dear:\nIf I have not reason to do me say\nI'll deny the sea of my body to answer,\nAnd all thy tale, or I have my m\n==========\n\u001b[1mROMEO:\u001b[0m\nIntenty to a bawd of my bait,--\n\nJULIET:\nNo, I hope to know the title,\nFor that I wish her place.\n\nJULIET:\nDo I assure her?\n==========\n\u001b[1mROMEO:\u001b[0m\nO, what's the parle that I chide thee,\nThat honourable may be, that I have still'd thee:\nI pray thee, my lord.\n\nMERCUTIO:\nI', my lord.\n\nROMEO:\nHere is a\n==========\n\u001b[1mROMEO:\u001b[0m\nAnd, for I am, and not talk of that?\n\nROMEO:\nWhere's my child, I would guess thee here.\n\nJULIET:\nNay, boy, I'll not be bowling why I;\nO thou\n==========\n\u001b[1mROMEO:\u001b[0m\nO, but thou hast seen thee of mine own.\n\nROMEO:\nI would assist thee--\n\nJULIET:\nAy, it is, and not so.\n\nROMEO:\nNo, but that I must told me with it.\n\nROMEO\n==========\n\u001b[1mROMEO:\u001b[0m\nNo, no, nor I am. 
I am content.\n\nBENVOLIO:\nI will not, sir: but I have required\nAs I am grown in the lawful virtue\nThat it hath bid you think, and I\n==========\n\u001b[1mROMEO:\u001b[0m\nThat I should pardon, I would be gone.\n\nESCALUS:\nI should believe you, sir, sir, ay, I would not\nnot know more, but that I can, but I would have savour me.\n\nP\n==========\n\u001b[1mROMEO:\u001b[0m\nAnd thou, I will find out thy life the wind of love.\n\nROMEO:\nIt is the morning groom of it.\n\nJULIET:\nFie, good sweet boy, I will take my leave to a happy day,\n" + ] + } + ], + "source": [ + "ai2.generate(10, prompt=\"ROMEO:\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# MIT License\n", + "\n", + "Copyright (c) 2021 Max Woolf\n", + "\n", + "Permission is hereby granted, free of charge, to any person obtaining a copy\n", + "of this software and associated documentation files (the \"Software\"), to deal\n", + "in the Software without restriction, including without limitation the rights\n", + "to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n", + "copies of the Software, and to permit persons to whom the Software is\n", + "furnished to do so, subject to the following conditions:\n", + "\n", + "The above copyright notice and this permission notice shall be included in all\n", + "copies or substantial portions of the Software.\n", + "\n", + "THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n", + "IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n", + "FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n", + "AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n", + "LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n", + "OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n", + "SOFTWARE." + ] + } + ], + "metadata": { + "kernelspec": { + "name": "python3", + "display_name": "Python 3.9.1 64-bit", + "metadata": { + "interpreter": { + "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49" + } + } + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1-final" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index b63ad53..c87cc73 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,4 @@ -transformers>=4.0.0 +transformers>=4.3.0 fire>=0.3.0 -pytorch-lightning>=1.0.8 -tokenizers>=0.9.4 +pytorch-lightning>=1.2.0 torch>=1.6.0 \ No newline at end of file diff --git a/setup.py b/setup.py index a9f31b9..3165427 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ setup( name="aitextgen", packages=["aitextgen"], # this must be the same as the name above - version="0.3.0", + version="0.4.0", description="A robust Python tool for text-based AI training and generation using GPT-2.", long_description=open("README.md", "r", encoding="utf-8").read(), long_description_content_type="text/markdown", @@ -17,10 +17,9 @@ python_requires=">=3.6", include_package_data=True, install_requires=[ - "transformers>=4.0.0", + "transformers>=4.3.0", "fire>=0.3.0", - "pytorch-lightning>=1.0.8", - "tokenizers>=0.9.4", + "pytorch-lightning>=1.2.0", "torch>=1.6.0", ], )
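The 0.4.0 changelog above introduces gradient checkpointing (enabled automatically for the 355M and 774M models) and layer freezing via the new `freeze_layers` / `num_layers_freeze` parameters on `train()`. A minimal sketch of how those options fit together for a 774M finetune; the corpus file name, step count, and number of frozen layers are illustrative placeholders, not values taken from this diff:

```python
from aitextgen import aitextgen
from aitextgen.TokenDataset import TokenDataset

# Downloading the 355M/774M TF weights enables gradient checkpointing automatically;
# it can also be requested explicitly with gradient_checkpointing=True.
ai = aitextgen(tf_gpt2="774M", to_gpu=True)

# Encode the training corpus ("input.txt" is a placeholder) with the default GPT-2 tokenizer.
data = TokenDataset("input.txt", block_size=1024)

# Freeze the first 24 Transformer layers (the 774M model has 36), which keeps their
# weights fixed during training and reduces the optimizer state that must be stored.
ai.train(
    data,
    batch_size=1,
    num_steps=2000,
    freeze_layers=True,
    num_layers_freeze=24,
)
```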
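Schema-based generation, also listed in the 0.4.0 changelog, stores `schema_tokens` (and optionally `schema_return`) on the model config and makes `generate(schema=True)` return one dict per generated text instead of a plain string. A rough sketch, assuming a model that was already trained on data annotated with these marker tokens; the marker strings and file paths are hypothetical:

```python
from aitextgen import aitextgen

# The marker strings below are hypothetical; the training data must already contain them.
ai = aitextgen(
    model="trained_model/pytorch_model.bin",
    config="trained_model/config.json",
    tokenizer_file="aitextgen.tokenizer.json",
    schema_tokens=["<|title|>", "<|body|>"],
)

# Each result is a dict keyed by the schema token names; with the default
# normalize_key=True, "<|title|>" becomes the key "title".
records = ai.generate(n=3, schema=True, return_as_list=True)
for record in records:
    print(record["title"])
```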
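The two helpers added to `aitextgen/utils.py` above drive the schema handling and the new PyTorch-level special-token stripping: `find_index_of_subset` returns the index just past a matched subsequence (or `None`), and `skip_special_tokens` filters the given ids out of a 1D tensor. A small, self-contained demonstration of their behavior; the token id values are arbitrary:

```python
import torch

from aitextgen.utils import find_index_of_subset, skip_special_tokens

ids = [50256, 12, 7, 99, 50256]

# Returns the index *after* the matched subsequence, or None if absent.
assert find_index_of_subset(ids, [12, 7]) == 3
assert find_index_of_subset(ids, [7, 12]) is None

# Drops the listed special token ids from a 1D tensor and returns a plain list.
filtered = skip_special_tokens(torch.tensor(ids), "cpu", [50256])
assert filtered == [12, 7, 99]
```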
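The training notebook above notes that `batch_size=8` with `num_steps=50000` amounts to roughly one pass over the Shakespeare corpus. A quick back-of-the-envelope check, using the 462,820 subsets reported by the `TokenDataset` cell:

```python
# Rough epoch estimate for the notebook's training settings.
batch_size = 8
num_steps = 50_000
dataset_subsets = 462_820  # reported by TokenDataset(...) in the notebook

subsets_sampled = batch_size * num_steps        # 400,000 subsets sampled
epochs = subsets_sampled / dataset_subsets      # ~0.86 passes over the data
print(f"~{epochs:.2f} epochs")
```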