diff --git a/configs/text2text/babi.yaml b/configs/text2text/babi.yaml
new file mode 100644
index 00000000..50a7eb46
--- /dev/null
+++ b/configs/text2text/babi.yaml
@@ -0,0 +1,59 @@
+training:
+    problem:
+        name: &name BABI
+        batch_size: &b 1
+        data_type: train
+        embedding_type: glove.6B.100d
+        embedding_size: 50
+        use_mask: false
+        joint_all: true
+        one_hot_embedding: true
+        tasks: [1, 2, 3]
+        ten_thousand_examples: true
+        truncation_length: 50
+        directory: ./
+
+
+    gradient_clipping: 20
+
+    seed_numpy: 847055145
+    seed_torch: 697881609
+
+    optimizer:
+        name: RMSprop
+        lr: 0.0001
+
+    # Settings parameters.
+    terminal_conditions:
+        loss_stop: 1.0e-2
+        epoch_limit: 100
+
+validation:
+    problem:
+        name: *name
+        batch_size: *b
+        data_type: valid
+        embedding_type: glove.6B.100d
+        joint_all: true
+        one_hot_embedding: true
+        tasks: [1, 2, 3]
+        ten_thousand_examples: true
+        truncation_length: 50
+
+testing:
+    problem:
+        name: *name
+        batch_size: *b
+        data_type: test
+        embedding_type: glove.6B.100d
+        joint_all: true
+        one_hot_embedding: true
+        tasks: [1, 2, 3]
+        ten_thousand_examples: true
+        truncation_length: 50
+
+model:
+    name: LSTM
+    # Hidden state.
+    hidden_state_size: 256
+    num_layers: 1
diff --git a/miprometheus/problems/question_context_to_class/__init__.py b/miprometheus/problems/question_context_to_class/__init__.py
new file mode 100644
index 00000000..cf4893a7
--- /dev/null
+++ b/miprometheus/problems/question_context_to_class/__init__.py
@@ -0,0 +1,4 @@
+from .babiqa_dataset_single_question import bAbIQASingleQuestion as BABI
+
+
+__all__ = ['BABI']
diff --git a/miprometheus/problems/question_context_to_class/babiqa_dataset_single_question.py b/miprometheus/problems/question_context_to_class/babiqa_dataset_single_question.py
new file mode 100644
index 00000000..d1bdc649
--- /dev/null
+++ b/miprometheus/problems/question_context_to_class/babiqa_dataset_single_question.py
@@ -0,0 +1,515 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) IBM Corporation 2018
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+babiqa_dataset_single_question.py: contains code for loading the bAbI QA dataset
+(based on the parsing used in torchtext).
+"""
+__author__ = "Vincent Albouy, Ryan L. McAvoy"
+
+import os
+
+import gzip
+import shutil
+import tarfile
+import zipfile
+
+import requests
+import torch
+import torch.utils.data
+from tqdm import tqdm
+
+from miprometheus.utils.app_state import AppState
+from miprometheus.utils.loss.masked_cross_entropy_loss import MaskedCrossEntropyLoss
+from miprometheus.utils.problems_utils.language import Language
+from miprometheus.problems.seq_to_seq.text2text.text_to_text_problem import TextToTextProblem
+
+
+class bAbIQASingleQuestion(TextToTextProblem):
+    """
+    Problem class for loading the bAbI QA dataset (one question per sample) using torchtext-style parsing.
+
+    Inherits from TextToTextProblem.
+    """
+
+    def __init__(self, params):
+        """
+        Initializes the bAbI QA problem: calls the base class initialization and sets the properties
+        using the provided parameters.
+
+        :param params: Dictionary of parameters (read from the configuration file).
+        """
+        super(bAbIQASingleQuestion, self).__init__(params)
+
+        self.directory = '~/data/babi/'
+
+        # Data split to load: 'train', 'valid' or 'test'.
+        self.data_type = params['data_type']
+
+        self.use_batches = params['batch_size']
+
+        # Task number(s) to train on.
+        self.tasks = params['tasks']
+
+        self.loss_function = MaskedCrossEntropyLoss()
+
+        self.tenK = params['ten_thousand_examples']
+
+        self.one_hot_embedding = params['one_hot_embedding']
+
+        self.batch_size = params['batch_size']
+
+        self.embedding_type = params['embedding_type']
+
+        self.embedding_size = params['embedding_size']
+
+        self.init_token = '<sos>'
+
+        self.pad_token = '<pad>'
+
+        self.eos_token = '<eos>'
+
+        self.use_mask = False
+
+        self.urls = ['http://www.thespermwhale.com/jaseweston/babi/tasks_1-20_v1-2.tar.gz']
+
+        self.name = 'BABIDataset'
+
+        self.dirname = ''
+
+        self.data = self.load_data(tasks=self.tasks, tenK=self.tenK, add_punctuation=True, data_type=self.data_type)
+
+        # Create an object of the Language class - it will be used to create the word embeddings.
+        self.language = Language('lang')
+
+        self.default_values = {'input_item_size': self.embedding_size, 'output_item_size': self.embedding_size}
+
+        self.data_definitions = {'sequences': {'size': [-1, -1, self.embedding_size], 'type': [torch.Tensor]},
+                                 'targets': {'size': [-1], 'type': [torch.Tensor]},
+                                 'current_question': {'size': [-1, 1], 'type': [list, str]},
+                                 'masks': {'size': [-1], 'type': [torch.Tensor]},
+                                 }
+
+        # Build the embeddings.
+        if self.one_hot_embedding:
+            self.dictionaries, self.itos_dict = self.build_dictionaries_one_hot()
+        else:
+            self.dictionaries, self.itos_dict = self.build_dictionaries()
+
+    def __len__(self):
+        """Returns the number of questions in the dataset."""
+        return len(self.data)
+
+    def __getitem__(self, idx):
+        """
+        Getter method to access the dataset and return a sample.
+
+        :param idx: index of the sample to return.
+        :return: DataDict({'sequences', 'targets', 'current_question', 'masks'})
+        """
+        # Get the question with index idx.
+        current_question = self.data[idx]
+
+        written_story, written_answers = current_question
+
+        current_question = [" ".join(written_story), " ".join(written_answers)]
+
+        story = self.embed_batch(written_story)
+
+        answer = self.to_dictionary_indexes(self.dictionaries, written_answers)
+
+        mask = torch.zeros((story.shape[0])).type(AppState().ByteTensor)
+
+        target = torch.zeros((story.shape[0])).type(AppState().LongTensor)
+
+        # Mark every '_' placeholder in the story and store the corresponding answer index.
+        k = 0
+        for i, word in enumerate(current_question[0].split(' ')):
+            if word == '_':
+                mask[i] = 1
+                target[i] = answer[k]
+                k = k + 1
+
+        # Make a dictionary with all the outputs.
+        data_dict = self.create_data_dict()
+        data_dict['sequences'] = story
+        data_dict['targets'] = target
+        data_dict['current_question'] = current_question
+        data_dict['masks'] = mask
+
+        # Return the final DataDict.
+        return data_dict
+
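+    # Illustration (a made-up two-sentence story, not an actual sample from the data files): for
+    #     story  = "Mary moved to the bathroom . Where is Mary ? _"
+    #     answer = "bathroom"
+    # 'sequences' holds one embedded vector per story word, 'masks' is zero everywhere except at the
+    # position of the '_' placeholder, and 'targets' stores the dictionary index of 'bathroom' there.
+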
+    def collate_babi(self, batch):
+        """
+        Collate method that creates a batch from individual samples.
+
+        :param batch: list of individual samples (DataDicts) returned by __getitem__.
+        :return: DataDict({'sequences', 'targets', 'current_question', 'masks'}) with batched, padded tensors.
+        """
+        # Get sizes.
+        context_length = max(d["sequences"].shape[0] for d in batch)
+        answer_length = max(d["targets"].shape[0] for d in batch)
+        batch_size = len(batch)
+        word_size = batch[0]["sequences"].shape[-1]
+
+        # Create placeholders.
+        sequences = torch.zeros((batch_size, context_length, word_size)).type(AppState().dtype)
+        targets = torch.zeros((batch_size, answer_length)).type(AppState().LongTensor)
+        mask = torch.zeros((batch_size, answer_length)).type(AppState().ByteTensor)
+
+        # Copy each sample into the (zero-padded) placeholders.
+        current_question = []
+        for i, d in enumerate(batch):
+            c_shape = d["sequences"].shape
+            a_shape = d["targets"].shape
+            sequences[i, :c_shape[0], :c_shape[1]] = d["sequences"]
+            targets[i, :a_shape[0]] = d["targets"]
+            mask[i, :a_shape[0]] = d["masks"]
+            current_question.append(d["current_question"])
+
+        # Make a dictionary with all the outputs.
+        data_dict = self.create_data_dict()
+        data_dict['sequences'] = sequences
+        data_dict['targets'] = targets
+        data_dict['current_question'] = current_question
+        data_dict['masks'] = mask
+
+        # Return the final DataDict.
+        return data_dict
+
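+    # Illustration (hypothetical batch of two samples with story lengths 11 and 8): 'sequences'
+    # becomes a [2, 11, word_size] tensor and 'targets' / 'masks' become [2, 11] tensors; the
+    # shorter sample is simply zero-padded on the right.
+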
+    def evaluate_loss(self, data_dict, logits):
+        """
+        Calculates the (masked) cross entropy loss for a given batch.
+
+        WARNING: applies the mask to both logits and targets!
+
+        :param data_dict: DataDict({'sequences', 'targets', 'current_question', 'masks'}).
+        :param logits: Predictions produced by the model.
+        """
+        # Check if the mask should be used - if so, use the masked loss function.
+        if self.use_mask:
+            loss = self.loss_function(
+                logits, data_dict['targets'], data_dict['masks'])
+        else:
+            pred = logits.transpose(1, 2)
+            loss = self.loss_function(pred, data_dict['targets'], data_dict['masks'])
+
+        return loss
+
+    def build_dictionaries_one_hot(self):
+        """
+        Creates the word dictionaries for bAbI QA using one-hot vectors:
+
+        - 1. Collects all words appearing in the train, valid and test splits.
+        - 2. Assigns each word an index; this index is the non-zero dimension of its one-hot vector.
+        """
+        # Load the data from all splits.
+        data = self.load_data(tasks=self.tasks, tenK=self.tenK, add_punctuation=True, data_type='train',
+                              outmod="one_hot")
+        data = data + self.load_data(tasks=self.tasks, tenK=self.tenK, add_punctuation=True, data_type='valid',
+                                     outmod="one_hot")
+        data = data + self.load_data(tasks=self.tasks, tenK=self.tenK, add_punctuation=True, data_type='test',
+                                     outmod="one_hot")
+
+        # Make placeholder dictionaries with the special characters.
+        answ_to_ix = {".": 0, "?": 1, "_": 2}
+        itos_d = [".", "?", "_"]
+
+        # Display a progress bar while going through the data.
+        self.fix_length = 0
+        for q in tqdm(data):
+            story, answers = q
+            self.fix_length = max(self.fix_length, len(story))
+
+            # Go through all the words of the story.
+            for answer in story:
+                a = answer.lower()
+                if a not in answ_to_ix:
+                    ix = len(answ_to_ix)
+                    answ_to_ix[a] = ix
+                    itos_d.append(a)
+
+            # Go through all the answers.
+            for answer in answers:
+                a = answer.lower()
+                if a not in answ_to_ix:
+                    ix = len(answ_to_ix)
+                    answ_to_ix[a] = ix
+                    itos_d.append(a)
+
+        # Return the corresponding dictionaries.
+        return answ_to_ix, itos_d
+
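+    # Illustration: the returned word-to-index dictionary always starts as {'.': 0, '?': 1, '_': 2};
+    # every other word receives the next free index in order of first appearance across the three
+    # splits, and itos_d is the inverse (index -> word) list used to decode predictions.
+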
+    def build_dictionaries(self):
+        """
+        Creates the word dictionaries for bAbI QA using pretrained embeddings:
+
+        - 1. Collects all words appearing in the train, valid and test splits.
+        - 2. Uses a Language object to create the embeddings.
+
+        If this is the first time you run this code, it will take longer, as the embeddings are
+        downloaded through torchtext.
+        """
+        print(' ---> Constructing the dictionaries with word embeddings, may take some time ')
+
+        # Empty list meant to store all the words of the dataset.
+        text = []
+        tasks = self.tasks
+
+        data = self.load_data(tasks=tasks, tenK=self.tenK, add_punctuation=True, data_type='train',
+                              outmod="embedding")
+        data = data + self.load_data(tasks=tasks, tenK=self.tenK, add_punctuation=True, data_type='valid',
+                                     outmod="embedding")
+        data = data + self.load_data(tasks=tasks, tenK=self.tenK, add_punctuation=True, data_type='test',
+                                     outmod="embedding")
+
+        # Make placeholder dictionaries with the special characters.
+        answ_to_ix = {".": 0, "?": 1, "_": 2}
+        itos_d = [".", "?", "_"]
+
+        # Load all the words of the dataset into the text list.
+        self.fix_length = 0
+        for q in tqdm(data):
+            # Display a progress bar.
+            story, answers = q
+            self.fix_length = max(self.fix_length, len(story))
+            for word in story:
+                text.extend([word.lower()])
+
+            # Go through all the words of the story.
+            for answer in story:
+                a = answer.lower()
+                if a not in answ_to_ix:
+                    ix = len(answ_to_ix)
+                    answ_to_ix[a] = ix
+                    itos_d.append(a)
+
+            # Go through all the answers.
+            for answer in answers:
+                a = answer.lower()
+                if a not in answ_to_ix:
+                    ix = len(answ_to_ix)
+                    answ_to_ix[a] = ix
+                    itos_d.append(a)
+
+        # Build the embeddings from the chosen database, e.g. glove.6B.100d.
+        self.language.build_pretrained_vocab(text, vectors=self.embedding_type, tokenize=self.tokenize)
+
+        # Return the corresponding dictionaries.
+        return answ_to_ix, itos_d
+
+    def download_from_url(self, url, path):
+        """Downloads a file, with extra logic (from tensor2tensor) for Google Drive."""
+        # Get the url and write the file to path.
+        if 'drive.google.com' not in url:
+            r = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'})
+            with open(path, "wb") as file:
+                file.write(r.content)
+            return
+        print('downloading from Google Drive; may take a few minutes')
+        confirm_token = None
+        session = requests.Session()
+        response = session.get(url, stream=True)
+        for k, v in response.cookies.items():
+            if k.startswith("download_warning"):
+                confirm_token = v
+
+        if confirm_token:
+            url = url + "&confirm=" + confirm_token
+            response = session.get(url, stream=True)
+
+        # Open the file and write the chunks.
+        chunk_size = 16 * 1024
+        with open(path, "wb") as f:
+            for chunk in response.iter_content(chunk_size):
+                if chunk:
+                    f.write(chunk)
+
+    def load_data(self, path=None, root='data', tasks=[1], tenK=False, add_punctuation=True, data_type='train',
+                  outmod=''):
+        """
+        Combines all requested tasks into a single file and then parses that combined file.
+        """
+        if tenK:
+            self.dirname = os.path.join('tasks_1-20_v1-2', 'en-valid-10k')
+        else:
+            self.dirname = os.path.join('tasks_1-20_v1-2', 'en-valid')
+
+        if path is None:
+            path = self.download(root)
+
+        file_data = os.path.join(path, 'collected_' + data_type + outmod + '.txt')
+        with open(file_data, 'w') as tf:
+            for task in tasks:
+                with open(
+                        os.path.join(path,
+                                     'qa' + str(task) + '_' + data_type + '.txt')) as f:
+                    tf.write(f.read())
+        return self.parse(file_data, add_punctuation)
+
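+    # Illustration (assuming tenK=True and tasks=[1, 2]): the method concatenates
+    # .../en-valid-10k/qa1_train.txt and .../en-valid-10k/qa2_train.txt into a single
+    # 'collected_' + data_type + outmod + '.txt' file before handing it to parse().
+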
+    def download(self, root, check=None):
+        """
+        Downloads and unzips an online archive (.zip, .gz, or .tgz).
+
+        Arguments:
+            root (str): Folder to download data to.
+            check (str or None): Folder whose existence indicates
+                that the dataset has already been downloaded, or
+                None to check the existence of root/{self.name}.
+
+        Returns:
+            str: Path to extracted dataset.
+        """
+        # Get the path.
+        path = os.path.join(root, self.name)
+        check = path if check is None else check
+
+        # Download the data.
+        if not os.path.isdir(check):
+            for url in self.urls:
+                if isinstance(url, tuple):
+                    url, filename = url
+                else:
+                    filename = os.path.basename(url)
+                zpath = os.path.join(path, filename)
+                if not os.path.isfile(zpath):
+                    if not os.path.exists(os.path.dirname(zpath)):
+                        os.makedirs(os.path.dirname(zpath))
+                    print('downloading {}'.format(filename))
+                    self.download_from_url(url, zpath)
+                zroot, ext = os.path.splitext(zpath)
+                _, ext_inner = os.path.splitext(zroot)
+
+                # Unzip the data.
+                if ext == '.zip':
+                    with zipfile.ZipFile(zpath, 'r') as zfile:
+                        print('extracting')
+                        zfile.extractall(path)
+
+                # tarfile cannot handle bare .gz files.
+                elif ext == '.tgz' or ext == '.gz' and ext_inner == '.tar':
+                    with tarfile.open(zpath, 'r:gz') as tar:
+                        dirs = [member for member in tar.getmembers()]
+                        tar.extractall(path=path, members=dirs)
+
+                # In case it is a bare .gz file.
+                elif ext == '.gz':
+                    with gzip.open(zpath, 'rb') as gz:
+                        with open(zroot, 'wb') as uncompressed:
+                            shutil.copyfileobj(gz, uncompressed)
+
+        # Return the path to the extracted dataset.
+        return os.path.join(path, self.dirname)
+
+    def parse(self, file_data, add_punctuation):
+        """
+        Parses the collected bAbI file into (story, answers) pairs - one pair per question.
+
+        :param file_data: data file to be parsed.
+        :param add_punctuation: boolean deciding whether punctuation is added to the stories.
+        :return: data: parsed data.
+        """
+        # Make empty lists.
+        data, story, story2 = [], [], []
+
+        # Open the file.
+        with open(file_data, 'r') as f:
+            for line in f:
+                tid, text = line.rstrip('\n').split(' ', 1)
+                # A line id of '1' marks the beginning of a new story.
+                if tid == '1':
+                    story = []
+                    story2 = []
+                    answers = []
+                # Statement line - don't delete the period.
+                if text.endswith('.'):
+                    for a in text[:-1].split():
+                        assert not isinstance(a, list)
+                        story.append(a)
+                    if add_punctuation:
+                        story.append('.')
+                else:
+                    # Question line - remove any leading or trailing whitespace after splitting.
+                    query, answer, supporting = (x.strip() for x in text.split('\t'))
+
+                    for a in query[:-1].split(' '):
+                        story2.append(a)
+                    if add_punctuation:
+                        story2.append('?')
+                    for a in answer.split(','):
+                        answers.append(a)
+                    # Add the '_' placeholder that the answer will be predicted at.
+                    story2.extend(['_'])
+
+                    # Emit one sample: the story so far followed by the question.
+                    story_f = list(story)
+                    story_f.extend(story2)
+                    if story_f:
+                        data.append((story_f, answers))
+
+                    # Set answers and question back to empty lists.
+                    answers = []
+                    story2 = []
+
+        return data
+
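+    # Illustration (a made-up two-line task file, tabs written as <TAB>):
+    #     1 Mary moved to the bathroom.
+    #     2 Where is Mary?<TAB>bathroom<TAB>1
+    # parses into the single pair
+    #     (['Mary', 'moved', 'to', 'the', 'bathroom', '.', 'Where', 'is', 'Mary', '?', '_'], ['bathroom']).
+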
+
+if __name__ == "__main__":
+
+    """Unit test that generates a batch and displays a sample."""
+
+    babi_tasks = list(range(1, 21))
+
+    params = {'directory': '/', 'tasks': babi_tasks, 'data_type': 'train', 'batch_size': 10,
+              'embedding_type': 'glove.6B.100d', 'embedding_size': 38, 'ten_thousand_examples': True,
+              'one_hot_embedding': True, 'truncation_length': 50}
+
+    babi = bAbIQASingleQuestion(params)
+    sample = babi[12]
+    print(sample)
+    print('__getitem__ works.')
+
+    # Wrap a DataLoader on top of this Dataset subclass.
+    from torch.utils.data.dataloader import DataLoader
+
+    batch_size = 1
+    dataloader = DataLoader(dataset=babi, collate_fn=babi.collate_babi,
+                            batch_size=batch_size, shuffle=True, num_workers=0)
+
+    # Try to see if there is a speed up when generating batches with multiple workers.
+    import time
+    s = time.time()
+    # for i, batch in enumerate(dataloader):
+    #     print('Batch # {} - {}'.format(i, type(batch)))
+    # print('Number of workers: {}'.format(dataloader.num_workers))
+    # print('time taken to exhaust the dataset for a batch size of {}: {}s'.format(batch_size, time.time() - s))
+
+    batch = next(iter(dataloader))
+    print(batch)
+
+    print('Unit test completed')
+    exit()
diff --git a/miprometheus/problems/seq_to_seq/seq_to_seq_problem.py b/miprometheus/problems/seq_to_seq/seq_to_seq_problem.py
index 953359fd..25c7d734 100644
--- a/miprometheus/problems/seq_to_seq/seq_to_seq_problem.py
+++ b/miprometheus/problems/seq_to_seq/seq_to_seq_problem.py
@@ -20,12 +20,14 @@
 
 """
-__author__ = "Tomasz Kornuta & Vincent Marois"
+__author__ = "Tomasz Kornuta & Vincent Marois & Vincent Albouy"
 
 from miprometheus.problems.problem import Problem
 import torch
 
+
+
 class SeqToSeqProblem(Problem):
     """
     Class representing base class for all sequential problems.
@@ -75,6 +77,9 @@ def evaluate_loss(self, data_dict, logits):
 
         return loss
 
+
+
+
 if __name__ == '__main__':
 
     from miprometheus.utils.param_interface import ParamInterface
diff --git a/miprometheus/problems/seq_to_seq/text2text/text_to_text_problem.py b/miprometheus/problems/seq_to_seq/text2text/text_to_text_problem.py
index 555bd333..0b0542df 100644
--- a/miprometheus/problems/seq_to_seq/text2text/text_to_text_problem.py
+++ b/miprometheus/problems/seq_to_seq/text2text/text_to_text_problem.py
@@ -51,6 +51,7 @@
 import torch
 import torch.nn as nn
 from miprometheus.problems.seq_to_seq.seq_to_seq_problem import SeqToSeqProblem
+from miprometheus.utils.app_state import AppState
 
 # global tokens
 PAD_token = 0
@@ -326,6 +327,85 @@ def tensors_from_pairs(self, pairs, input_lang, output_lang):
 
         """
         return [self.tensors_from_pair(pair, input_lang, output_lang) for pair in pairs]
+
+    def to_dictionary_indexes(self, dictionary, sentence):
+        """
+        Outputs the dictionary indexes corresponding to the words in the sequence.
+        Case insensitive.
+        """
+        idxs = torch.tensor([dictionary[w.lower()] for w in sentence]).type(AppState().LongTensor)
+        return idxs
+
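+    # Illustration (hypothetical tiny dictionary): with
+    #     dictionary = {'.': 0, '?': 1, '_': 2, 'bathroom': 3}
+    # to_dictionary_indexes(dictionary, ['Bathroom']) returns tensor([3]) - every word is
+    # lower-cased before the lookup.
+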
+    def indices_to_words(self, int_sentence):
+        """
+        Maps a tensor of dictionary indexes back to the corresponding words (uses self.itos_dict,
+        which is expected to be defined by the derived problem).
+        """
+        sentences = []
+        for ind in int_sentence[0, :]:
+            sentences.append(self.itos_dict[ind])
+        return sentences
+
+    def embed_sentence_one_hot(self, sentence):
+        """
+        Embeds an entire sentence with one-hot vectors built from the problem's dictionary.
+
+        :param sentence: A string containing the words to embed.
+        :returns: FloatTensor of embedded vectors [sentence_length, dictionary size].
+        """
+        size_hot = len(self.dictionaries)
+        outsentence = torch.zeros((len(sentence.split(" ")), size_hot))
+
+        # Embed one word at a time.
+        for i, word in enumerate(sentence.split(" ")):
+            if not word.lower() == self.pad_token:
+                index = self.dictionaries[word.lower()]
+                outsentence[i, index] = 1
+
+        return outsentence
+
+    # TODO: consider renaming this method to embed_sentence.
+    def embed_batch(self, minibatch):
+        """
+        Embeds a tokenized story (list of words), either with one-hot vectors or with the
+        pretrained embeddings of the Language object, depending on self.one_hot_embedding.
+        """
+        ex = minibatch
+        sentence = " ".join(ex)
+
+        if self.one_hot_embedding:
+            sent_embed = self.embed_sentence_one_hot(sentence)
+        else:
+            sent_embed = self.language.embed_sentence(sentence)
+
+        return sent_embed
+
+    def tokenize(self, sentence):
+        """Splits a sentence (string) into a list of words."""
+        return sentence.split(' ')
+
+    def detokenize_story(self, minibatch):
+        """Converts a story from lists of tokens back to strings (list to string)."""
+        a = []
+        for ex in minibatch:
+            b = []
+            for sentence in ex:
+                b.append(" ".join(sentence))
+            a.append(b)
+        return a
+
+    def tokenize_story(self, minibatch):
+        """Converts a story from strings to lists of tokens (string to list)."""
+        a = []
+        for ex in minibatch:
+            b = []
+            for sentence in ex:
+                b.append(self.tokenize(sentence))
+            a.append(b)
+        return a
+
 
 class Lang(object):
     """
@@ -395,3 +475,5 @@ def add_word(self, word):
         else:
             # this word has been seen before, simply update its occurrence
             self.word2count[word] += 1
+
+