diff --git a/.gitignore b/.gitignore
index 0c2217c..f5f8987 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,7 +21,7 @@ hs_err_pid*
 backend/node_modules/
 
 # config file and various api keys
-data/config.py
+data/public_data/config.py
 
 # python compiles
 data/__pycache__/
diff --git a/.travis.yml b/.travis.yml
index 1172a3f..49b183e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -24,19 +24,13 @@ before_script:
   - cd ..
 before_install:
   - cd data
-  - python3 -V
-  - python3 -c "import sys;print('\n'.join(sys.path))"
   - sudo apt-get -y install python3-pip
   - pip3 install bs4
   - pip3 install lxml
-  - pip3 list
-  - pip3 show lxml
   - cd ..
 install:
   - cd backend
   - npm install
 script:
   - npm test
-  - cd ..
-  - cd data
-  - python3 -m unittest test/test_moviedata.py
+
diff --git a/data/cron.py b/data/cron.py
index ee24d6d..949f194 100644
--- a/data/cron.py
+++ b/data/cron.py
@@ -1,14 +1,23 @@
-import etl.etlprocessor as processor
+from controller import ETLController
 from apscheduler.schedulers.blocking import BlockingScheduler
 
+import logging
+
+
 if __name__ == '__main__':
+    logging.basicConfig(level=logging.WARNING)
     scheduler = BlockingScheduler()
-    processor = processor.ETLProcessor()
-    scheduler.add_job(processor.update_movie_data, args=[1, 1000000, 0])
-    scheduler.add_job(processor.update_movie_data, args=[1000000, 2000000, 5])
-    # scheduler.add_job(processor.update_movie_data, args=[2000000, 3000000, 10])
-    # scheduler.add_job(processor.update_movie_data, args=[3000000, 4000000, 15])
-    # scheduler.add_job(processor.update_movie_data, args=[4000000, 5000000, 20])
-    # scheduler.add_job(processor.update_movie_data, args=[5000000, 6000000, 25])
-    # scheduler.add_job(processor.update_movie_data, args=[6000000, 7000000, 30])
+
+    controller = ETLController()
+    scheduler.add_job(controller.update_movie_data, args=[321535, 1000000, 0])
+    # scheduler.add_job(controller.update_movie_data, args=[1172158, 2000000, 5])
+    # scheduler.add_job(controller.update_movie_data, args=[2033967, 3000000, 10])
+    # scheduler.add_job(controller.update_movie_data, args=[3052760, 4000000, 15])
+
     scheduler.start()
+
+#
+# if __name__ == '__main__':
+#     logging.basicConfig(level=logging.WARNING)
+#     controller = ETLController()
+#     controller.update_movie_rating()
diff --git a/data/etl/cinemalist.py b/data/etl/cinemalist.py
deleted file mode 100644
index 0c75255..0000000
--- a/data/etl/cinemalist.py
+++ /dev/null
@@ -1,96 +0,0 @@
-from bs4 import BeautifulSoup
-from urllib import request, error
-from selenium import webdriver
-from string import capwords
-
-
-class CinemaList:
-
-    gv_cinema_list_home = "https://www.gv.com.sg/GVCinemas"
-
-    cathay_cinema_list_home = "http://www.cathaycineplexes.com.sg/cinemas/"
-
-    sb_cinema_list_home = "http://www.shaw.sg/sw_cinema.aspx"
-
-    def __init__(self):
-        self.driver = webdriver.PhantomJS()
-
-    def get_golden_village_cinema_list(self):
-        """Get a list of dictionaries contain all Golden Village
-        cinema names, and their corresponding url.
-        """
-        url = self.gv_cinema_list_home
-
-        cinema_list = []
-
-        # get raw cinema list
-        raw_cinema_url = []
-        self.driver.get(url)
-        anchors = self.driver.find_element_by_class_name("cinemas-list").find_elements_by_class_name("ng-binding")
-        for anchor in anchors:
-            raw_cinema_url.append(anchor.get_attribute("href"))
-
-        # get actual list, in each url it may contain more than one cinema
-        for cinema_url in raw_cinema_url:
-            self.driver = webdriver.PhantomJS()  # reinstantiate to avoid detach from DOM
-            self.driver.get(cinema_url)
-            div = self.driver.find_elements_by_class_name("ng-binding")
-            for item in div:
-                if item.get_attribute("ng-bind-html") == "cinema.name":
-                    cinema_name = item.text
-                    self.insert_cinema_data(cinema_list, cinema_name, cinema_url)
-        return cinema_list
-
-    def get_cathay_cinema_list(self):
-        """Get a list of dictionaries contain all cathay cinema names.
-        It's corresponding url is None because cathay does not show movies
-        schedule based on individual cinemas in their web page layouts.
-        """
-        cinema_list = []
-
-        url = self.cathay_cinema_list_home
-        web_content = request.urlopen(url).read().decode("utf-8")
-        soup = BeautifulSoup(web_content, "lxml")
-        divs = soup.find_all("div", {"class": "description"})
-        for div in divs:
-            cinema_name = capwords(div.find("h1").text)
-            self.insert_cinema_data(cinema_list, cinema_name, "http://www.cathaycineplexes.com.sg/showtimes/")
-        return cinema_list
-
-    def get_shaw_brother_cinema_list(self):
-        """Get a list of dictionaries contain all SB cinema names,
-        and their corresponding urls
-        """
-        name_list = []
-        url_list = []
-        cinema_list = []
-
-        url = self.sb_cinema_list_home
-        web_content = request.urlopen(url).read().decode("utf-8")
-        soup = BeautifulSoup(web_content, "lxml")
-        divs = soup.find_all("a", {"class": "txtHeaderBold"})
-        for div in divs:
-            name_list.append(div.text)
-
-        buy_tickets = soup.find_all("a", {"class": "txtNormalDim"})
-        for item in buy_tickets:
-            current_link = item["href"]
-            if "buytickets" in current_link:
-                url_list.append("www.shaw.sg/" + item["href"])
-
-        assert len(name_list) == len(url_list) # check whether there is misake in matching cinema name and url
-
-        for i in range(len(name_list)):
-            self.insert_cinema_data(cinema_list, name_list[i], url_list[i])
-
-        return cinema_list
-
-    @staticmethod
-    def insert_cinema_data(cinema_list, cinema_name, cinema_url):
-        inserted_tuple = {
-            "url": cinema_url,
-            "cinema_name": cinema_name
-        }
-        cinema_list.append(inserted_tuple)
-
-
diff --git a/data/etl/etlprocessor.py b/data/etl/etlprocessor.py
deleted file mode 100644
index 92fe7f5..0000000
--- a/data/etl/etlprocessor.py
+++ /dev/null
@@ -1,102 +0,0 @@
-"""
-    Core objective of this etl framework. This is the highest level API.
-    Each one of them will be run in backend on server, at desginated
-    time intervals.
-
-    It includes four main methods in total:
-        1. update movie data
-        2. update movie public rating
-        3. update the list of cinemas in Singapore
-        4. update cinema schedule for each cinema available
-"""
-import etl.extractor as extractor
-import etl.transformer as transformer
-import etl.loader as loader
-
-import utils
-import psycopg2
-import time
-
-
-from urllib import error
-
-
-class ETLProcessor:
-
-    def __init__(self):
-        self.logger = utils.initialise_logger()
-        self.logger.info("Initialise ETL process ...")
-
-        self.extractor = extractor.Extractor(self.logger)
-        self.loader = loader.Loader(self.logger)
-        self.transformer = transformer.Transformer(self.logger)
-
-    def update_movie_data(self, lower, upper, delay):
-        """updates movie data from databases (potentially more than one source)
-            it is a one time process, i.e. data will not be updated constantly
-        """
-        self.logger.info("Initialise movie data retrieval process ...")
-
-        time.sleep(delay)  # delay to avoid conflict
-        existing_movies_id = self.loader.get_movie_id_list()
-
-        for index in range(lower, upper):  # iterate all possible titles
-            imdb_id = utils.imdb_id_builder(index)
-
-            if imdb_id in existing_movies_id:
-                continue
-
-            try:
-                movie_data = self.extractor.extract_movie_data(imdb_id)
-            except error.HTTPError:
-                self.logger.error("Movie ID is not valid." + imdb_id)
-                continue
-            except Exception as e:  # need to find out the exact error type
-                self.logger.error("Movie ID type is not registered." + imdb_id)
-                self.logger.error(e)
-                continue
-
-            try:
-                self.loader.load_movie_data(movie_data)
-            except psycopg2.DataError:
-                self.logger.error("Invalid insertion! Due to the subtext are partially parsed.")
-                continue
-
-        self.logger.info("Movie data update process complete.")
-
-    def update_movie_rating(self):
-        """updates movie rating from popcorn movies (may have to change to raaw implementation in the future)
-        it is a continuous process and data will be updated constantly
-        """
-        self.logger.info("Initialise movie rating update process ...")
-
-        # get list of existing movies
-        id_list = self.loader.get_movie_id_list()
-
-        self.logger.info("Movie rating update process complete.")
-
-    def update_cinema_schedule(self):
-        """
-        updates movie rating from various theatres official page
-        it is a continuous process and data will be updated constantly
-        """
-        self.logger.info("Initialise movie showing update process ...")
-        # get a list of cinemas
-        cinema_list = self.loader.get_cinema_list()  # [0]:cimena_id, [1]:cinema_name, [2]:url
-        # for each cinema
-        for cinema in cinema_list:
-            self.extractor.extract_cinema_schedule(cinema)
-
-            # get all schedules
-            # load into database based on cinema id and imdb id
-            break
-
-        self.logger.info("Movie showing update process complete.")
-
-    def update_cinema_list(self):
-        """update cinema list from various theatres websites"""
-        self.logger.info("Initialise cinema list update process ...")
-        cinema_list = self.extractor.extract_cinema_list()
-        self.loader.load_cinema_list(cinema_list)
-        self.logger.info("Cinema list update process complete.")
-
diff --git a/data/etl/extractor.py b/data/etl/extractor.py
deleted file mode 100644
index b25c108..0000000
--- a/data/etl/extractor.py
+++ /dev/null
@@ -1,52 +0,0 @@
-"""Façade class for various lower level extractors"""
-from etl.moviedata import MovieData
-from etl.movierating import MovieRating
-from etl.cinemalist import CinemaList
-from etl.movieshowing import MovieShowing
-
-
-class Extractor:
-
-    def __init__(self, logger):
-        self.logger = logger
-
-    @staticmethod
-    def extract_movie_data(movie_id):
-        """given imdb_id, return the metadata of that movie from imdb"""
-        data_model = MovieData(movie_id)
-        data_model.build_soup(data_model.get_html_content())
-        data_model.extract_process()
-        return data_model.get_movie_data()
-
-    @staticmethod
-    def extract_movie_rating(movie_id):
-        """given imdb_id, return a list of dictionaries that contain respective
-        rating and votes from each ratings sources
-        """
-        data_model = MovieRating(movie_id)
-        return data_model.get_movie_ratings()
-
-    @staticmethod
-    def extract_cinema_list():
-        """return a list of dictionaries contains all the cinema names and its
-        respective urls
-        """
-        data_model = CinemaList()
-        final_list = []
-        final_list.extend(data_model.get_golden_village_cinema_list())
-        final_list.extend(data_model.get_cathay_cinema_list())
-        final_list.extend(data_model.get_shaw_brother_cinema_list())
-        return final_list
-
-    @staticmethod
-    def extract_cinema_schedule(cinema):
-        data_model = MovieShowing(cinema)
-        data_model.extract_cinema_schedule()
-        return
-
-
-
-
-
-
-
diff --git a/data/etl/loader.py b/data/etl/loader.py
deleted file mode 100644
index e6252ba..0000000
--- a/data/etl/loader.py
+++ /dev/null
@@ -1,77 +0,0 @@
-"""handles all interactions with database"""
-import config
-import psycopg2
-import logging
-
-
-class Loader:
-
-    def __init__(self, logger):
-        self.cursor, self.conn = config.database_connection()
-        self.logger = logger
-
-    # ========
-    #   LOAD
-    # ========
-    def load_movie_data(self, movie_data):
-        try:
-            self.cursor.execute("INSERT INTO movies (movie_id, title, production_year, rated, plot, actors, "
-                                "language, country, runtime, poster_url, genre, director, released, type) "
-                                "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
-                                (movie_data['movie_id'], movie_data['title'], movie_data['production_year'],
-                                 movie_data['rated'],  movie_data['plot'], movie_data['actors'], movie_data['language'],
-                                 movie_data['country'], movie_data['runtime'], movie_data['poster_url'],
-                                 movie_data['genre'], movie_data['director'], movie_data['released'], movie_data['type']))
-            self.conn.commit()
-        except psycopg2.IntegrityError:
-            logging.error("UNIQUE CONSTRAINT violated in Table: movies " + movie_data['movie_id'])
-
-    def load_movie_rating(self, movie_rating):
-        self.cursor.execute("INSERT INTO public_ratings (vote, score, movie_id, source_id) VALUES (%s, %s, %s, %s) "
-                            "ON CONFLICT (movie_id, source_id) "
-                            "DO UPDATE SET (vote, score) = (%s, %s) "
-                            "WHERE public_ratings.movie_id=%s AND public_ratings.source_id=%s",
-                            (movie_rating['votes'], movie_rating['score'], movie_rating['movie_id'],
-                             movie_rating['source_id'], movie_rating['votes'], movie_rating['score'],
-                             movie_rating['movie_id'], movie_rating['source_id']))
-        self.conn.commit()
-
-    def load_cinema_list(self, cinema_list):
-        for cinema in cinema_list:
-            self.cursor.execute("INSERT INTO cinemas (cinema_name, url) VALUES (%s, %s) "
-                                "ON CONFLICT (cinema_name) "
-                                "DO UPDATE SET (cinema_name, url) = (%s, %s)"
-                                "WHERE cinemas.cinema_name=%s",
-                                (cinema['cinema_name'], cinema['url'], cinema['cinema_name'], cinema['url'],
-                                 cinema['cinema_name']))
-
-            self.conn.commit()
-
-    def load_cinema_schedule(self, cinema_schedule):
-        pass
-
-    # ========
-    #   GET
-    # ========
-    def get_movie_id_list(self):
-        self.cursor.execute("SELECT movie_id FROM movies")
-        data_object = self.cursor.fetchall()
-        id_list = []
-        for item in data_object:
-            id_list.append(item[0])
-        return id_list
-
-    def get_movie_validation_info(self, movie_id):
-        self.cursor.execute("SELECT title, released, director FROM movies WHERE movie_id=%s", (movie_id, ))
-        data_object = self.cursor.fetchone()
-        return data_object
-
-    def get_cinema_list(self):
-        """return a list of tuples that contains the information of
-        each cinema"""
-        self.cursor.execute("SELECT * FROM cinemas")
-        data_object= self.cursor.fetchall()
-        cinema_list = []
-        for item in data_object:
-            cinema_list.append(item)
-        return cinema_list
diff --git a/data/etl/moviedata.py b/data/etl/moviedata.py
deleted file mode 100644
index 5bc8670..0000000
--- a/data/etl/moviedata.py
+++ /dev/null
@@ -1,235 +0,0 @@
-"""
-    data class for all imdb movies
-"""
-from bs4 import BeautifulSoup
-from urllib import request, error
-
-import lxml
-import html
-import utils
-
-
-class MovieData:
-
-    # statics
-    IMDB_URL_FORMAT = "http://www.imdb.com/title/{}/"
-
-    title = None
-    production_year = None
-    rated = None
-    plot = None
-    actors = None
-    language = None
-    country = None
-    genre = None
-    poster_url = None
-    released = None
-    runtime = None
-    director = None
-    type = None
-    subtext = None
-    soup = None
-
-    def __init__(self, imdb_id):
-        """
-        it takes an imdb_id to instantiate a MovieData object, upon instantiation,
-        it will get relevant html content and store as instance attribute
-        :param imdb_id:
-        """
-        self.imdb_id = imdb_id
-
-    # main logic
-    def get_html_content(self):
-        """
-        get html source based on imdb_id
-        :return: string
-        """
-        url = self.IMDB_URL_FORMAT.format(self.imdb_id)
-        request_result = html.unescape(request.urlopen(url).read().decode("utf-8"))
-        return request_result
-
-    def build_soup(self, request_result):
-        """
-        build soup based on html content in string format
-        :param request_result:
-        :return:
-        """
-        self.soup = BeautifulSoup(request_result, "lxml")  # soup builder
-
-    def build_soup_for_test(self, html_file_io_wrapper):
-        self.soup = BeautifulSoup(html_file_io_wrapper, "lxml")
-
-    def extract_process(self):
-        """
-        main logic for extraction of imdb data
-        :return:
-        """
-        self.extract_title_and_year()
-        self.extract_poster()
-        self.extract_credits()
-        self.extract_plot()
-        self.extract_subtext()
-        self.extract_rated()
-        self.extract_genre()
-        self.extract_release()
-        self.extract_runtime()
-
-    # get
-    def get_movie_data(self):
-        """
-        return a dict that contains all data to extractor
-        :return: dictionary of data in various type
-        """
-        movie_data = utils.get_movie_data_dict(self.actors, self.country, self.director, self.genre, self.imdb_id,
-                                               None, self.plot, self.poster_url, self.production_year, self.rated,
-                                               self.released, self.runtime, self.title, self.type)
-        return movie_data
-
-    # extraction nodes
-    def extract_title_and_year(self):
-        """
-        return title and production year of a movie
-        :return: title in string, production year in integer or None
-        """
-        title_wrapper = self.soup.find("h1").text.split("\xa0")
-        self.title = title_wrapper[0]
-        self.production_year = title_wrapper[1].replace("(", "").replace(")", "").replace(" ", "")
-        if self.production_year == "":
-            self.production_year = None
-            return self.title, self.production_year
-        return self.title, int(self.production_year)
-
-    def extract_poster(self):
-        """
-        return the url of poster of one movie
-        :return:
-        """
-        poster = self.soup.find("div", {"class": "poster"})
-        try:
-            self.poster_url = poster.find("img")['src']
-        except AttributeError:
-            self.poster_url = None
-        return self.poster_url
-
-    def extract_credits(self):
-        """
-        return the directors and actors of the movie. If there is more than
-        one director or actor, it will display a string with multiple tokens,
-        separated by comma
-        :return: credits info in string format or None
-        """
-        credits_text = self.soup.find_all("div", {"class": "credit_summary_item"})
-        for item in credits_text:
-            current_text = item.text
-            if "Directors:" in current_text:
-                self.director = current_text.replace("Directors:", "").split("|")[0]\
-                    .replace("\n", "").replace("  ", "").strip()
-            elif "Director:" in current_text:
-                self.director = current_text.replace("Director:", "").strip()
-            elif "Stars" in current_text:
-                self.actors = current_text.replace("Stars:", "").split("|")[0]\
-                    .replace("\n", "").replace("  ", "").strip()
-            elif "Star" in current_text:
-                self.actors = current_text.replace("Star:", "").strip()
-        return self.actors, self.director
-
-    def extract_plot(self):
-        """
-        return the plot of one movie
-        :return: plot in string format or None
-        """
-        self.plot = self.soup.find("div", {"class": "summary_text"}).text.replace("\n", "").strip().split("    ")[0]
-        if "Add a Plot" in self.plot:
-            self.plot = None
-        return self.plot
-
-    def extract_subtext(self):
-        """
-        retrieve the subtext tag for other extraction nodes
-        :return:
-        """
-        self.subtext = self.soup.find("div", {"class": "subtext"})
-
-    def extract_rated(self):
-        """
-        return the rating of a movie
-        :return:
-        """
-        metas = self.subtext.find_all("meta")
-        for meta in metas:
-            if meta['itemprop'] == "contentRating":
-                self.rated = meta['content']
-        return self.rated
-
-    def extract_release(self):
-        """
-        parse the last token in subtext element. it determines the type of the object,
-        it may also determine the release date and country
-        :return:
-        """
-        self.type = 'movie'  # default movie type
-        anchors = self.subtext.find_all("a")
-        for anchor in anchors:
-            if anchor.has_attr('title'):
-                release_text = anchor.text
-                if "Episode aired" in release_text:
-                    self.type = "episode"
-                    release_text = release_text.replace("Episode aired", "").replace("\n", "").strip()
-                    self.released = utils.transform_date_imdb(release_text)
-                elif "TV Series" in release_text:
-                    self.type = "tv"
-                elif "TV Episode" in release_text:
-                    self.type = "episode"
-                elif "TV Special" in release_text:
-                    self.type = "tv-special"
-                    release_text = release_text.replace("TV Special", "").replace("\n", "").strip()
-                    self.released = utils.transform_date_imdb(release_text)
-                elif "Video Game" in release_text:
-                    self.type = "video-game"
-                elif "Video game released" in release_text:
-                    self.type = "video-game"
-                    release_text = release_text.replace("Video game released", "").replace("\n", "").strip()
-                    self.released = utils.transform_date_imdb(release_text)
-                elif "Video" in release_text:
-                    self.type = "video"
-                    release_text = release_text.replace("Video", "").replace("\n", "").strip()
-                    self.released = utils.transform_date_imdb(release_text)
-                elif "TV Mini-Series" in release_text:
-                    self.type = "tv-mini"
-                elif "TV Movie" in release_text:
-                    self.type = "tv-movie"
-                    release_text = release_text.replace("TV Movie", "").replace("\n", "").strip()
-                    self.released = utils.transform_date_imdb(release_text)
-                elif "TV Short" in release_text:
-                    self.type = "tv-short"
-                else:
-                    release_text = release_text.replace("\n", "").strip()
-                    self.released, self.country = utils.split_release_and_country_imdb(release_text)
-                    self.released = utils.transform_date_imdb(self.released)
-        return self.released, self.country, self.type
-
-    def extract_genre(self):
-        """
-        parse the html content and return the genre of the movie
-        :return:
-        """
-        genre_list = []
-        spans = self.subtext.find_all("span", {"class": "itemprop"})
-        for span in spans:
-            genre_list.append(span.text)
-        if len(genre_list) > 0:
-            self.genre = ", ".join(genre_list)
-        return self.genre
-
-    def extract_runtime(self):
-        """
-        parse the html content and return the runtime of the movie
-        :return:
-        """
-        time_tag = self.subtext.find("time")
-        try:
-            time_text = time_tag['datetime']
-            self.runtime = int(time_text.replace("PT", "").replace("M", "").replace(",", ""))
-        except TypeError:
-            return None
-        return self.runtime
diff --git a/data/etl/movierating.py b/data/etl/movierating.py
deleted file mode 100644
index cc68575..0000000
--- a/data/etl/movierating.py
+++ /dev/null
@@ -1,111 +0,0 @@
-from urllib import request, error
-from bs4 import BeautifulSoup
-
-import utils
-import json
-
-
-class MovieRating:
-
-    trakt_header = {
-        'Content-Type': 'application/json',
-        'trakt-api-version': '2',
-        'trakt-api-key': '411a8f0219456de5e3e10596486c545359a919b6ebb10950fa86896c1a8ac99b'
-    }
-
-    wemakesites_api_key = "5a7e0693-af96-4d43-89a3-dc8ca00cf355"
-
-    imdb_url_format = "http://www.imdb.com/title/{}/"
-
-    # omdb setup
-    omdb_plot_option = "full"  # attribute for omdb
-
-    omdb_content_type = "json"  # return type for omdb requests
-
-    # douban
-    douban_url_format = "https://movie.douban.com/subject_search?search_text={}"
-    metacritic_url_format = "http://www.metacritic.com/search/movie/{}/results"
-
-    def __init__(self, movie_id):
-        self.movie_id = movie_id
-
-    def get_movie_ratings(self):
-        movie_ratings = []
-
-        rating, votes = self.extract_trakt_rating()
-        movie_ratings.append(utils.get_movie_rating_dict(rating, votes, self.movie_id, 'Trakt'))
-
-        rating, votes = self.extract_imdb_rating()
-        movie_ratings.append(utils.get_movie_rating_dict(rating, votes, self.movie_id, 'IMDb'))
-
-        rating, votes = self.extract_douban_rating()
-        movie_ratings.append(utils.get_movie_rating_dict(rating, votes, self.movie_id, 'Douban'))
-        return movie_ratings
-
-    def extract_trakt_rating(self):
-        """
-        given imdb_id, return the current rating and total number of votes of this movie in trakt.tv database
-        :param movie_id:
-        :return: rating and votes in STRING format
-        """
-        request_result = request.Request('https://api.trakt.tv/movies/{}/ratings'.format(self.movie_id),
-                                          headers=self.trakt_header)
-        try:
-            json_result = json.loads(request.urlopen(request_result).read().decode("utf-8"))
-        except error.HTTPError:
-            return None, None
-
-        return str(json_result['rating']), str(json_result['votes'])
-
-    def extract_imdb_rating(self):
-        """
-        given imdb_id, return the current rating and total number of votes of this movie in imdb database
-        :param movie_id:
-        :return: rating and votes in STRING format
-        """
-        url = self.imdb_url_format.format(self.movie_id)
-        request_result = request.urlopen(url).read()
-        soup = BeautifulSoup(request_result, "lxml")
-        div = soup.find('div', {'class': 'ratingValue'})
-
-        try:
-            parse_list = div.find("strong")['title'].split(" based on ")
-        except AttributeError:
-            return None, None
-
-        rating = parse_list[0]
-        votes = parse_list[1].split(" ")[0].replace(",", "")
-        return rating, votes
-
-    def extract_douban_rating(self):
-        """
-        given imdb_id, return the current rating and total number of votes of this movie in douban database
-        :param movie_id:
-        :return: rating and votes in STRING format
-        """
-        url = self.douban_url_format.format(self.movie_id)
-        request_result = request.urlopen(url).read()
-        soup = BeautifulSoup(request_result, "lxml")
-
-        try:
-            rating = soup.find("span", {'class': 'rating_nums'}).text
-            votes = soup.find("span", {'class': 'pl'}).text.replace("人评价","")[1: -1].replace(",", "")  # remove parenthesis and words
-        except AttributeError:
-            return None, None
-
-        return rating, votes
-
-    # def extract_metacritic_rating(self, imdb_id, search_string, director, release_date):
-    #     # bad request, on hold, need to use selenium
-    #     url = self.metacritic_url_format.format(html.escape(search_string))
-    #     call_result = request.urlopen(url).read()
-    #     soup = BeautifulSoup(call_result, "lxml")
-    #     results = soup.find('li', {'class': 'result'})
-    #     print(results)
-    #     pass
-    #
-    # def extract_rotten_tomatoes_rating(self, imdb_id):
-    #     pass
-    #
-    # def extract_letterboxd_rating(self, movie_id):
-    #     pass
diff --git a/data/etl/movieshowing.py b/data/etl/movieshowing.py
deleted file mode 100644
index 7fc7996..0000000
--- a/data/etl/movieshowing.py
+++ /dev/null
@@ -1,49 +0,0 @@
-from urllib import request, error
-from bs4 import BeautifulSoup
-from selenium import webdriver
-
-
-import html
-
-
-class MovieShowing:
-
-    imdb_search_format = "http://www.imdb.com/find?&q={}"
-
-    def __init__(self, cinema):
-        self.driver = webdriver.PhantomJS()
-        self.cinema_id, self.cinema_name, self.cinema_url = cinema
-
-    def extract_cinema_schedule(self):
-        """retrieve one cinema schedule based on the given url,
-        return a list of dictionaries contains """
-        # retrieve title, (type like 3D) and schedule time
-        print(self.cinema_url)
-        self.driver.get(self.cinema_url)
-
-        # find imdb id
-        # create tuple cinema_id, movie_id, type, schedule
-
-    def extract(self):
-        url = "http://www.imdb.com/find?&q=harry+potter+and+deathly+hallows"
-        soup = BeautifulSoup(request.urlopen(url).read().decode("utf-8"), "lxml")
-        anchors = soup.find_all("a")
-        for item in anchors:
-            try:
-                current_href = item['href']
-            except KeyError:
-                continue
-            if "/title" in current_href:
-                print(current_href)
-
-    def match(self):
-        print(self.build_search_url("Tu ying dang an"))
-
-    def build_search_url(self, search_title):
-        search_query = html.escape(search_title.lower())
-        return self.imdb_search_format.format(search_query)
-
-
-if __name__ == '__main__':
-    app = MovieShowing()
-    app.match()
diff --git a/data/etl/transformer.py b/data/etl/transformer.py
deleted file mode 100644
index b21931b..0000000
--- a/data/etl/transformer.py
+++ /dev/null
@@ -1,77 +0,0 @@
-import datetime
-
-
-class Transformer:
-
-    def __init__(self, logger):
-        self.logger = logger
-
-    # ==============
-    #   Movie Data
-    # ==============
-    @staticmethod
-    def split_release_and_country_imdb(release_country):
-        """
-        given a string containing released date and country of a movie, return both fields
-        :param release_country: string
-        :return: string, string
-        """
-        released, country = release_country.replace(")", "").split("(")
-        released = released.strip()  # remove last white space
-        return released, country
-
-    @staticmethod
-    def transform_time_imdb(runtime):
-        """
-        given a string of time in various format from imdb, return in minutes
-        :param runtime: string
-        :return: string
-        """
-        runtime = runtime.replace(" ", "").replace("min", "")
-        if "h" in runtime:
-            [hours, minutes] = runtime.split("h")
-            if minutes == "":
-                minutes = 0
-            runtime = int(hours) * 60 + int(minutes)
-        return str(runtime)
-
-    @staticmethod
-    def transform_date_imdb(input_text):
-        """
-        given a date of string from imdb, return date in %Y-%m-%d format
-        :param input_text: string
-        :return: string
-        """
-        length_of_date = len(input_text.split(" "))
-        if length_of_date == 3:
-            input_text = datetime.datetime.strptime(input_text, '%d %B %Y').strftime('%Y-%m-%d')
-        elif length_of_date == 2:
-            input_text = datetime.datetime.strptime(input_text, '%B %Y').strftime('%Y-%m-%d')
-        elif length_of_date == 1:
-            if input_text == "":
-                return None
-            else:
-                input_text = datetime.datetime.strptime(input_text, '%Y').strftime('%Y-%m-%d')
-        return input_text
-    # ================
-    #   Movie Rating
-    # ================
-
-    @staticmethod
-    def movie_rating_votes(votes):
-        votes = votes.replace(",", "")
-        return votes
-
-    @staticmethod
-    def is_hour(input_text):
-        if 'h' in input_text:
-            try:
-                time = int(input_text.strip().replace("h", ""))
-            except ValueError:
-                return False
-            return True
-        return False
-
-    # ================
-    #   Now Showing
-    # ================
diff --git a/data/algo/__init__.py b/data/movie_id_matcher/__init__.py
similarity index 100%
rename from data/algo/__init__.py
rename to data/movie_id_matcher/__init__.py
diff --git a/data/movie_id_matcher/matcher.py b/data/movie_id_matcher/matcher.py
new file mode 100644
index 0000000..1f92578
--- /dev/null
+++ b/data/movie_id_matcher/matcher.py
@@ -0,0 +1,100 @@
+"""
+given title and some additional information of a movie
+match certain id (e.g. imdb id)
+"""
+from urllib import request
+from bs4 import BeautifulSoup
+from selenium import webdriver
+from datetime import datetime
+
+
+class MovieIDMatcher:
+
+    _IMDB_SEARCH_URL_FORMAT = "http://www.imdb.com/find?&q={}&s=tt&ttype=ft&exact=true"
+
+    def __init__(self):
+        self.driver = webdriver.PhantomJS()
+
+    def match_imdb_id_for_cinema_schedule(self, title):
+        """return the MOST possible imdb id of the movie from all recent showing,
+        or None when no search result qualifies
+        """
+        possible_imdb_list = self._extract_imdb_possible(title)
+
+        # a movie on a cinema schedule is expected to be released around now
+        this_year = datetime.now().year
+        recent_years = {str(this_year - 1), str(this_year), str(this_year + 1)}
+
+        for movie_id, movie_title in possible_imdb_list:
+            _, infos = self._parse_imdb_search_text(movie_title)
+
+            # check year
+            is_recent = any(info in recent_years for info in infos)
+
+            # check type is not tv; tags look like "TV Series" or "TV Episode", so
+            # test substrings. The former `"TV" is not infos` compared a str with
+            # a list by identity, which is always True and accepted everything.
+            is_cinema_type = not any("Short" in info or "TV" in info for info in infos)
+
+            # use the first candidate that passes either check
+            if is_recent or is_cinema_type:
+                return movie_id
+
+        # nothing qualified
+        return None
+
+    def _extract_imdb_possible(self, title):
+        """return up to three (imdb id, result text) tuples from the imdb title search"""
+        if " :" in title:
+            title = title.replace(" :", ":")
+        possible_list = []
+        search_query = self._imdb_search_query_builder(title)
+        url = self._IMDB_SEARCH_URL_FORMAT.format(search_query)
+        self.driver.get(url)
+        elements = self.driver.find_elements_by_class_name("findResult")
+        for element in elements[:3]:  # only the top three results are ever used
+            td = element.find_element_by_class_name("result_text")
+            current_imdb = td.find_element_by_css_selector("a").get_attribute("href").split("/")[4]
+            current_text = td.text.strip()
+            possible_list.append((current_imdb, current_text))
+
+        return possible_list
+
+    @staticmethod
+    def _parse_imdb_search_text(text):
+        """parse the text of one imdb search result into two lists.
+        First is a list of the movie titles obtained, possibly more
+        than one when the result carries an 'aka "..."' alternative.
+        Second is a list that contains all possible information
+        stored in a bracket, such as year, type and other strange
+        information, e.g. "Collide (I) (2016)" -> ["Collide"], ["I", "2016"]
+        :return list, list
+        """
+        import re  # local import, only this helper needs it
+        title_list = []
+        info_list = []
+
+        # split on the standalone word only, a plain split("aka") would also
+        # cut titles that merely contain those letters (e.g. "Baraka")
+        for segment in re.split(r"\baka\b", text):
+            # partition keeps a title without any bracket intact, find() returned
+            # -1 for it and the slice then dropped the last character
+            title_part, bracket, tag_part = segment.strip().partition("(")
+
+            # title list
+            title_list.append(title_part.strip().replace("\"", ""))
+
+            # info list
+            tags = (bracket + tag_part).split(")")[:-1]
+            info_list.extend(tag.replace("(", "").strip() for tag in tags)
+        return title_list, info_list
+
+    @staticmethod
+    def _build_soup(url):
+        soup = BeautifulSoup(request.urlopen(url).read().decode("utf-8"), "lxml")
+        return soup
+
+    @staticmethod
+    def _imdb_search_query_builder(movie_title):
+        """parse the movie title according to the query"""
+        return movie_title.lower()
diff --git a/data/etl/__init__.py b/data/movie_id_matcher/test/__init__.py
similarity index 100%
rename from data/etl/__init__.py
rename to data/movie_id_matcher/test/__init__.py
diff --git a/data/movie_id_matcher/test/test_matcher.py b/data/movie_id_matcher/test/test_matcher.py
new file mode 100644
index 0000000..d6e9313
--- /dev/null
+++ b/data/movie_id_matcher/test/test_matcher.py
@@ -0,0 +1,107 @@
+from matcher import MovieIDMatcher
+
+import unittest
+
+
+class TestMovieIDMatcher(unittest.TestCase):
+
+    def setUp(self):
+        self.matcher = MovieIDMatcher()
+
+    def tearDown(self):
+        self.matcher.driver.quit()  # stop the PhantomJS process started by setUp
+
+    def test_extract_imdb_possible(self):
+        def helper(title, expect_result):
+            test_result = self.matcher._extract_imdb_possible(title)  # reuse one browser
+            self.assertEqual(test_result, expect_result)
+
+        helper("Collide", [
+            ("tt2126235", "Collide (I) (2016)"),
+            ("tt2834052", "Collide"),
+            ("tt1230120", "Collide (II) (2010)")
+        ])
+
+        helper("Cook up a storm", [
+            ("tt6315750", "Cook Up a Storm (2017)")
+        ])
+
+        helper("Kung Fu Yoga", [
+            ('tt4217392', 'Kung-Fu Yoga (2017)\naka "Kung Fu Yoga"')
+        ])
+
+        helper("The Lego Batman Movie", [
+            ('tt4116284', 'The LEGO Batman Movie (2017)')
+        ])
+
+        helper("Rings", [
+            ('tt0498381', 'Rings (2017)'),
+            ('tt0152191', 'Rings (1993)')
+        ])
+
+        helper("Hidden Figures", [
+            ('tt4846340', 'Hidden Figures (2016)')
+        ])
+
+        helper("Sleepless", [
+            ('tt2072233', 'Sleepless (III) (2017)'),
+            ('tt0220827', 'Sleepless (2001)'),
+            ('tt5039992', 'Sleepless (II) (2017)')
+        ])
+
+        helper("Fist Fight", [
+            ('tt3401882', 'Fist Fight (2017)')
+        ])
+
+        helper("Siew Lup", [
+            ('tt6550794', 'Siew Lup (2017)')
+        ])
+
+        helper("Jackie", [
+            ('tt1619029', 'Jackie (V) (2016)'),
+            ('tt2108546', 'Jackie (II) (2012)'),
+            ('tt5249954', 'Jackie')
+        ])
+
+        helper("John Wick: Chapter 2", [
+            ('tt4425200', 'John Wick: Chapter 2 (2017)')
+        ])
+
+        helper("John Wick : Chapter 2", [
+            ('tt4425200', 'John Wick: Chapter 2 (2017)')
+        ])
+
+        helper("Resident Evil: The Final Chapter", [
+            ('tt2592614', 'Resident Evil: The Final Chapter (2016)')
+        ])
+
+    def test_parse_imdb_search_text(self):
+        self.assertEqual(
+            self.matcher._parse_imdb_search_text("Collide (I) (2016)"), (["Collide"], ["I", "2016"]))
+        self.assertEqual(
+            self.matcher._parse_imdb_search_text("Collide (2017) (Short)"), (["Collide"], ["2017", "Short"]))
+        self.assertEqual(
+            self.matcher._parse_imdb_search_text("Cook Up a Storm (2017)"), (["Cook Up a Storm"], ["2017"]))
+        self.assertEqual(
+            self.matcher._parse_imdb_search_text("Cooking Up a Storm (2015) (TV Episode)"),
+            (["Cooking Up a Storm"], ["2015", "TV Episode"]))
+        self.assertEqual(
+            self.matcher._parse_imdb_search_text('The King of Queens (1998) (TV Series) aka "Kung av Queens"'),
+            (["The King of Queens", "Kung av Queens"], ["1998", "TV Series"]))
+        self.assertEqual(
+            self.matcher._parse_imdb_search_text('Kung-Fu Yoga (2017)\naka "Kung Fu Yoga"'),
+            (["Kung-Fu Yoga", "Kung Fu Yoga"], ["2017"]))
+
+    def test_match_imdb_id(self):
+
+        def helper(title, expect_result):
+            test_result = self.matcher.match_imdb_id_for_cinema_schedule(title)  # reuse one browser
+            self.assertEqual(test_result, expect_result)
+
+        helper("Collide", "tt2126235")
+        helper("Cook up a storm", "tt6315750")
+        helper("Kung Fu Yoga", 'tt4217392')
+        helper("The Lego Batman Movie", 'tt4116284')
+        helper("Rings", 'tt0498381')
+
+
diff --git a/data/test/__init__.py b/data/public_data/__init__.py
similarity index 100%
rename from data/test/__init__.py
rename to data/public_data/__init__.py
diff --git a/data/public_data/cinema.py b/data/public_data/cinema.py
new file mode 100644
index 0000000..323baa6
--- /dev/null
+++ b/data/public_data/cinema.py
@@ -0,0 +1,273 @@
+"""
+    This module retrieves cinema lists and movie schedules from different
+    sources and parses all data into the required format
+"""
+from datetime import datetime
+from bs4 import BeautifulSoup
+from urllib import request
+from selenium import webdriver
+from string import capwords
+from transformer import CinemaScheduleTransformer, GeneralTransformer
+
+
+class CinemaList:
+
+    GOLDEN_VILLAGE_LIST_HOME = "https://www.gv.com.sg/GVCinemas"
+
+    CATHAY_LIST_HOME = "http://www.cathaycineplexes.com.sg/cinemas/"
+
+    SHAW_BROTHER_LIST_HOME = "http://www.shaw.sg/sw_cinema.aspx"
+
+    def __init__(self):
+        self.driver = webdriver.PhantomJS()
+
+    def get_latest_cinema_list(self):
+        """
+        return the latest cinema list to the processor in the format of
+        [{
+            "url": ...
+            "cinema_name": ...
+            "provider": ...
+        }, {
+            "url": ...
+            "cinema_name": ...
+            "provider": ...
+        }]
+        :return: list
+        """
+        cinema_list = []
+        cinema_list.extend(self._extract_cathay_cinema_list())
+        cinema_list.extend(self._extract_sb_cinema_list())
+        cinema_list.extend(self._extract_gv_cinema_list())
+        return cinema_list
+
+    def _extract_gv_cinema_list(self):
+        """
+        return a list of dictionaries contain all Golden Village
+        cinema names, and their corresponding url. One url may list
+        more than one cinema, so several entries can share a url.
+        """
+        cinema_list = []
+
+        # get raw cinema list
+        self.driver.get(self.GOLDEN_VILLAGE_LIST_HOME)
+        anchors = self.driver.find_element_by_class_name("cinemas-list").find_elements_by_class_name("ng-binding")
+        raw_cinema_url = [anchor.get_attribute("href") for anchor in anchors]
+
+        # get actual list, in each url it may contain more than one cinema
+        for cinema_url in raw_cinema_url:
+            # reinstantiate to avoid detach from DOM, quitting the previous browser
+            # first so that its PhantomJS process is not left running
+            self.driver.quit()
+            self.driver = webdriver.PhantomJS()
+            self.driver.get(cinema_url)
+            div = self.driver.find_elements_by_class_name("ng-binding")
+            for item in div:
+                if item.get_attribute("ng-bind-html") == "cinema.name":
+                    cinema_name = item.text
+                    self.insert_cinema_data(cinema_list, cinema_name, cinema_url, "gv")
+        return cinema_list
+
+    def _extract_cathay_cinema_list(self):
+        """Get a list of dictionaries contain all cathay cinema names.
+        They all share the general showtimes url because cathay does not show
+        movies schedule based on individual cinemas in their web page layouts.
+        """
+        cinema_list = []
+
+        url = self.CATHAY_LIST_HOME
+        web_content = request.urlopen(url).read().decode("utf-8")
+        soup = BeautifulSoup(web_content, "lxml")
+        divs = soup.find_all("div", {"class": "description"})
+        for div in divs:
+            cinema_name = capwords(div.find("h1").text)
+            self.insert_cinema_data(cinema_list, cinema_name, "http://www.cathaycineplexes.com.sg/showtimes/", "cathay")
+        return cinema_list
+
+    def _extract_sb_cinema_list(self):
+        """Get a list of dictionaries contain all SB cinema names,
+        and their corresponding urls
+        """
+        name_list = []
+        url_list = []
+        cinema_list = []
+
+        # get names
+        url = self.SHAW_BROTHER_LIST_HOME
+        web_content = request.urlopen(url).read().decode("utf-8")
+        soup = BeautifulSoup(web_content, "lxml")
+        divs = soup.find_all("a", {"class": "txtHeaderBold"})
+        for div in divs:
+            name_list.append(div.text)
+
+        # get url
+        buy_tickets = soup.find_all("a", {"class": "txtNormalDim"})
+        for item in buy_tickets:
+            if "buytickets" in item["href"]:
+                url_list.append("http://" + "www.shaw.sg/" + item["href"])
+
+        if len(name_list) != len(url_list):  # names and urls are paired by position below
+            raise AssertionError("mismatch between Shaw cinema names and urls")
+
+        for cinema_name, cinema_url in zip(name_list, url_list):
+            self.insert_cinema_data(cinema_list, cinema_name, cinema_url, "sb")
+        return cinema_list
+
+    @staticmethod
+    def insert_cinema_data(cinema_list, cinema_name, cinema_url, provider):
+        cinema_record = {
+            "url": cinema_url,
+            "cinema_name": cinema_name,
+            "provider": provider
+        }
+        cinema_list.append(cinema_record)  # appended in place, nothing is returned
+
+
+class CinemaSchedule:
+    """
+    This class handles all operations related to the extraction
+    of movie schedules in cinemas
+    """
+    def __init__(self, cinema_name, cinema_url, cinema_provider):
+        self.driver = webdriver.PhantomJS()
+        self.driver.set_window_size(1124, 850)  # set browser size
+
+        self.cinema_name = cinema_name
+        self.cinema_url = cinema_url
+        self.provider = cinema_provider
+
+    def extract_cinema_schedule(self):
+        """
+        it will auto select the extract method based on the cinema
+        provider ("gv", "sb" or "cathay"), return the formatted data
+        object that can be used by Loader
+        :return: a list of dictionary
+        """
+        if self.provider == "gv":
+            cinema_object = self._extract_golden_village()
+        elif self.provider == "sb":
+            cinema_object = self._extract_shaw_brother()
+        elif self.provider == "cathay":
+            cinema_object = self._extract_cathay()
+        else:
+            raise ValueError("Invalid Cinema provider")  # still caught by `except Exception`
+
+        return CinemaScheduleTransformer.parse_cinema_object_to_data(cinema_object)
+
+    def _extract_golden_village(self):
+        self.driver.get(self.cinema_url)
+        # retrieve title, (type like 3D) and schedule time raw data
+        tabs = self.driver.find_elements_by_class_name("ng-binding")
+
+        cinema_schedule = {}  # maps movie title -> list of "date time" strings
+        date_counter = 0
+        for tab in tabs:
+            if tab.get_attribute("ng-bind-html") == "day.day":
+                current_date = GeneralTransformer.get_singapore_date(date_counter)
+                if tab.text == "Advance Sales":  # reach the end of tabs
+                    break
+
+                tab.click()
+                rows = self.driver.find_elements_by_class_name("row")
+
+                for row in rows:
+                    # reset per row
+                    current_title = None
+                    current_time = []
+
+                    # get movie title
+                    anchors = row.find_elements_by_class_name("ng-binding")
+                    for anchor in anchors:
+                        if anchor.get_attribute("ng-bind-html") == "getFilmTitle(movie)":
+                            current_title = anchor.text
+
+                    # get movie schedule
+                    buttons = row.find_elements_by_css_selector("button")
+                    for button in buttons:
+                        if button.get_attribute("ng-bind-html") == "time.time":
+                            current_time.append(current_date + " " +
+                                                GeneralTransformer.convert_12_to_24_hour_time(button.text))
+
+                    # store
+                    if current_title is not None:
+                        if current_title in cinema_schedule:
+                            cinema_schedule[current_title].extend(current_time)
+                        else:
+                            cinema_schedule[current_title] = current_time
+
+                date_counter += 1  # count day tabs only, not every "ng-binding" element
+        return cinema_schedule
+
+    def _extract_cathay(self):
+        self.driver.get(self.cinema_url)
+        cathay_id = CinemaScheduleTransformer.get_id_from_cathay_cinema_name(self.cinema_name)
+        outer_div = self.driver.find_element_by_id("ContentPlaceHolder1_wucST{}_tabs".format(cathay_id))
+        tabbers = outer_div.find_elements_by_class_name("tabbers")
+
+        date_counter = 0
+        cinema_schedule = {}  # maps movie title -> list of "date time" strings
+        for tabber in tabbers:  # for each day
+            current_date = GeneralTransformer.get_singapore_date(date_counter)
+            rows = tabber.find_elements_by_class_name("movie-container")
+            for row in rows:
+                try:
+                    row_content = row.get_attribute("innerHTML")
+                    soup = BeautifulSoup(row_content, "lxml")
+                    current_title = soup.find("strong").text  # AttributeError when there is no <strong>
+
+                    current_time = []
+                    times = soup.find_all("a", {"class": "cine_time"})
+                    for show_time in times:
+                        current_time.append(current_date + " " + show_time.text + ":00")
+
+                    if current_title is not None:
+                        if current_title in cinema_schedule:
+                            cinema_schedule[current_title].extend(current_time)
+                        else:
+                            cinema_schedule[current_title] = current_time
+                except AttributeError:
+                    break  # a row without a title ends this day
+
+            date_counter += 1
+        return cinema_schedule
+
+    def _extract_shaw_brother(self):
+        self.driver.get(self.cinema_url)
+        show_dates = []
+        options = self.driver.find_element_by_id("ctl00_Content_ddlShowDate").find_elements_by_css_selector(
+            "option")
+        for show_date in options:
+            show_dates.append(show_date.get_attribute("value"))
+
+        cinema_schedule = {}  # maps movie title -> list of "date time" strings
+        for show_date in show_dates:  # each day
+            current_date = datetime.strptime(show_date, "%m/%d/%Y").strftime("%Y-%m-%d")
+            self.driver.find_element_by_xpath(
+                "//select[@id='ctl00_Content_ddlShowDate']/option[@value='{}']".format(show_date)).click()
+            rows = self.driver.find_elements_by_class_name("panelSchedule")
+            for row in rows[2:]:  # remove table header
+                current_title, _, schedule = row.text.strip().partition("\n")  # one-line rows get an empty schedule
+                if "PM" in schedule or "AM" in schedule:
+                    # title
+                    current_title = current_title.split("   ")[1]
+
+                    # time
+                    current_time = []
+                    schedule = schedule.replace("+", "").replace("*", "")
+                    schedule = schedule.replace(" PM", "PM").replace(" AM", "AM").replace("\n", " ")
+                    if "(" in schedule:
+                        bracket_index = schedule.find("(")
+                        schedule = schedule[:bracket_index]  # remove anything behind bracket
+                    schedule = schedule.split(" ")
+
+                    for item in schedule:
+                        if item != "":
+                            current_time.append(current_date + " " +
+                                                GeneralTransformer.convert_12_to_24_hour_time(item))
+
+                    if current_title is not None:
+                        if current_title in cinema_schedule:
+                            cinema_schedule[current_title].extend(current_time)
+                        else:
+                            cinema_schedule[current_title] = current_time
+        return cinema_schedule
diff --git a/data/public_data/config.py b/data/public_data/config.py
new file mode 100644
index 0000000..8dec046
--- /dev/null
+++ b/data/public_data/config.py
@@ -0,0 +1,15 @@
+import psycopg2
+
+
+def database_connection():
+    """Open the PostgreSQL connection; return (cursor, conn), re-raise on failure."""
+    connect_str = "dbname='production' " \
+                  "user='postgres' " \
+                  "host='128.199.231.190' " \
+                  "password=''"
+    try:
+        conn = psycopg2.connect(connect_str)
+        return conn.cursor(), conn
+    except Exception as e:
+        print(e)  # keep the original console report
+        raise  # returning None here made callers fail on tuple unpacking instead
diff --git a/data/public_data/controller.py b/data/public_data/controller.py
new file mode 100644
index 0000000..82b5621
--- /dev/null
+++ b/data/public_data/controller.py
@@ -0,0 +1,196 @@
+"""
+    Core objective of this etl framework. This is the highest level API.
+
+    Each one of them will be run in backend on server, at designated
+    time intervals.
+
+    It includes four main methods in total:
+        1. update movie data
+        2. update movie public rating
+        3. update the list of cinemas in Singapore
+        4. update cinema schedule for each cinema available
+"""
+from cinema import CinemaList, CinemaSchedule
+from movie import MovieData, MovieRating
+from loader import Loader
+from movie_id_matcher.matcher import MovieIDMatcher
+from urllib import error
+from transformer import GeneralTransformer
+from http import client
+
+import utils
+import time
+import logging
+import psycopg2
+
+
+class ETLController:
+
+    def __init__(self):
+        self.loader = Loader()
+
+    def update_movie_data(self, lower, upper, delay):
+        """
+        updates movie data from IMDb
+        :param lower: integer, first numeric id to try (inclusive)
+        :param upper: integer, numeric id to stop at (exclusive)
+        :param delay: integer, seconds to sleep before starting
+        :return: None
+        """
+        logging.warning("Initialise movie data retrieval process ...")
+        logging.warning("Range: " + str(lower) + " to " + str(upper) + ", starting in " + str(delay) + "s ...")
+
+        time.sleep(delay)  # delay to avoid database transaction lock during multi-thread process
+        existing_movies_id = set(self.loader.get_movie_id_list())  # set: O(1) lookup per id below
+
+        for index in range(lower, upper):  # iterate all possible titles
+            current_imdb_id = GeneralTransformer.build_imdb_id(index)
+
+            if index % 1000 == 0:  # id monitor
+                logging.warning("Currently at: " + current_imdb_id)
+
+            if current_imdb_id in existing_movies_id:
+                continue
+
+            try:
+                self._update_single_movie_data(current_imdb_id)
+            except error.HTTPError:  # invalid id will cause an 404 error
+                continue
+            except utils.InvalidMovieTypeException:  # ignore all non-movie types
+                continue
+            except psycopg2.InterfaceError:  # database connection lost after a long time
+                logging.error("Reestablishing database connection")
+                self.loader = Loader()
+                continue
+            except (ConnectionResetError, TimeoutError, client.IncompleteRead):  # tuple, `A or B` caught A only
+                logging.error("Connection reset by remote host, reconnecting in 5s ...")
+                time.sleep(5)
+
+                # try again
+                try:
+                    self._update_single_movie_data(current_imdb_id)
+                except Exception:  # skip any error, but let KeyboardInterrupt/SystemExit through
+                    continue
+            except Exception as e:  # unknown error
+                logging.error("Unknown error occurs. Please examine.")
+                logging.error(e)
+                logging.error(current_imdb_id)
+
+        logging.warning("Movie data update process complete.")
+
+    def update_movie_rating(self):
+        """
+        updates movie rating from various websites
+        """
+        logging.warning("Initialise movie rating update process ...")
+
+        existing_movies_id = self.loader.get_movie_id_list()
+        for current_id in existing_movies_id:
+            self._update_single_movie_rating(current_id)
+
+        logging.warning("Movie rating update process complete.")
+
+    def update_cinema_list(self):
+        """
+        Update cinema list from various theatres websites
+        :return: None
+        """
+        logging.warning("Initialise cinema list update process ...")
+
+        cinema_list_object = CinemaList()
+        cinema_list = cinema_list_object.get_latest_cinema_list()
+        self.loader.load_cinema_list(cinema_list)
+
+        logging.warning("Cinema list update process complete.")
+
+    def update_cinema_schedule(self):
+        """
+        Update latest cinema schedule from cinema list.
+
+        It passes an empty dictionary to the schedule retrieval step,
+        every iteration it will append that cinema's schedule to the
+        dictionary.
+
+        IMDb ID is obtained using MovieMatcher module in the process.
+
+
+        The dictionary should be structured using title and imdb_id
+        as the top level keys, follow by other data.
+
+        {
+            title: {
+                "imdb_id": ...
+                "content": [{
+                        "cinema_id": ...
+                        "schedule": [...]
+                        "type": ...
+                    }
+                ]
+            }
+        }
+        """
+        logging.warning("Initialise cinema schedule update process ...")
+
+        cinema_schedule_data = {}
+
+        # retrieve schedule
+        cinema_list = self.loader.get_cinema_list()
+        self._cinema_schedule_retrieve(cinema_list, cinema_schedule_data)
+
+        # match id and check existence
+        matcher = MovieIDMatcher()
+        self.movie_list = self.loader.get_movie_id_list()  # fetched once, not once per title
+        for title, content in cinema_schedule_data.items():
+            imdb_id = content['imdb_id'] = matcher.match_imdb_id_for_cinema_schedule(title)
+            if imdb_id is not None:  # the matcher returns None when no id is found
+                self._update_single_movie_data(imdb_id)
+
+        # load data
+        self.loader.load_cinema_schedule(cinema_schedule_data)
+
+        logging.warning("Cinema schedule update process complete.")
+
+    def _update_single_movie_data(self, imdb_id):
+        """
+        given imdb id, extract movie data and store it in database
+        :param imdb_id: string
+        :return: None
+        """
+        data_model = MovieData(imdb_id)
+        current_movie_data = data_model.get_movie_data()
+        self.loader.load_movie_data(current_movie_data)
+
+    def _update_single_movie_rating(self, current_id):
+        """
+        given imdb id, extract movie ratings from various sources and
+        store them in database
+        :param current_id: string
+        :return: None
+        """
+        data_model = MovieRating(current_id)
+        movie_rating = data_model.get_movie_ratings()
+        self.loader.load_movie_rating(movie_rating)
+
+    @staticmethod
+    def _cinema_schedule_retrieve(cinema_list, cinema_schedule_data):
+        """
+        extract the schedule of every cinema in cinema_list and group the
+        results by movie title inside cinema_schedule_data (updated in place)
+        """
+        for cinema in cinema_list:
+            cinema_id, cinema_name, provider, cinema_url = cinema
+            cinema_schedule = CinemaSchedule(cinema_name, cinema_url, provider)
+            try:
+                current_schedules = cinema_schedule.extract_cinema_schedule()
+            finally:
+                cinema_schedule.driver.quit()  # one PhantomJS process per cinema, release it
+
+            # parse schedules and update data
+            for movie in current_schedules:
+                # the title moves from the entry itself to the top level key
+                entry = cinema_schedule_data.setdefault(movie.pop('title'), {'content': []})
+                movie['cinema_id'] = cinema_id
+                entry['content'].append(movie)
+
+
+
diff --git a/data/public_data/loader.py b/data/public_data/loader.py
new file mode 100644
index 0000000..336e466
--- /dev/null
+++ b/data/public_data/loader.py
@@ -0,0 +1,106 @@
+"""handles all interactions with database"""
+import logging
+
+import psycopg2
+
+import config
+
+
+class Loader:
+    """Database access for the ETL; the upserts update from EXCLUDED (the row proposed for insertion)"""
+
+    def __init__(self):
+        self.cursor, self.conn = config.database_connection()
+
+    # ========
+    #   LOAD
+    # ========
+    def load_movie_data(self, movie_data):
+        """
+        load movie data into database, if movie_id exists, it will update accordingly
+        :param movie_data: dictionary with one entry per column named in the INSERT
+        :return: None
+        """
+        if movie_data['type'] != "movie":  # does not store any non movie content
+            return
+
+        self.cursor.execute("INSERT INTO movies (movie_id, title, production_year, rated, plot, actors, "
+                            "language, country, runtime, poster_url, genre, director, released, type) "
+                            "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) "
+                            "ON CONFLICT (movie_id) "
+                            "DO UPDATE SET (title, production_year, rated, plot, actors, "
+                            "language, country, runtime, poster_url, genre, director, released, type) = "
+                            "(EXCLUDED.title, EXCLUDED.production_year, EXCLUDED.rated, EXCLUDED.plot, "
+                            "EXCLUDED.actors, EXCLUDED.language, EXCLUDED.country, EXCLUDED.runtime, "
+                            "EXCLUDED.poster_url, EXCLUDED.genre, EXCLUDED.director, EXCLUDED.released, "
+                            "EXCLUDED.type)",
+                            (movie_data['movie_id'], movie_data['title'], movie_data['production_year'],
+                             movie_data['rated'], movie_data['plot'], movie_data['actors'], movie_data['language'],
+                             movie_data['country'], movie_data['runtime'], movie_data['poster_url'],
+                             movie_data['genre'], movie_data['director'], movie_data['released'],
+                             movie_data['type']))
+        self.conn.commit()
+
+    def load_movie_rating(self, movie_ratings):
+        """
+        insert or update one public_ratings row per rating, committed together at the end
+        :param movie_ratings: list of dictionaries with 'votes', 'score', 'movie_id' and 'source_id'
+        """
+        for movie_rating in movie_ratings:
+            self.cursor.execute("INSERT INTO public_ratings (vote, score, movie_id, source_id) VALUES (%s, %s, %s, %s) "
+                                "ON CONFLICT (movie_id, source_id) "
+                                "DO UPDATE SET (vote, score) = (EXCLUDED.vote, EXCLUDED.score)",
+                                (movie_rating['votes'], movie_rating['score'], movie_rating['movie_id'],
+                                 movie_rating['source_id']))
+        self.conn.commit()
+
+    def load_cinema_list(self, cinema_list):
+        """
+        insert or update one cinemas row per cinema, matched on cinema_name
+        :param cinema_list: list of dictionaries with 'cinema_name', 'url' and 'provider'
+        """
+        for cinema in cinema_list:
+            self.cursor.execute("INSERT INTO cinemas (cinema_name, url, provider) VALUES (%s, %s, %s) "
+                                "ON CONFLICT (cinema_name) "
+                                "DO UPDATE SET (url, provider) = (EXCLUDED.url, EXCLUDED.provider)",
+                                (cinema['cinema_name'], cinema['url'], cinema['provider']))
+        self.conn.commit()
+
+    def load_cinema_schedule(self, cinema_schedule):
+        """
+        insert one showings row per timing, each in its own commit; rejected rows are skipped
+        :param cinema_schedule: dictionary, title -> {'imdb_id': ..., 'content': [...]}, every content
+            item holding 'cinema_id', 'type' and 'schedule' (the list of timings)
+        """
+        for cinema_content in cinema_schedule.values():
+            movie_id = cinema_content['imdb_id']
+            for cinema in cinema_content['content']:
+                cinema_id = cinema['cinema_id']
+                additional_info = cinema['type']
+                for timing in cinema['schedule']:
+                    try:
+                        self.cursor.execute("INSERT INTO showings (cinema_id, movie_id, type, schedule) "
+                                            "VALUES (%s, %s, %s, %s)", (cinema_id, movie_id, additional_info, timing))
+                        self.conn.commit()
+                    except (psycopg2.IntegrityError, psycopg2.InternalError):
+                        # the failed statement aborted the transaction: undo it so later INSERTs still work
+                        self.conn.rollback()
+
+    # ========
+    #   GET
+    # ========
+    def get_movie_id_list(self):
+        """:return: list with the movie_id of every row in movies"""
+        self.cursor.execute("SELECT movie_id FROM movies")
+        return [row[0] for row in self.cursor.fetchall()]
+
+    def get_movie_validation_info(self, movie_id):
+        """:return: (title, released, director) row of movie_id, as given by cursor.fetchone()"""
+        self.cursor.execute("SELECT title, released, director FROM movies WHERE movie_id=%s", (movie_id, ))
+        return self.cursor.fetchone()
+
+    def get_cinema_list(self):
+        """:return: all rows of cinemas, as given by cursor.fetchall()"""
+        self.cursor.execute("SELECT * FROM cinemas")
+        return self.cursor.fetchall()
+
diff --git a/data/public_data/movie.py b/data/public_data/movie.py
new file mode 100644
index 0000000..7d4a5a1
--- /dev/null
+++ b/data/public_data/movie.py
@@ -0,0 +1,322 @@
+from bs4 import BeautifulSoup
+from urllib import request, error
+
+import html
+import utils
+import json
+
+
+class MovieData:
+    """
+    This class handles all operations related to movie data
+    extraction
+
+    One instance wraps the imdb page of a single title. The _extract_*
+    methods store what they find in the instance attributes listed
+    below and get_movie_data() packs those attributes into a dictionary.
+    No method of this class assigns language, so it stays None.
+    """
+
+    # substrings of the release text that mark a title as a non movie type
+    NON_MOVIE_MARKERS = ("Episode aired", "TV Series", "TV Episode", "TV Special", "Video Game",
+                         "Video game released", "Video", "TV Mini-Series", "TV Movie", "TV Short")
+
+    # extraction results, None until an _extract_* method finds a value
+    title = None
+    production_year = None
+    rated = None
+    plot = None
+    actors = None
+    language = None
+    country = None
+    genre = None
+    poster_url = None
+    released = None
+    runtime = None
+    director = None
+    type = None
+    subtext = None
+    soup = None
+
+    def __init__(self, imdb_id):
+        """
+        It takes an imdb_id to instantiate a MovieData object, upon instantiation,
+        it will get relevant html content, parse it and run the extraction
+        :param imdb_id: string, "mock-id" skips both the download and the extraction
+        """
+        self.imdb_id = imdb_id
+        if imdb_id != "mock-id":  # special identifier for test cases. i.e. normal instantiation
+            self._build_soup(self._get_html_content())
+            self._extract_process()
+
+    def get_movie_data(self):
+        """
+        return a dict that contains all extracted data
+        :return: dictionary built by utils.get_movie_data_dict
+        """
+        # NOTE(review): the literal None presumably fills the language slot, which
+        # this class never extracts -- confirm against utils.get_movie_data_dict
+        movie_data = utils.get_movie_data_dict(self.actors, self.country, self.director, self.genre, self.imdb_id,
+                                               None, self.plot, self.poster_url, self.production_year, self.rated,
+                                               self.released, self.runtime, self.title, self.type)
+        return movie_data
+
+    def _extract_process(self):
+        """
+        main logic for extraction of imdb data; the subtext tag is located
+        first because the release, rated, genre and runtime steps read it
+        :return: None
+        """
+        self._extract_subtext()
+        self._extract_release()  # raises for non movie pages before the other steps run
+        self._extract_rated()
+        self._extract_genre()
+        self._extract_runtime()
+        self._extract_title_and_year()
+        self._extract_poster()
+        self._extract_credits()
+        self._extract_plot()
+
+    def _get_html_content(self):
+        """
+        get html source based on imdb_id, decoded as utf-8 with html entities unescaped
+        :return: string
+        """
+        url = utils.UrlFormatter.IMDB_URL_FORMAT.value.format(self.imdb_id)
+        with request.urlopen(url) as response:  # closes the connection once the body is read
+            return html.unescape(response.read().decode("utf-8"))
+
+    def _build_soup(self, request_result):
+        """
+        build soup based on html content in string format
+        :param request_result: string
+        :return: None
+        """
+        self.soup = BeautifulSoup(request_result, "lxml")  # soup builder
+
+    def _build_soup_for_test(self, html_file_io_wrapper):
+        """
+        build soup based on imported html source code file
+        :param html_file_io_wrapper: open file object of a saved html page
+        :return: None
+        """
+        self.soup = BeautifulSoup(html_file_io_wrapper, "lxml")
+
+    def _extract_title_and_year(self):
+        """
+        return title and production year of a movie
+        :return: string, int or None
+        """
+        title_wrapper = self.soup.find("h1").text.split("\xa0")
+        self.title = title_wrapper[0]
+        # a heading without the non-breaking space carries no year part
+        year_text = title_wrapper[1] if len(title_wrapper) > 1 else ""
+        year_text = year_text.replace("(", "").replace(")", "").replace(" ", "")
+        self.production_year = int(year_text) if year_text else None  # attribute now matches the return value
+        return self.title, self.production_year
+
+    def _extract_poster(self):
+        """
+        return the url of poster of one movie
+        :return: string or None
+        """
+        poster = self.soup.find("div", {"class": "poster"})
+        try:
+            self.poster_url = poster.find("img")['src']
+        except (AttributeError, TypeError, KeyError):  # no poster div / no img tag / img without src
+            self.poster_url = None
+        return self.poster_url
+
+    def _extract_credits(self):
+        """
+        return the actors and directors of the movie. If there is more than
+        one director or actor, it will display a string with multiple tokens,
+        separated by comma
+        :return: actors (string or None), director (string or None)
+        """
+        credits_text = self.soup.find_all("div", {"class": "credit_summary_item"})
+        for item in credits_text:
+            current_text = item.text
+            # for the plural labels everything after the first "|" is dropped;
+            # "Stars" is tested before "Star", which would match both labels
+            if "Directors:" in current_text:
+                self.director = current_text.replace("Directors:", "").split("|")[0]\
+                    .replace("\n", "").replace("  ", "").strip()
+            elif "Director:" in current_text:
+                self.director = current_text.replace("Director:", "").strip()
+            elif "Stars" in current_text:
+                self.actors = current_text.replace("Stars:", "").split("|")[0]\
+                    .replace("\n", "").replace("  ", "").strip()
+            elif "Star" in current_text:
+                self.actors = current_text.replace("Star:", "").strip()
+        return self.actors, self.director
+
+    def _extract_plot(self):
+        """
+        return the plot of one movie
+        :return: string or None
+        """
+        try:
+            # keep only the text in front of the first run of four spaces
+            self.plot = self.soup.find("div", {"class": "summary_text"}).text.replace("\n", "").strip().split("    ")[0]
+        except AttributeError:
+            self.plot = None
+
+        # the "Add a Plot" placeholder counts as a missing plot
+        if self.plot is not None and "Add a Plot" in self.plot:
+            self.plot = None
+        return self.plot
+
+    def _extract_subtext(self):
+        """
+        retrieve the subtext tag for other extraction nodes
+        :return: None
+        """
+        self.subtext = self.soup.find("div", {"class": "subtext"})
+
+    def _extract_rated(self):
+        """
+        return the rating(i.e. PG, R, M) of a movie
+        Not to be confused with user rating
+        :return: string or None
+        """
+        metas = self.subtext.find_all("meta")
+        for meta in metas:
+            # .get() tolerates meta tags that carry no itemprop attribute
+            if meta.get('itemprop') == "contentRating":
+                self.rated = meta['content']
+        return self.rated
+
+    def _extract_release(self):
+        """
+        parse the anchors of the subtext element that carry a title attribute,
+        determine the release date and country
+        If it is not a movie, raise an exception
+        :return: release date as returned by utils.transform_date_imdb or None,
+            country string or None, type string
+        :raises utils.InvalidMovieTypeException: the anchor text contains one of NON_MOVIE_MARKERS
+        """
+        self.type = 'movie'  # default movie type
+        for anchor in self.subtext.find_all("a"):
+            if not anchor.has_attr('title'):
+                continue
+            release_text = anchor.text
+            if any(marker in release_text for marker in self.NON_MOVIE_MARKERS):
+                raise utils.InvalidMovieTypeException("Invalid movie type.")
+            release_text = release_text.replace("\n", "").strip()
+            self.released, self.country = utils.split_release_and_country_imdb(release_text)
+            self.released = utils.transform_date_imdb(self.released)
+        return self.released, self.country, self.type
+
+    def _extract_genre(self):
+        """
+        parse the html content and return the genre of the movie
+        :return: string of comma separated genres or None
+        """
+        spans = self.subtext.find_all("span", {"class": "itemprop"})
+        genre_list = [span.text for span in spans]
+        if genre_list:
+            self.genre = ", ".join(genre_list)
+        return self.genre
+
+    def _extract_runtime(self):
+        """
+        parse the html content and return the runtime of the movie
+        (the datetime attribute of the time tag with "PT", "M" and "," removed)
+        :return: int or None
+        """
+        time_tag = self.subtext.find("time")
+        try:
+            time_text = time_tag['datetime']
+            self.runtime = int(time_text.replace("PT", "").replace("M", "").replace(",", ""))
+        except (TypeError, KeyError, ValueError):  # no time tag / no datetime attribute / not a number
+            return None
+        return self.runtime
+
+
+class MovieRating:
+    """Collects rating and vote count of one movie from Trakt, IMDb and Douban"""
+
+    # NOTE(review): this api key is committed to source control; load it from
+    # the untracked config module instead and rotate the key
+    TRAKT_HEADER = {
+        'Content-Type': 'application/json',
+        'trakt-api-version': '2',
+        'trakt-api-key': '411a8f0219456de5e3e10596486c545359a919b6ebb10950fa86896c1a8ac99b'
+    }
+
+    imdb_url_format = "http://www.imdb.com/title/{}/"
+
+    douban_url_format = "https://movie.douban.com/subject_search?search_text={}"
+
+    metacritic_url_format = "http://www.metacritic.com/search/movie/{}/results"
+
+    def __init__(self, movie_id):
+        self.movie_id = movie_id
+
+    def get_movie_ratings(self):
+        """
+        get a list of votes and ratings from each source
+        :return: list of the values built by utils.get_movie_rating_dict, in the
+            order Trakt, IMDb, Douban
+        """
+        extractors = (('Trakt', self._extract_trakt_rating),
+                      ('IMDb', self._extract_imdb_rating),
+                      ('Douban', self._extract_douban_rating))
+        movie_ratings = []
+        for source, extract in extractors:
+            rating, votes = extract()
+            movie_ratings.append(utils.get_movie_rating_dict(rating, votes, self.movie_id, source))
+        return movie_ratings
+
+    def _extract_trakt_rating(self):
+        """
+        given imdb_id, return the current rating and total number of votes of this movie in trakt.tv database
+        :return: string or None, string or None
+        """
+        request_result = request.Request('https://api.trakt.tv/movies/{}/ratings'.format(self.movie_id),
+                                         headers=self.TRAKT_HEADER)
+        try:
+            with request.urlopen(request_result) as response:  # closes the connection after reading
+                json_result = json.loads(response.read().decode("utf-8"))
+        except error.HTTPError:
+            return None, None
+        return str(json_result['rating']), str(json_result['votes'])
+
+    def _extract_imdb_rating(self):
+        """
+        given imdb_id, return the current rating and total number of votes of this movie in imdb database
+        :return: string or None, string or None
+        """
+        url = self.imdb_url_format.format(self.movie_id)
+        with request.urlopen(url) as response:
+            soup = BeautifulSoup(response.read(), "lxml")
+        div = soup.find('div', {'class': 'ratingValue'})
+
+        try:
+            parse_list = div.find("strong")['title'].split(" based on ")
+            rating = parse_list[0]
+            votes = parse_list[1].split(" ")[0].replace(",", "")
+        except (AttributeError, TypeError, KeyError, IndexError):  # rating markup missing or changed
+            return None, None
+        return rating, votes
+
+    def _extract_douban_rating(self):
+        """
+        given imdb_id, return the current rating and total number of votes of this movie in douban database
+        :return: string or None, string or None
+        """
+        url = self.douban_url_format.format(self.movie_id)
+        with request.urlopen(url) as response:
+            soup = BeautifulSoup(response.read(), "lxml")
+
+        try:
+            rating = soup.find("span", {'class': 'rating_nums'}).text
+
+            # remove parenthesis and words
+            votes = soup.find("span", {'class': 'pl'}).text.replace("人评价","")[1: -1].replace(",", "")
+        except AttributeError:
+            return None, None
+
+        return rating, votes
+
diff --git a/data/public_data/test/__init__.py b/data/public_data/test/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/data/public_data/test/data_movie_data/__init__.py b/data/public_data/test/data_movie_data/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/data/test/test_data_moviedata/get_html_source.py b/data/public_data/test/data_movie_data/get_html_source.py
similarity index 100%
rename from data/test/test_data_moviedata/get_html_source.py
rename to data/public_data/test/data_movie_data/get_html_source.py
diff --git a/data/test/test_data_moviedata/tt0000001.html b/data/public_data/test/data_movie_data/tt0000001.html
similarity index 100%
rename from data/test/test_data_moviedata/tt0000001.html
rename to data/public_data/test/data_movie_data/tt0000001.html
diff --git a/data/test/test_data_moviedata/tt0000004.html b/data/public_data/test/data_movie_data/tt0000004.html
similarity index 100%
rename from data/test/test_data_moviedata/tt0000004.html
rename to data/public_data/test/data_movie_data/tt0000004.html
diff --git a/data/test/test_data_moviedata/tt0000007.html b/data/public_data/test/data_movie_data/tt0000007.html
similarity index 100%
rename from data/test/test_data_moviedata/tt0000007.html
rename to data/public_data/test/data_movie_data/tt0000007.html
diff --git a/data/test/test_data_moviedata/tt0000012.html b/data/public_data/test/data_movie_data/tt0000012.html
similarity index 100%
rename from data/test/test_data_moviedata/tt0000012.html
rename to data/public_data/test/data_movie_data/tt0000012.html
diff --git a/data/test/test_data_moviedata/tt0000019.html b/data/public_data/test/data_movie_data/tt0000019.html
similarity index 100%
rename from data/test/test_data_moviedata/tt0000019.html
rename to data/public_data/test/data_movie_data/tt0000019.html
diff --git a/data/test/test_data_moviedata/tt0000025.html b/data/public_data/test/data_movie_data/tt0000025.html
similarity index 100%
rename from data/test/test_data_moviedata/tt0000025.html
rename to data/public_data/test/data_movie_data/tt0000025.html
diff --git a/data/test/test_data_moviedata/tt0000399.html b/data/public_data/test/data_movie_data/tt0000399.html
similarity index 100%
rename from data/test/test_data_moviedata/tt0000399.html
rename to data/public_data/test/data_movie_data/tt0000399.html
diff --git a/data/test/test_data_moviedata/tt0000481.html b/data/public_data/test/data_movie_data/tt0000481.html
similarity index 100%
rename from data/test/test_data_moviedata/tt0000481.html
rename to data/public_data/test/data_movie_data/tt0000481.html
diff --git a/data/test/test_data_moviedata/tt0000502.html b/data/public_data/test/data_movie_data/tt0000502.html
similarity index 100%
rename from data/test/test_data_moviedata/tt0000502.html
rename to data/public_data/test/data_movie_data/tt0000502.html
diff --git a/data/test/test_data_moviedata/tt0000869.html b/data/public_data/test/data_movie_data/tt0000869.html
similarity index 100%
rename from data/test/test_data_moviedata/tt0000869.html
rename to data/public_data/test/data_movie_data/tt0000869.html
diff --git a/data/test/test_data_moviedata/tt0001304.html b/data/public_data/test/data_movie_data/tt0001304.html
similarity index 100%
rename from data/test/test_data_moviedata/tt0001304.html
rename to data/public_data/test/data_movie_data/tt0001304.html
diff --git a/data/test/test_data_moviedata/tt0010781.html b/data/public_data/test/data_movie_data/tt0010781.html
similarity index 100%
rename from data/test/test_data_moviedata/tt0010781.html
rename to data/public_data/test/data_movie_data/tt0010781.html
diff --git a/data/test/test_data_moviedata/tt0030298.html b/data/public_data/test/data_movie_data/tt0030298.html
similarity index 100%
rename from data/test/test_data_moviedata/tt0030298.html
rename to data/public_data/test/data_movie_data/tt0030298.html
diff --git a/data/test/test_data_moviedata/tt0039445.html b/data/public_data/test/data_movie_data/tt0039445.html
similarity index 100%
rename from data/test/test_data_moviedata/tt0039445.html
rename to data/public_data/test/data_movie_data/tt0039445.html
diff --git a/data/test/test_data_moviedata/tt0039624.html b/data/public_data/test/data_movie_data/tt0039624.html
similarity index 100%
rename from data/test/test_data_moviedata/tt0039624.html
rename to data/public_data/test/data_movie_data/tt0039624.html
diff --git a/data/test/test_data_moviedata/tt0395865.html b/data/public_data/test/data_movie_data/tt0395865.html
similarity index 100%
rename from data/test/test_data_moviedata/tt0395865.html
rename to data/public_data/test/data_movie_data/tt0395865.html
diff --git a/data/test/test_data_moviedata/tt0460648.html b/data/public_data/test/data_movie_data/tt0460648.html
similarity index 100%
rename from data/test/test_data_moviedata/tt0460648.html
rename to data/public_data/test/data_movie_data/tt0460648.html
diff --git a/data/test/test_data_moviedata/tt1234567.html b/data/public_data/test/data_movie_data/tt1234567.html
similarity index 100%
rename from data/test/test_data_moviedata/tt1234567.html
rename to data/public_data/test/data_movie_data/tt1234567.html
diff --git a/data/test/test_data_moviedata/tt2345678.html b/data/public_data/test/data_movie_data/tt2345678.html
similarity index 100%
rename from data/test/test_data_moviedata/tt2345678.html
rename to data/public_data/test/data_movie_data/tt2345678.html
diff --git a/data/test/test_data_moviedata/tt3107288.html b/data/public_data/test/data_movie_data/tt3107288.html
similarity index 100%
rename from data/test/test_data_moviedata/tt3107288.html
rename to data/public_data/test/data_movie_data/tt3107288.html
diff --git a/data/test/test_data_moviedata/tt3783958.html b/data/public_data/test/data_movie_data/tt3783958.html
similarity index 100%
rename from data/test/test_data_moviedata/tt3783958.html
rename to data/public_data/test/data_movie_data/tt3783958.html
diff --git a/data/test/test_data_moviedata/tt4346792.html b/data/public_data/test/data_movie_data/tt4346792.html
similarity index 100%
rename from data/test/test_data_moviedata/tt4346792.html
rename to data/public_data/test/data_movie_data/tt4346792.html
diff --git a/data/public_data/test/test_cinema_schedule.py b/data/public_data/test/test_cinema_schedule.py
new file mode 100644
index 0000000..960c271
--- /dev/null
+++ b/data/public_data/test/test_cinema_schedule.py
@@ -0,0 +1,53 @@
+from cinema import CinemaSchedule, CinemaList
+
+import unittest
+
+
+class TestCinemaSchedule(unittest.TestCase):
+    """Checks the time conversion and title parsing helpers of CinemaSchedule"""
+
+    def setUp(self):
+        """one CinemaSchedule fixture per provider plus a CinemaList"""
+        # NOTE(review): every fixture hands CinemaSchedule one (id, name, url, provider)
+        # tuple -- confirm that this matches the constructor signature in cinema.py
+        gv = ('1', 'GV Tiong Bahru', 'https://www.gv.com.sg/GVCinemaDetails#/cinema/03', "gv")
+        cathay = ('32', 'The Cathay Cineplex', 'http://www.cathaycineplexes.com.sg/showtimes/', "cathay")
+        shaw = ('39', 'Shaw Theatres Lido', 'http://www.shaw.sg/sw_buytickets.aspx?'
+                                            'filmCode=&cplexCode=30 210 236 39 155 56 75 124 '
+                                            '123 77 76 246 36 85 160 0&date=', "sb")
+        self.gv_schedule = CinemaSchedule(gv)
+        self.cathay_schedule = CinemaSchedule(cathay)
+        self.shaw_schedule = CinemaSchedule(shaw)
+        self.cinema_list = CinemaList()
+
+    def test_convert_12_to_24_hour(self):
+        # to be added in more test cases
+        converted = self.gv_schedule._convert_12_to_24_hour_time("8:25pm")
+        self.assertEqual(converted, "20:25:00")
+
+    def test_movie_title_parser(self):
+        """each raw listing title must parse into (title, list of additional info)"""
+        gv, cathay, shaw = self.gv_schedule, self.cathay_schedule, self.shaw_schedule
+        cases = [
+            # gv
+            (gv, "Logan*", ("Logan", ["No free pass"])),
+            # cathay
+            (cathay, "*Hidden Figures PG (Dolby Digital)", ("Hidden Figures", ["Dolby Digital"])),
+            (cathay, "*T2 Trainspotting R21 (Dolby Digital)", ("T2 Trainspotting", ["Dolby Digital"])),
+            (cathay, "Fifty Shades Darker R21 (Dolby Digital)", ("Fifty Shades Darker", ["Dolby Digital"])),
+            (cathay, "John Wick : Chapter 2 M18 (Dolby Digital)", ("John Wick : Chapter 2", ["Dolby Digital"])),
+            (cathay, "*Before I Fall PG13 (Dolby Digital)", ("Before I Fall", ["Dolby Digital"])),
+            # shaw
+            (shaw, "Logan [D]", ("Logan", ["Digital"])),
+            (shaw, "Siew Lup [M] [D]", ("Siew Lup", ['Digital'])),
+            (shaw, "Jackie [D]", ("Jackie", ["Digital"])),
+            (shaw, "Hidden Figures [D]", ("Hidden Figures", ["Digital"])),
+            (shaw, "Logan [IMAX]", ("Logan", ["IMAX"])),
+            (shaw, "John Wick: Chapter 2 [D]", ("John Wick: Chapter 2", ["Digital"])),
+            (shaw, "The Lego Batman Movie [D]", ("The Lego Batman Movie", ["Digital"])),
+        ]
+        for schedule, raw_title, expected in cases:
+            self.assertEqual(schedule._movie_title_parser(raw_title), expected)
+
+
+
diff --git a/data/test/test_moviedata.py b/data/public_data/test/test_movie_data.py
similarity index 58%
rename from data/test/test_moviedata.py
rename to data/public_data/test/test_movie_data.py
index 102f00e..f4c605b 100644
--- a/data/test/test_moviedata.py
+++ b/data/public_data/test/test_movie_data.py
@@ -1,7 +1,7 @@
 import unittest
 import os
 
-from etl.moviedata import MovieData
+from public_data.movie import MovieData
 
 
 class TestMovieData(unittest.TestCase):
@@ -11,9 +11,6 @@ class TestMovieData(unittest.TestCase):
                     'tt0000025', 'tt0010781', 'tt0000481', 'tt0000012', 'tt0000399', 'tt0039624', 'tt0030298',
                     'tt0039445']
 
-    def __init__(self, *args, **kwargs):
-        super(TestMovieData, self).__init__(*args, **kwargs)
-
     def test_extract_title_and_year(self):
         """
         test the extractor of movie title and production year
@@ -22,7 +19,7 @@ def test_extract_title_and_year(self):
         :return:
         """
 
-        def helper_test(imdb_id, expected):
+        def helper(imdb_id, expected):
             """
             takes in imdb id and the tuple of expected result
             :param imdb_id:
@@ -31,18 +28,15 @@ def helper_test(imdb_id, expected):
             """
             data_model = MovieData("mock-id")
             test_data_directory = os.path.realpath(
-                os.path.join(os.getcwd(), "test/test_data_moviedata/{}.html".format(imdb_id)))
+                os.path.join(os.getcwd(), "data_movie_data/{}.html".format(imdb_id)))
             io_wrapper = open(test_data_directory, encoding="utf8")
-            data_model.build_soup_for_test(io_wrapper)
-            data_model.extract_process()
-            self.assertEqual(data_model.extract_title_and_year(), expected)
+            data_model._build_soup_for_test(io_wrapper)
+            data_model._extract_process()
+            self.assertEqual(data_model._extract_title_and_year(), expected)
             io_wrapper.close()
 
-        helper_test(self.test_id_list[0], ('Carmencita', 1894))
-        helper_test(self.test_id_list[1], ('The Top 14 Perform', None))
-        helper_test(self.test_id_list[2], ('Hot Properties', None))
-        helper_test(self.test_id_list[3], ('Episode dated 24 March 2004', None))
-        helper_test(self.test_id_list[7], ('La La Land', 2016))
+        helper(self.test_id_list[0], ('Carmencita', 1894))
+        helper(self.test_id_list[7], ('La La Land', 2016))
 
     def test_extract_poster(self):
         """
@@ -51,7 +45,7 @@ def test_extract_poster(self):
         :return:
         """
 
-        def helper_test(imdb_id, expected):
+        def helper(imdb_id, expected):
             """
             takes in imdb id and the tuple of expected result
             :param imdb_id:
@@ -61,21 +55,18 @@ def helper_test(imdb_id, expected):
             data_model = MovieData("mock-id")
 
             test_data_directory = os.path.realpath(
-                os.path.join(os.getcwd(), "test/test_data_moviedata/{}.html".format(imdb_id)))
+                os.path.join(os.getcwd(), "data_movie_data/{}.html".format(imdb_id)))
             io_wrapper = open(test_data_directory, encoding="utf8")
-            data_model.build_soup_for_test(io_wrapper)
-            data_model.extract_process()
-            self.assertEqual(data_model.extract_poster(), expected)
+            data_model._build_soup_for_test(io_wrapper)
+            data_model._extract_process()
+            self.assertEqual(data_model._extract_poster(), expected)
             io_wrapper.close()
 
-        helper_test(self.test_id_list[0],
+        helper(self.test_id_list[0],
                     "https://images-na.ssl-images-amazon.com/images/"
                     "M/MV5BMjAzNDEwMzk3OV5BMl5BanBnXkFtZTcwOTk4OTM5Ng@@._V1_UY268_CR6,0,182,268_AL_.jpg")
-        helper_test(self.test_id_list[1],
-                    "https://images-na.ssl-images-amazon.com/images/"
-                    "M/MV5BMTMxMjU0MTMxMl5BMl5BanBnXkFtZTcwNjY4Mjc3MQ@@._V1_UY268_CR2,0,182,268_AL_.jpg")
-        helper_test(self.test_id_list[13], None)
-        helper_test(self.test_id_list[14], None)
+        helper(self.test_id_list[13], None)
+        helper(self.test_id_list[14], None)
 
     def test_extract_credits(self):
         """
@@ -94,20 +85,17 @@ def helper_test(imdb_id, expected):
             """
             data_model = MovieData("mock-id")
             test_data_directory = os.path.realpath(
-                os.path.join(os.getcwd(), "test/test_data_moviedata/{}.html".format(imdb_id)))
+                os.path.join(os.getcwd(), "data_movie_data/{}.html".format(imdb_id)))
             io_wrapper = open(test_data_directory, encoding="utf8")
-            data_model.build_soup_for_test(io_wrapper)
-            data_model.extract_process()
-            self.assertEqual(data_model.extract_credits(), expected)
+            data_model._build_soup_for_test(io_wrapper)
+            data_model._extract_process()
+            self.assertEqual(data_model._extract_credits(), expected)
             io_wrapper.close()
 
         helper_test(self.test_id_list[16], (None, None))
         helper_test(self.test_id_list[14], (None, "Birt Acres"))
         helper_test(self.test_id_list[17], (None, "Auguste Lumière, Louis Lumière"))
-        helper_test(self.test_id_list[3], ("Agustín Bravo", None))
-        helper_test(self.test_id_list[5], ("Grant Gustin, Candice Patton, Danielle Panabaker", None))
         helper_test(self.test_id_list[0], ("Carmencita", "William K.L. Dickson"))
-        helper_test(self.test_id_list[1], ("Joshua Allen, Stephen Boss, Cat Deeley", "Don Weiner"))
         helper_test(self.test_id_list[18], ("Thomas White", "George S. Fleming, Edwin S. Porter"))
         helper_test(self.test_id_list[15], ("Ruth Roland, George Larkin, Mark Strong", "Robert Ellis, Louis J. Gasnier"))
 
@@ -126,20 +114,16 @@ def helper_test(imdb_id, expected):
             """
             data_model = MovieData("mock-id")
             test_data_directory = os.path.realpath(
-                os.path.join(os.getcwd(), "test/test_data_moviedata/{}.html".format(imdb_id)))
+                os.path.join(os.getcwd(), "data_movie_data/{}.html".format(imdb_id)))
             io_wrapper = open(test_data_directory, encoding="utf8")
-            data_model.build_soup_for_test(io_wrapper)
-            data_model.extract_process()
-            self.assertEqual(data_model.extract_plot(), expected)
+            data_model._build_soup_for_test(io_wrapper)
+            data_model._extract_process()
+            self.assertEqual(data_model._extract_plot(), expected)
             io_wrapper.close()
 
         helper_test(self.test_id_list[0], "Performing on what looks like a small wooden stage, wearing a dress with a "
                                           "hoop skirt and white high-heeled pumps, Carmencita does a dance with kicks "
                                           "and twirls, a smile always on her face.")
-        helper_test(self.test_id_list[1], "Host Cat Deeley promised at the outset that the final 14 dancers will face "
-                                          "some changes and the competition would get more difficult for the final "
-                                          "seven couples...")
-        helper_test(self.test_id_list[3], None)
 
     def test_extract_rated(self):
         """
@@ -156,16 +140,14 @@ def helper_test(imdb_id, expected):
             """
             data_model = MovieData("mock-id")
             test_data_directory = os.path.realpath(
-                os.path.join(os.getcwd(), "test/test_data_moviedata/{}.html".format(imdb_id)))
+                os.path.join(os.getcwd(), "data_movie_data/{}.html".format(imdb_id)))
             io_wrapper = open(test_data_directory, encoding="utf8")
-            data_model.build_soup_for_test(io_wrapper)
-            data_model.extract_process()
-            self.assertEqual(data_model.extract_rated(), expected)
+            data_model._build_soup_for_test(io_wrapper)
+            data_model._extract_process()
+            self.assertEqual(data_model._extract_rated(), expected)
             io_wrapper.close()
 
-        helper_test(self.test_id_list[4], "TV-14")
         helper_test(self.test_id_list[0], "NOT RATED")
-        helper_test(self.test_id_list[1], None)
 
     def test_extract_release(self):
         """
@@ -182,24 +164,13 @@ def helper_test(imdb_id, expected):
             """
             data_model = MovieData("mock-id")
             test_data_directory = os.path.realpath(
-                os.path.join(os.getcwd(), "test/test_data_moviedata/{}.html".format(imdb_id)))
+                os.path.join(os.getcwd(), "data_movie_data/{}.html".format(imdb_id)))
             io_wrapper = open(test_data_directory, encoding="utf8")
-            data_model.build_soup_for_test(io_wrapper)
-            data_model.extract_process()
-            self.assertEqual(data_model.extract_release(), expected)
+            data_model._build_soup_for_test(io_wrapper)
+            data_model._extract_process()
+            self.assertEqual(data_model._extract_release(), expected)
             io_wrapper.close()
 
-        # episodes
-        helper_test(self.test_id_list[1], ('2008-07-02', None, 'episode'))
-        helper_test(self.test_id_list[3], ('2004-03-24', None, 'episode'))
-        helper_test(self.test_id_list[4], ('2015-10-06', None, 'episode'))
-
-        # tv
-        helper_test(self.test_id_list[2], (None, None, 'tv'))
-        helper_test(self.test_id_list[5], (None, None, 'tv'))
-        helper_test(self.test_id_list[6], (None, None, 'tv'))
-
-        # # movies
         helper_test(self.test_id_list[0], ('1894-03-10', 'USA', 'movie'))
         helper_test(self.test_id_list[7], ('2016-12-25', 'USA', 'movie'))
         helper_test(self.test_id_list[8], ('1892-10-28', 'France', 'movie'))
@@ -208,11 +179,6 @@ def helper_test(imdb_id, expected):
         helper_test(self.test_id_list[11], ('1913-01-10', 'Germany', 'movie'))
         helper_test(self.test_id_list[12], (None, None, 'movie'))
 
-        # tv-movies
-        helper_test(self.test_id_list[19], (None, None, 'tv-movie'))
-        helper_test(self.test_id_list[20], ('1938-07-24', None, 'tv-movie'))
-        helper_test(self.test_id_list[21], ('1947-12-09', None, 'tv-movie'))
-
     def test_extract_genre(self):
         """
         test the genre token of subtext
@@ -228,16 +194,14 @@ def helper_test(imdb_id, expected):
             """
             data_model = MovieData("mock-id")
             test_data_directory = os.path.realpath(
-                os.path.join(os.getcwd(), "test/test_data_moviedata/{}.html".format(imdb_id)))
+                os.path.join(os.getcwd(), "data_movie_data/{}.html".format(imdb_id)))
             io_wrapper = open(test_data_directory, encoding="utf8")
-            data_model.build_soup_for_test(io_wrapper)
-            data_model.extract_process()
-            self.assertEqual(data_model.extract_genre(), expected)
+            data_model._build_soup_for_test(io_wrapper)
+            data_model._extract_process()
+            self.assertEqual(data_model._extract_genre(), expected)
             io_wrapper.close()
 
         helper_test(self.test_id_list[0], 'Documentary, Short')
-        helper_test(self.test_id_list[1], 'Game-Show, Music, Reality-TV')
-        helper_test(self.test_id_list[2], 'Comedy')
         helper_test(self.test_id_list[12], None)
 
     def test_extract_runtime(self):
@@ -255,19 +219,12 @@ def helper_test(imdb_id, expected):
             """
             data_model = MovieData("mock-id")
             test_data_directory = os.path.realpath(
-                os.path.join(os.getcwd(), "test/test_data_moviedata/{}.html".format(imdb_id)))
+                os.path.join(os.getcwd(), "data_movie_data/{}.html".format(imdb_id)))
             io_wrapper = open(test_data_directory, encoding="utf8")
-            data_model.build_soup_for_test(io_wrapper)
-            data_model.extract_process()
-            self.assertEqual(data_model.extract_runtime(), expected)
+            data_model._build_soup_for_test(io_wrapper)
+            data_model._extract_process()
+            self.assertEqual(data_model._extract_runtime(), expected)
             io_wrapper.close()
 
         helper_test(self.test_id_list[0], 1)
-        helper_test(self.test_id_list[1], 60)
-        helper_test(self.test_id_list[2], 30)
-        helper_test(self.test_id_list[3], 75)
-        helper_test(self.test_id_list[4], 43)
         helper_test(self.test_id_list[12], None)
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/data/public_data/test/test_movie_rating.py b/data/public_data/test/test_movie_rating.py
new file mode 100644
index 0000000..da598f6
--- /dev/null
+++ b/data/public_data/test/test_movie_rating.py
@@ -0,0 +1,56 @@
+import unittest
+import random
+import data.utils as utils
+from bs4 import BeautifulSoup
+from urllib import request, error
+from data.etl.movierating import MovieRating
+
+
+class TestMovieRating(unittest.TestCase):
+    """
+    Checks MovieRating's trakt, imdb and douban extractors against known imdb ids
+    """
+
+    test_id_list = ['tt0000001', 'tt1234567', 'tt0460648', 'tt2345678', 'tt4346792', 'tt3107288', 'tt0395865',
+                    'tt3783958', 'tt0000004', 'tt0000007', 'tt0000502', 'tt0001304', 'tt0000869', 'tt0000019',
+                    'tt0000025', 'tt0010781', 'tt0000481', 'tt0000012', 'tt0000399', 'tt0039624', 'tt0030298',
+                    'tt0039445']
+
+    def test_extract_trakt_tv_ratings(self):
+        self.assertEqual(MovieRating(self.test_id_list[0])._extract_trakt_rating(), ('4.66667', '9'))
+        self.assertEqual(MovieRating(self.test_id_list[1])._extract_trakt_rating(), ('0.0', '0'))
+        self.assertEqual(MovieRating(self.test_id_list[2])._extract_trakt_rating(), (None, None))
+        self.assertEqual(MovieRating(self.test_id_list[3])._extract_trakt_rating(), (None, None))
+        self.assertEqual(MovieRating(self.test_id_list[4])._extract_trakt_rating(), (None, None))
+        self.assertEqual(MovieRating(self.test_id_list[7])._extract_trakt_rating(), ('7.92902', '4973'))
+
+        # new movie data for type validation: rating and votes must parse as numbers
+        rating, votes = MovieRating(self.test_id_list[7])._extract_trakt_rating()
+        self.assertIsInstance(float(rating), float)
+        self.assertIsInstance(float(votes), float)
+
+    def test_extract_imdb_rating(self):
+        self.assertEqual(MovieRating(self.test_id_list[0])._extract_imdb_rating(), ('5.8', '1232'))
+        self.assertEqual(MovieRating(self.test_id_list[1])._extract_imdb_rating(), ('5.3', '13'))
+        self.assertEqual(MovieRating(self.test_id_list[2])._extract_imdb_rating(), ('6.4', '227'))
+        self.assertEqual(MovieRating(self.test_id_list[3])._extract_imdb_rating(), (None, None))
+        self.assertEqual(MovieRating(self.test_id_list[4])._extract_imdb_rating(), ('8.5', '4265'))
+        self.assertEqual(MovieRating(self.test_id_list[7])._extract_imdb_rating(), ('8.5', '170403'))
+
+        # new movie data for type validation: rating and votes must parse as numbers
+        rating, votes = MovieRating(self.test_id_list[7])._extract_imdb_rating()
+        self.assertIsInstance(float(rating), float)
+        self.assertIsInstance(float(votes), float)
+
+    def test_extract_douban_rating(self):
+        # old movie data for value validation
+        self.assertEqual(MovieRating(self.test_id_list[0])._extract_douban_rating(), ('7.1', '164'))
+        self.assertEqual(MovieRating(self.test_id_list[1])._extract_douban_rating(), (None, None))
+        self.assertEqual(MovieRating(self.test_id_list[2])._extract_douban_rating(), (None, None))
+        self.assertEqual(MovieRating(self.test_id_list[3])._extract_douban_rating(), (None, None))
+        self.assertEqual(MovieRating(self.test_id_list[4])._extract_douban_rating(), ('7.5', '5416'))
+
+        # new movie data for type validation: rating and votes must parse as numbers
+        rating, votes = MovieRating(self.test_id_list[7])._extract_douban_rating()
+        self.assertIsInstance(float(rating), float)
+        self.assertIsInstance(float(votes), float)
diff --git a/data/test/test_extractor.py b/data/public_data/test/test_something.py
similarity index 86%
rename from data/test/test_extractor.py
rename to data/public_data/test/test_something.py
index d1bb9de..ea9b0e6 100644
--- a/data/test/test_extractor.py
+++ b/data/public_data/test/test_something.py
@@ -3,7 +3,7 @@
 import unittest
 
 
-class TestExtractor(unittest.TestCase):
+class TestMovieRating(unittest.TestCase):
 
     test_id_list = ['tt0000001', 'tt1234567', 'tt0460648', 'tt2345678', 'tt4346792', 'tt3107288', 'tt0395865',
                     'tt3783958', 'tt0000004', 'tt0000007', 'tt0000502', 'tt0001304', 'tt0000869', 'tt0000019',
@@ -11,7 +11,7 @@ class TestExtractor(unittest.TestCase):
                     'tt0039445']
 
     def __init__(self, *args, **kwargs):
-        super(TestExtractor, self).__init__(*args, **kwargs)
+        super(TestMovieRating, self).__init__(*args, **kwargs)
 
     def test_extract_movie_rating(self):
         data_model = Extractor(None).extract_movie_rating(self.test_id_list[0])
@@ -25,4 +25,4 @@ def test_extract_movie_rating(self):
                 self.assertEqual(item['score'], '7.1')
             if item['source_id'] == 3:
                 self.assertEqual(item['votes'], '9')
-                self.assertEqual(item['score'], '4.66667')
+                self.assertEqual(item['score'], '4.66667')
\ No newline at end of file
diff --git a/data/test/test_transformer.py b/data/public_data/test/test_utils.py
similarity index 94%
rename from data/test/test_transformer.py
rename to data/public_data/test/test_utils.py
index 848eec5..e11a042 100644
--- a/data/test/test_transformer.py
+++ b/data/public_data/test/test_utils.py
@@ -1,5 +1,6 @@
 import unittest
-from data.etl.transformer import Transformer
+
+from transformer import Transformer
 
 
 class TestTransformer(unittest.TestCase):
@@ -33,7 +34,4 @@ def test_transform_time_imdb(self):
         self.assertEqual(self.transformer.transform_time_imdb("1h"), "60")
         self.assertEqual(self.transformer.transform_time_imdb("2h40min"), "160")
         self.assertEqual(self.transformer.transform_time_imdb("1h   40min   "), "100")
-        self.assertEqual(self.transformer.transform_time_imdb("1h   40min"), "100")
-
-if __name__ == '__main__':
-    unittest.main()
+        self.assertEqual(self.transformer.transform_time_imdb("1h   40min"), "100")
\ No newline at end of file
diff --git a/data/public_data/transformer.py b/data/public_data/transformer.py
new file mode 100644
index 0000000..f001526
--- /dev/null
+++ b/data/public_data/transformer.py
@@ -0,0 +1,209 @@
+from pytz import timezone
+from datetime import datetime, timedelta
+
+import time
+
+
+class Transformer:
+    """
+    string clean-up helpers for values scraped from imdb
+    """
+
+    @staticmethod
+    def split_release_and_country_imdb(release_country):
+        """
+        split an imdb "released (country)" string into its two fields
+        :param release_country: string
+        :return: string, string
+        """
+        date_part, country = release_country.replace(")", "").split("(")
+        return date_part.strip(), country
+
+    @staticmethod
+    def transform_time_imdb(runtime):
+        """
+        convert an imdb runtime such as "1h 40min" or "90min" to minutes
+        :param runtime: string
+        :return: string
+        """
+        compact = runtime.replace(" ", "").replace("min", "")
+        if "h" not in compact:
+            return str(compact)
+        hours, minutes = compact.split("h")
+        return str(int(hours) * 60 + int(minutes or 0))
+
+    @staticmethod
+    def transform_date_imdb(input_text):
+        """
+        convert an imdb date ("day month year", "month year" or "year") to %Y-%m-%d
+        :param input_text: string
+        :return: string, or None for an empty input
+        """
+        if input_text == "":
+            return None
+        formats = {1: '%Y', 2: '%B %Y', 3: '%d %B %Y'}
+        pattern = formats.get(len(input_text.split(" ")))
+        if pattern is None:  # not a recognised shape, hand it back untouched
+            return input_text
+        return datetime.strptime(input_text, pattern).strftime('%Y-%m-%d')
+
+    @staticmethod
+    def movie_rating_votes(votes):
+        """
+        remove every comma from a vote count
+        :param votes: string
+        :return: string
+        """
+        return votes.replace(",", "")
+
+
+class GeneralTransformer:
+
+    @staticmethod
+    def get_singapore_date(n):
+        """
+        date of n days from now in Singapore time
+        :return: string in %Y-%m-%d format
+        """
+        now_in_singapore = datetime.fromtimestamp(time.time(), timezone("Singapore"))
+        return (now_in_singapore + timedelta(days=n)).strftime("%Y-%m-%d")
+
+    @staticmethod
+    def convert_12_to_24_hour_time(time_string):
+        """
+        re-format a 12 hour clock time such as "9:30PM" as 24 hour %H:%M:%S
+        :param time_string: string
+        :return: string
+        """
+        return datetime.strptime(time_string, "%I:%M%p").strftime("%H:%M:%S")
+
+    @staticmethod
+    def build_imdb_id(i):
+        """
+        build an imdb id: "tt" followed by the number zero padded to 7 digits
+        :param i: integer
+        :return: string
+        """
+        return "tt" + "{0:0=7d}".format(i)
+
+
+class CinemaScheduleTransformer:
+    """
+    reshapes scraped cinema schedules of one provider ("gv", "cathay" or "sb")
+    """
+
+    # gv title markers in check order: (marker text, labels recorded when found)
+    _GV_MARKERS = (
+        ("*", ("No free pass",)),
+        ("(Eng Sub)", ("English sub only",)),
+        ("(Atmos)", ("Atmos",)),
+        ("Dessert Set", ("Dessert Set",)),
+        ("(D-Box)", ("(D-Box)",)),
+    )
+    # sb title markers, same layout; "[M]" is removed without recording a label
+    _SB_MARKERS = (
+        ("[D]", ("Digital",)),
+        ("[IMAX]", ("IMAX",)),
+        ("[M]", ()),
+        ("[IMAX 3D]", ("IMAX", "3D")),
+    )
+
+    def __init__(self, provider=None):
+        """
+        :param provider: string, "gv", "cathay" or "sb"; it can also be assigned
+            to self.provider after construction, as before this initialiser existed
+        """
+        self.provider = provider
+
+    @staticmethod
+    def get_id_from_cathay_cinema_name(cinema_name):
+        """
+        get cathay internal id from their cinema name for web elements
+        :param cinema_name: string, one of the names listed below
+        :return: string
+        :raises KeyError: if the cinema name is not listed
+        """
+        mapper = {
+            "Cathay Cineplex Amk Hub": "",
+            "Cathay Cineplex Causeway Point": "1",
+            "Cathay Cineplex Cineleisure Orchard": "2",
+            "Cathay Cineplex Downtown East": "3",
+            "Cathay Cineplex Jem": "4",
+            "The Cathay Cineplex": "5",
+            "Cathay Cineplex West Mall": "6"
+        }
+        return mapper[cinema_name]
+
+    def parse_cinema_object_to_data(self, cinema_object):
+        """
+        parse the cinema object in the format:
+        (based on self.provider, parsing strategy may vary)
+        {
+            movie_title: a list of movie schedule
+        }
+        to a list of entries in the format that can be consumed by
+        loader class and subsequently being stored into the database
+        {
+            "title": ...,
+            "schedule": [...],
+            "type": ...
+        }
+
+        Besides rearranging the dictionary, the raw movie title is
+        parsed into the title and additional information such as
+        "IMAX" or "Dolby Digital" (see _movie_title_parser); titles
+        containing "Zen Zone" are skipped.
+        :param cinema_object: dictionary
+        :return: list of dictionary
+        :raises Exception: if self.provider is not a known provider
+        """
+        data_object = []
+        for key, value in cinema_object.items():
+            if "Zen Zone" in key:  # strange thing in gv
+                continue
+            title, additional_info = self._movie_title_parser(key)
+            data_object.append(
+                {"title": title, "schedule": value, "type": additional_info})
+        return data_object
+
+    def _movie_title_parser(self, title):
+        """
+        remove provider specific markers from a raw movie title
+        :param title: string
+        :return: string, string (clean title, comma separated additional info)
+        :raises Exception: if self.provider is not "gv", "cathay" or "sb"
+        """
+        additional_info = []
+        if self.provider == "gv":
+            title = title.replace("`", "\'")
+            for marker, labels in self._GV_MARKERS:
+                if marker in title:
+                    title = title.replace(marker, "")
+                    additional_info.extend(labels)
+        elif self.provider == "cathay":
+            title = title.replace("*", "")  # have not figured out the meaning of *
+            # NOTE(review): the slice also drops the token right before "(Dolby" --
+            # presumably a rating token in cathay titles; confirm against real data
+            if "(Dolby Digital)" in title:
+                tokens = title.split(" ")
+                title = " ".join(tokens[:tokens.index("(Dolby") - 1])
+                additional_info.append("Dolby Digital")
+            if "(Dolby Atmos)" in title:
+                tokens = title.split(" ")
+                title = " ".join(tokens[:tokens.index("(Dolby") - 1]).replace("Atmos", "")
+                additional_info.append("Dolby Atmos")
+        elif self.provider == "sb":
+            # special rule first, then the general markers
+            title = title.replace("Kungfu", "Kung-fu").replace("`", "\'")
+            for marker, labels in self._SB_MARKERS:
+                if marker in title:
+                    title = title.replace(marker, "")
+                    additional_info.extend(labels)
+        else:
+            raise Exception("Invalid cinema provider")
+
+        title = title.strip()
+        return title, ",".join(additional_info)
+
+
+
diff --git a/data/utils.py b/data/public_data/utils.py
similarity index 56%
rename from data/utils.py
rename to data/public_data/utils.py
index de70119..e634608 100644
--- a/data/utils.py
+++ b/data/public_data/utils.py
@@ -1,43 +1,80 @@
-"""
-    This file contains miscellaneous functions used by all classes
-"""
-import logging
-import datetime
+from enum import Enum
+from datetime import datetime
 
 
-# ==============
-#   Logger
-# ==============
-def initialise_logger():
+class UrlFormatter(Enum):
+
+    IMDB_URL_FORMAT = "http://www.imdb.com/title/{}/"
+
+
+class InvalidMovieTypeException(Exception):
+    pass
+
+
+def split_release_and_country_imdb(release_country):
     """
-        initialise general logger, create general.log file in current directory
+    given a string containing released date and country of a movie, return both fields
+    :param release_country: string
+    :return: string, string
     """
-    logger = logging.getLogger("general_logger")
-    logger.setLevel(logging.INFO)
-    file_handler = logging.FileHandler('general.log', mode='w')
-    file_handler.setLevel(logging.DEBUG)
-    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
-    file_handler.setFormatter(formatter)
-    logger.addHandler(file_handler)
-    return logger
+    released, country = release_country.replace(")", "").split("(")
+    released = released.strip()  # remove last white space
+    return released, country
 
 
-def initialise_test_logger():
+def transform_time_imdb(runtime):
     """
-        initialise a console logger for testing classes
+    given a runtime string in one of imdb's formats (e.g. "1h 40min"), return it in minutes
+    :param runtime: string
+    :return: string
     """
-    logger = logging.getLogger("test_logger")
-    logger.setLevel(logging.DEBUG)
-    return logger
+    runtime = runtime.replace(" ", "").replace("min", "")
+    if "h" in runtime:
+        [hours, minutes] = runtime.split("h")
+        if minutes == "":
+            minutes = 0
+        runtime = int(hours) * 60 + int(minutes)
+    return str(runtime)
 
 
-# ==============
-#   Movie Data
-# ==============
-def get_movie_data_dict(actors, country, director, genre, imdb_id, language, plot, poster_url,
-                        production_year, rated, released, runtime, title, type):
+def transform_date_imdb(input_text):
+    """
+    given a date string from imdb, return the date in %Y-%m-%d format
+    :param input_text: string
+    :return: string
     """
-        this is the data model of movie data.
+    length_of_date = len(input_text.split(" "))
+    if length_of_date == 3:
+        input_text = datetime.strptime(input_text, '%d %B %Y').strftime('%Y-%m-%d')
+    elif length_of_date == 2:
+        input_text = datetime.strptime(input_text, '%B %Y').strftime('%Y-%m-%d')
+    elif length_of_date == 1:
+        if input_text == "":
+            return None
+        else:
+            input_text = datetime.strptime(input_text, '%Y').strftime('%Y-%m-%d')
+    return input_text
+
+
+def get_movie_data_dict(actors, country, director, genre, imdb_id, language, plot, poster_url, production_year, rated,
+                        released, runtime, title, type):
+    """
+    this is the data model of movie data.
+    :param actors: string
+    :param country: string
+    :param director: string
+    :param genre: string
+    :param imdb_id: string
+    :param language: string
+    :param plot: string
+    :param poster_url: string
+    :param production_year: integer
+    :param rated: string
+    :param released: datetime
+    :param runtime: string
+    :param title: string
+    :param type: string
+    :return: dictionary
     """
     movie_data = {
         "movie_id": imdb_id,
@@ -77,63 +114,6 @@ def get_movie_rating_dict(score, votes, imdb_id, rating_source):
     return movie_rating
 
 
-def imdb_id_builder(i):
-    """
-        this function takes in an integer and converts it to an imdb id
-    """
-    current_imdb_number = "{0:0=7d}".format(i)
-    imdb_id = "tt" + current_imdb_number
-    return imdb_id
-
-
-def split_release_and_country_imdb(release_country):
-    """
-    given a string containing released date and country of a movie, return both fields
-    :param release_country: string
-    :return: string, string
-    """
-    released, country = release_country.replace(")", "").split("(")
-    released = released.strip()  # remove last white space
-    return released, country
 
 
-def transform_time_imdb(runtime):
-    """
-    given a string of time in various format from imdb, return in minutes
-    :param runtime: string
-    :return: string
-    """
-    runtime = runtime.replace(" ", "").replace("min", "")
-    if "h" in runtime:
-        [hours, minutes] = runtime.split("h")
-        if minutes == "":
-            minutes = 0
-        runtime = int(hours) * 60 + int(minutes)
-    return str(runtime)
-
-
-def transform_date_imdb(input_text):
-    """
-    given a date of string from imdb, return date in %Y-%m-%d format
-    :param input_text: string
-    :return: string
-    """
-    length_of_date = len(input_text.split(" "))
-    if length_of_date == 3:
-        input_text = datetime.datetime.strptime(input_text, '%d %B %Y').strftime('%Y-%m-%d')
-    elif length_of_date == 2:
-        input_text = datetime.datetime.strptime(input_text, '%B %Y').strftime('%Y-%m-%d')
-    elif length_of_date == 1:
-        if input_text == "":
-            return None
-        else:
-            input_text = datetime.datetime.strptime(input_text, '%Y').strftime('%Y-%m-%d')
-    return input_text
-
 
-def is_numeric(number):
-    try:
-        float(number)
-    except ValueError:
-        return False
-    return True
diff --git a/data/recommedation_algo/__init__.py b/data/recommedation_algo/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/data/test/test_cinemalist.py b/data/test/test_cinemalist.py
deleted file mode 100644
index 54e6373..0000000
--- a/data/test/test_cinemalist.py
+++ /dev/null
@@ -1,19 +0,0 @@
-from data.etl.cinemalist import CinemaList
-
-import unittest
-
-
-class TestCinemaList(unittest.TestCase):
-
-    def setUp(self):
-        self.cinema_list = CinemaList()
-
-    # def test_get_golden_village(self):
-    #     cinemalist = CinemaList()
-    #     cinemalist.get_golden_village_cinema_list()
-
-    # def test_get_cathay(self):
-    #     self.cinema_list.get_cathay()
-
-    # def test_get_shaw_brother(self):
-    #     self.cinema_list.get_shaw_brother()
diff --git a/data/test/test_movierating.py b/data/test/test_movierating.py
deleted file mode 100644
index fe2c5b1..0000000
--- a/data/test/test_movierating.py
+++ /dev/null
@@ -1,56 +0,0 @@
-import unittest
-import random
-import data.utils as utils
-from bs4 import BeautifulSoup
-from urllib import request, error
-from data.etl.movierating import MovieRating
-
-
-class TestMovieRating(unittest.TestCase):
-
-    test_id_list = ['tt0000001', 'tt1234567', 'tt0460648', 'tt2345678', 'tt4346792', 'tt3107288', 'tt0395865',
-                    'tt3783958', 'tt0000004', 'tt0000007', 'tt0000502', 'tt0001304', 'tt0000869', 'tt0000019',
-                    'tt0000025', 'tt0010781', 'tt0000481', 'tt0000012', 'tt0000399', 'tt0039624', 'tt0030298',
-                    'tt0039445']
-
-    def __init__(self, *args, **kwargs):
-        super(TestMovieRating, self).__init__(*args, **kwargs)
-
-    def test_extract_trakt_tv_ratings(self):
-        self.assertEqual(MovieRating(self.test_id_list[0]).extract_trakt_rating(), ('4.66667', '9'))
-        self.assertEqual(MovieRating(self.test_id_list[1]).extract_trakt_rating(), ('0.0', '0'))
-        self.assertEqual(MovieRating(self.test_id_list[2]).extract_trakt_rating(), (None, None))
-        self.assertEqual(MovieRating(self.test_id_list[3]).extract_trakt_rating(), (None, None))
-        self.assertEqual(MovieRating(self.test_id_list[4]).extract_trakt_rating(), (None, None))
-        self.assertEqual(MovieRating(self.test_id_list[7]).extract_trakt_rating(), ('7.92902', '4973'))
-
-        # new movie data for type validation
-        rating, votes = MovieRating(self.test_id_list[7]).extract_douban_rating()
-        self.assertTrue(utils.is_numeric(rating.isnumeric()))
-        self.assertTrue(utils.is_numeric(votes.isnumeric()))
-
-    def test_extract_imdb_rating(self):
-        self.assertEqual(MovieRating(self.test_id_list[0]).extract_imdb_rating(), ('5.8', '1232'))
-        self.assertEqual(MovieRating(self.test_id_list[1]).extract_imdb_rating(), ('5.3', '13'))
-        self.assertEqual(MovieRating(self.test_id_list[2]).extract_imdb_rating(), ('6.4', '227'))
-        self.assertEqual(MovieRating(self.test_id_list[3]).extract_imdb_rating(), (None, None))
-        self.assertEqual(MovieRating(self.test_id_list[4]).extract_imdb_rating(), ('8.5', '4265'))
-        self.assertEqual(MovieRating(self.test_id_list[7]).extract_imdb_rating(), ('8.5', '170403'))
-
-        # new movie data for type validation
-        rating, votes = MovieRating(self.test_id_list[7]).extract_douban_rating()
-        self.assertTrue(utils.is_numeric(rating.isnumeric()))
-        self.assertTrue(utils.is_numeric(votes.isnumeric()))
-
-    def test_extract_douban_rating(self):
-        # old movie data for value validation
-        self.assertEqual(MovieRating(self.test_id_list[0]).extract_douban_rating(), ('7.1', '164'))
-        self.assertEqual(MovieRating(self.test_id_list[1]).extract_douban_rating(), (None, None))
-        self.assertEqual(MovieRating(self.test_id_list[2]).extract_douban_rating(), (None, None))
-        self.assertEqual(MovieRating(self.test_id_list[3]).extract_douban_rating(), (None, None))
-        self.assertEqual(MovieRating(self.test_id_list[4]).extract_douban_rating(), ('7.5', '5416'))
-
-        # new movie data for type validation
-        rating, votes = MovieRating(self.test_id_list[7]).extract_douban_rating()
-        self.assertTrue(utils.is_numeric(rating.isnumeric()))
-        self.assertTrue(utils.is_numeric(votes.isnumeric()))