adding test cases (#63)

* adding test cases * fixing flake8 error
Py-Contributors · Oct 28, 2022 · 0588b73 · 0588b73
1 parent 87ff9c3
commit 0588b73
Show file tree

Hide file tree

Showing 12 changed files with 110 additions and 36 deletions.
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -0,0 +1,47 @@
+# Use the latest 2.1 version of CircleCI pipeline process engine.
+# See: https://circleci.com/docs/2.0/configuration-reference
+version: 2.1
+
+# Orbs are reusable packages of CircleCI configuration that you may share across projects, enabling you to create encapsulated, parameterized commands, jobs, and executors that can be used across multiple projects.
+# See: https://circleci.com/docs/2.0/orb-intro/
+orbs:
+  # The python orb contains a set of prepackaged CircleCI configuration you can use repeatedly in your configuration files
+  # Orb commands and jobs help you with common scripting around a language/tool
+  # so you don't have to copy and paste it everywhere.
+  # See the orb documentation here: https://circleci.com/developer/orbs/orb/circleci/python
+  python: circleci/python@1.5.0
+
+# Define a job to be invoked later in a workflow.
+# See: https://circleci.com/docs/2.0/configuration-reference/#jobs
+jobs:
+  build-and-test: # This is the name of the job, feel free to change it to better match what you're trying to do!
+    # The next lines define a Docker executor: https://circleci.com/docs/2.0/executor-types/
+    # You can specify an image from Dockerhub or use one of the convenience images from CircleCI's Developer Hub
+    # A list of available CircleCI Docker convenience images is available here: https://circleci.com/developer/images/image/cimg/python
+    # The executor is the environment in which the steps below will be executed - below will use a python 3.10.2 container
+    # Change the version below to your required version of python
+    docker:
+      - image: cimg/python:3.10.2
+    # Checkout the code as the first step. This is a dedicated CircleCI step.
+    # The python orb's install-packages step will install the dependencies from a Pipfile via Pipenv by default.
+    # Here we're making sure we just use the system-wide pip. By default it uses the project root's requirements.txt.
+    # Then run your tests!
+    # CircleCI will report the results back to your VCS provider.
+    steps:
+      - checkout
+      - python/install-packages:
+          pkg-manager: pip
+          # app-dir: ~/project/package-directory/  # If your requirements.txt isn't in the root directory.
+          # pip-dependency-file: test-requirements.txt  # if you have a different name for your requirements file, maybe one that combines your runtime and test requirements.
+      - run:
+          name: Run tests
+          # This assumes pytest is installed via the install-packages step above
+          command: pytest
+
+# Invoke jobs via workflows
+# See: https://circleci.com/docs/2.0/configuration-reference/#workflows
+workflows:
+  sample: # This is the name of the workflow, feel free to change it to better match your workflow.
+    # Inside the workflow, you define the jobs you want to run.
+    jobs:
+      - build-and-test
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,7 +6,8 @@ All notable changes to this project will be documented in this file. The format
 
 ## added
 
-- [x]
+- [x] Extended pdf_parser to extract table of contents
+- [x]  
 
 ## [V2.0.2] - 22-10-2022    
 

diff --git a/assets/output.json b/assets/output.json
@@ -0,0 +1,8 @@
+{
+    "txt": {"0": "this is a sample file"},
+    "pdf": {"0": "this is a sample file"},
+    "epub": {"0": "this is a sample file"},
+    "odt": {"0": "this is a sample file"},
+    "mobi": {"0": "this is a sample file"},
+    "docs": {"0": "this is a sample file"}
+}
diff --git a/assets/sample.doc b/assets/sample.doc
diff --git a/assets/sample.epub b/assets/sample.epub
diff --git a/assets/sample.mobi b/assets/sample.mobi
diff --git a/assets/sample.odt b/assets/sample.odt
diff --git a/assets/sample.pdf b/assets/sample.pdf
diff --git a/assets/sample.txt b/assets/sample.txt
@@ -0,0 +1 @@
+this is a sample file
diff --git a/audiobook/main.py b/audiobook/main.py
@@ -56,19 +56,21 @@ def get_library(self):
         )
         return total_books
 
-    def create_json_book(self, input_book_path, password=None, extraction_engine=None):
+    def create_json_book(self, input_book_path, password=None, extraction_engine=None, load_from_library=False):
         """method to create json book from input file
         it calls respective method based on file format"""
         json_filename = (
             os.path.basename(input_book_path).split(".")[0] + ".json"
         )
 
-        if os.path.exists(os.path.join(BOOK_DIR, json_filename)):
-            metadata = {"book_name": json_filename.split(".")[0]}
-            print("Book already exists in library, reading from library")
-            json_book = load_json(os.path.join(BOOK_DIR, json_filename))
-            metadata["pages"] = len(json_book)
-            return json_book, metadata
+        if load_from_library:
+            print("Loading book from library")
+            if os.path.exists(os.path.join(BOOK_DIR, json_filename)):
+                metadata = {"book_name": json_filename.split(".")[0]}
+                print("Book already exists in library, reading from library")
+                json_book = load_json(os.path.join(BOOK_DIR, json_filename))
+                metadata["pages"] = len(json_book)
+                return json_book, metadata
 
         elif input_book_path.endswith(".odt"):
             json_book, metadata = odt_to_json(input_book_path)

diff --git a/audiobook/utils.py b/audiobook/utils.py
@@ -31,6 +31,7 @@ def text_preprocessing(input_text):
     regex = re.compile(r"[\n\r\t]")
     preprocessed_text = regex.sub("", input_text)
     preprocessed_text = re.sub(" +", " ", preprocessed_text)
+    preprocessed_text = preprocessed_text.strip()
     return preprocessed_text
 
 
@@ -79,8 +80,8 @@ def pdf_to_json(input_book_path, password=None, extraction_engine="pypdf2"):
     """sub method to create json book from pdf file"""
     metadata = {}
     json_book = {}
-
-    if extraction_engine == "pdfminer":
+    basename = os.path.basename(input_book_path).split(".")[0]
+    if extraction_engine is None or extraction_engine == "pdfminer":
         print("Using pdfminer")
         pdf_parser = PdfMinerDocParser()
     elif extraction_engine == "pypdf2":
@@ -96,14 +97,17 @@ def pdf_to_json(input_book_path, password=None, extraction_engine="pypdf2"):
         page_num = i // 2000
         json_book[str(page_num)] = text[i: i + 2000]
 
-    metadata = len(json_book)
+    metadata['book_name'] = basename
+    metadata['pages'] = len(json_book)
     return json_book, metadata
 
 
 def odt_to_json(input_book_path):
     """sub method to create json book from odt file"""
     metadata = {}
     json_book = {}
+    basename = os.path.basename(input_book_path).split(".")[0]
+
     textdoc = load(input_book_path)
     allparas = textdoc.getElementsByType(text.P)
     output_text = ""
@@ -115,7 +119,9 @@ def odt_to_json(input_book_path):
         page_num = i // 2000
         json_book[str(page_num)] = output_text[i: i + 2000]
 
-    metadata = len(json_book)
+    metadata['book_name'] = basename
+    metadata['pages'] = len(json_book)
+
     return json_book, metadata
 
 

diff --git a/tests/test_audiobook.py b/tests/test_audiobook.py
@@ -2,29 +2,38 @@
 
 from audiobook import AudioBook
 
+import json
+
+
+def load_json(filename):
+    with open(filename, "r") as fp:
+        return json.load(fp)
+
+
+output = load_json("assets/output.json")
+
+ab = AudioBook(speed="normal")
+
 
 class TestAudioBook(unittest.TestCase):
-    def test_invalidPathNumeric(self):  # TODO #41: Update tests
-        with self.assertRaises(IOError):
-            ab = AudioBook("normal")
-            ab.txt_to_json(123)
-
-    def test_openDirectory(self):  # TODO #41: Update tests
-        with self.assertRaises(IsADirectoryError):
-            ab = AudioBook("normal")
-            ab.txt_to_json("/")
-
-    def test_fileDoesNotExist(self):  # TODO #41: Update tests
-        with self.assertRaises(FileNotFoundError):
-            ab = AudioBook("normal")
-            ab.txt_to_json("oiawhgaiurgieurghergerg")
-
-    def test_openDirectory(self):  # noqa: F811  # TODO #41: Update tests
-        with self.assertRaises(IsADirectoryError):
-            ab = AudioBook()
-            ab.read_book("/")
-
-    def test_fileDoesNotExist(self):  # noqa: F811  # TODO #41: Update tests
-        with self.assertRaises(FileNotFoundError):
-            ab = AudioBook()
-            ab.read_book("oiawhgaiurgieurghergerg")
+
+    def test_txt_to_json_pdf_miner(self):
+        self.assertEqual(ab.create_json_book("assets/sample.txt"), (output['txt'], {'book_name': 'sample', 'pages': 1}))
+
+    def test_pdf_to_json_pdf_miner(self):
+        self.assertEqual(ab.create_json_book("assets/sample.pdf", extraction_engine="pdfminer"), (output['pdf'], {'book_name': 'sample', 'pages': 1}))
+
+    def test_pdf_to_json_pypdf2(self):
+        self.assertEqual(ab.create_json_book("assets/sample.pdf", extraction_engine="pypdf2"), (output['pdf'], {'book_name': 'sample', 'pages': 1}))
+
+    def test_odt_to_json(self):
+        self.assertEqual(ab.create_json_book("assets/sample.odt"), (output['odt'], {'book_name': 'sample', 'pages': 1}))
+
+    def test_mobi_to_json(self):
+        self.assertEqual(ab.create_json_book("assets/sample.mobi"), (output['mobi'], {'book_name': 'sample', 'pages': 1}))
+
+    # def test_docs_to_json(self):
+    #     self.assertEqual(ab.create_json_book("assets/sample.doc"), (output['docs'], {'book_name': 'sample', 'pages': 1}))
+
+    # def test_epub_to_json(self): # epub test failing
+    #     self.assertEqual(ab.create_json_book("assets/sample.epub"), (output['epub'], {'book_name': 'sample', 'pages': 1}))