Skip to content

Commit

Permalink
adding test cases (#63)
Browse files Browse the repository at this point in the history
* adding test cases

* fixing flake8 error
  • Loading branch information
codeperfectplus authored Oct 28, 2022
1 parent 87ff9c3 commit 0588b73
Show file tree
Hide file tree
Showing 12 changed files with 110 additions and 36 deletions.
47 changes: 47 additions & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Use the latest 2.1 version of CircleCI pipeline process engine.
# See: https://circleci.com/docs/2.0/configuration-reference
version: 2.1

# Orbs are reusable packages of CircleCI configuration that you may share across projects, enabling you to create encapsulated, parameterized commands, jobs, and executors that can be used across multiple projects.
# See: https://circleci.com/docs/2.0/orb-intro/
orbs:
# The python orb contains a set of prepackaged CircleCI configuration you can use repeatedly in your configuration files
# Orb commands and jobs help you with common scripting around a language/tool
# so you don't have to copy and paste it everywhere.
# See the orb documentation here: https://circleci.com/developer/orbs/orb/circleci/python
python: circleci/python@1.5.0

# Define a job to be invoked later in a workflow.
# See: https://circleci.com/docs/2.0/configuration-reference/#jobs
jobs:
build-and-test: # This is the name of the job, feel free to change it to better match what you're trying to do!
# These next lines define a Docker executor: https://circleci.com/docs/2.0/executor-types/
# You can specify an image from Dockerhub or use one of the convenience images from CircleCI's Developer Hub
# A list of available CircleCI Docker convenience images is available here: https://circleci.com/developer/images/image/cimg/python
# The executor is the environment in which the steps below will be executed - below will use a python 3.10.2 container
# Change the version below to your required version of python
docker:
- image: cimg/python:3.10.2
# Checkout the code as the first step. This is a dedicated CircleCI step.
# The python orb's install-packages step will install the dependencies from a Pipfile via Pipenv by default.
# Here we're making sure we just use the system-wide pip. By default it uses the project root's requirements.txt.
# Then run your tests!
# CircleCI will report the results back to your VCS provider.
steps:
- checkout
- python/install-packages:
pkg-manager: pip
# app-dir: ~/project/package-directory/ # If your requirements.txt isn't in the root directory.
# pip-dependency-file: test-requirements.txt # if you have a different name for your requirements file, maybe one that combines your runtime and test requirements.
- run:
name: Run tests
# This assumes pytest is installed via the install-packages step above
command: pytest

# Invoke jobs via workflows
# See: https://circleci.com/docs/2.0/configuration-reference/#workflows
workflows:
sample: # This is the name of the workflow, feel free to change it to better match your workflow.
# Inside the workflow, you define the jobs you want to run.
jobs:
- build-and-test
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ All notable changes to this project will be documented in this file. The format

## added

- [x]
- [x] Extended pdf_parser to extract table of contents
- [x]

## [V2.0.2] - 22-10-2022

Expand Down
8 changes: 8 additions & 0 deletions assets/output.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"txt": {"0": "this is a sample file"},
"pdf": {"0": "this is a sample file"},
"epub": {"0": "this is a sample file"},
"odt": {"0": "this is a sample file"},
"mobi": {"0": "this is a sample file"},
"docs": {"0": "this is a sample file"}
}
Binary file added assets/sample.doc
Binary file not shown.
Binary file added assets/sample.epub
Binary file not shown.
Binary file added assets/sample.mobi
Binary file not shown.
Binary file added assets/sample.odt
Binary file not shown.
Binary file added assets/sample.pdf
Binary file not shown.
1 change: 1 addition & 0 deletions assets/sample.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
this is a sample file
16 changes: 9 additions & 7 deletions audiobook/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,19 +56,21 @@ def get_library(self):
)
return total_books

def create_json_book(self, input_book_path, password=None, extraction_engine=None):
def create_json_book(self, input_book_path, password=None, extraction_engine=None, load_from_library=False):
"""method to create json book from input file
it calls respective method based on file format"""
json_filename = (
os.path.basename(input_book_path).split(".")[0] + ".json"
)

if os.path.exists(os.path.join(BOOK_DIR, json_filename)):
metadata = {"book_name": json_filename.split(".")[0]}
print("Book already exists in library, reading from library")
json_book = load_json(os.path.join(BOOK_DIR, json_filename))
metadata["pages"] = len(json_book)
return json_book, metadata
if load_from_library:
print("Loading book from library")
if os.path.exists(os.path.join(BOOK_DIR, json_filename)):
metadata = {"book_name": json_filename.split(".")[0]}
print("Book already exists in library, reading from library")
json_book = load_json(os.path.join(BOOK_DIR, json_filename))
metadata["pages"] = len(json_book)
return json_book, metadata

elif input_book_path.endswith(".odt"):
json_book, metadata = odt_to_json(input_book_path)
Expand Down
14 changes: 10 additions & 4 deletions audiobook/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def text_preprocessing(input_text):
regex = re.compile(r"[\n\r\t]")
preprocessed_text = regex.sub("", input_text)
preprocessed_text = re.sub(" +", " ", preprocessed_text)
preprocessed_text = preprocessed_text.strip()
return preprocessed_text


Expand Down Expand Up @@ -79,8 +80,8 @@ def pdf_to_json(input_book_path, password=None, extraction_engine="pypdf2"):
"""sub method to create json book from pdf file"""
metadata = {}
json_book = {}

if extraction_engine == "pdfminer":
basename = os.path.basename(input_book_path).split(".")[0]
if extraction_engine is None or extraction_engine == "pdfminer":
print("Using pdfminer")
pdf_parser = PdfMinerDocParser()
elif extraction_engine == "pypdf2":
Expand All @@ -96,14 +97,17 @@ def pdf_to_json(input_book_path, password=None, extraction_engine="pypdf2"):
page_num = i // 2000
json_book[str(page_num)] = text[i: i + 2000]

metadata = len(json_book)
metadata['book_name'] = basename
metadata['pages'] = len(json_book)
return json_book, metadata


def odt_to_json(input_book_path):
"""sub method to create json book from odt file"""
metadata = {}
json_book = {}
basename = os.path.basename(input_book_path).split(".")[0]

textdoc = load(input_book_path)
allparas = textdoc.getElementsByType(text.P)
output_text = ""
Expand All @@ -115,7 +119,9 @@ def odt_to_json(input_book_path):
page_num = i // 2000
json_book[str(page_num)] = output_text[i: i + 2000]

metadata = len(json_book)
metadata['book_name'] = basename
metadata['pages'] = len(json_book)

return json_book, metadata


Expand Down
57 changes: 33 additions & 24 deletions tests/test_audiobook.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,38 @@

from audiobook import AudioBook

import json


def load_json(filename):
    """Read the JSON document stored at *filename* and return the parsed object.

    The file is opened explicitly as UTF-8 so the result does not depend on
    the platform's default locale encoding.
    """
    with open(filename, "r", encoding="utf-8") as fp:
        return json.load(fp)


output = load_json("assets/output.json")

ab = AudioBook(speed="normal")


class TestAudioBook(unittest.TestCase):
def test_invalidPathNumeric(self): # TODO #41: Update tests
with self.assertRaises(IOError):
ab = AudioBook("normal")
ab.txt_to_json(123)

def test_openDirectory(self): # TODO #41: Update tests
with self.assertRaises(IsADirectoryError):
ab = AudioBook("normal")
ab.txt_to_json("/")

def test_fileDoesNotExist(self): # TODO #41: Update tests
with self.assertRaises(FileNotFoundError):
ab = AudioBook("normal")
ab.txt_to_json("oiawhgaiurgieurghergerg")

def test_openDirectory(self): # noqa: F811 # TODO #41: Update tests
with self.assertRaises(IsADirectoryError):
ab = AudioBook()
ab.read_book("/")

def test_fileDoesNotExist(self): # noqa: F811 # TODO #41: Update tests
with self.assertRaises(FileNotFoundError):
ab = AudioBook()
ab.read_book("oiawhgaiurgieurghergerg")

def test_txt_to_json_pdf_miner(self):
self.assertEqual(ab.create_json_book("assets/sample.txt"), (output['txt'], {'book_name': 'sample', 'pages': 1}))

def test_pdf_to_json_pdf_miner(self):
self.assertEqual(ab.create_json_book("assets/sample.pdf", extraction_engine="pdfminer"), (output['pdf'], {'book_name': 'sample', 'pages': 1}))

def test_pdf_to_json_pypdf2(self):
self.assertEqual(ab.create_json_book("assets/sample.pdf", extraction_engine="pypdf2"), (output['pdf'], {'book_name': 'sample', 'pages': 1}))

def test_odt_to_json(self):
self.assertEqual(ab.create_json_book("assets/sample.odt"), (output['odt'], {'book_name': 'sample', 'pages': 1}))

def test_mobi_to_json(self):
self.assertEqual(ab.create_json_book("assets/sample.mobi"), (output['mobi'], {'book_name': 'sample', 'pages': 1}))

# def test_docs_to_json(self):
# self.assertEqual(ab.create_json_book("assets/sample.doc"), (output['docs'], {'book_name': 'sample', 'pages': 1}))

# def test_epub_to_json(self): # epub test failing
# self.assertEqual(ab.create_json_book("assets/sample.epub"), (output['epub'], {'book_name': 'sample', 'pages': 1}))

0 comments on commit 0588b73

Please sign in to comment.