Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature: add doc/docx file support #40

Merged
merged 3 commits into from
Oct 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion audiobook/config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
supported_file_types = (".pdf", ".txt", ".epub")
supported_file_types = (".pdf", ".txt", ".epub", ".docx", ".doc")
speed_dict = {
"slow": 100,
"normal": 150,
Expand Down
3 changes: 3 additions & 0 deletions audiobook/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from audiobook.utils import mobi_to_json
from audiobook.utils import epub_to_json
from audiobook.utils import html_to_json
from audiobook.utils import docs_to_json


from audiobook.config import speed_dict
Expand Down Expand Up @@ -82,6 +83,8 @@ def create_json_book(self, input_book_path, password=None):
json_book, metadata = mobi_to_json(input_book_path)
elif input_book_path.startswith("http"):
json_book, metadata = html_to_json(input_book_path)
elif input_book_path.endswith((".docx", ".doc")):
json_book, metadata = docs_to_json(input_book_path)

write_json_file(json_book, os.path.join(BOOK_DIR, json_filename))

Expand Down
15 changes: 13 additions & 2 deletions audiobook/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import re
import os

import docx2txt
import mobi
import json
import PyPDF2
Expand Down Expand Up @@ -110,10 +112,19 @@ def txt_to_json(input_book_path):
metadata["book_name"] = book_name
return json_book, metadata


def docs_to_json(input_book_path, page_size=2000):
    """Create a json book from a docs (.docx) file.

    The text extracted by ``docx2txt`` is cut into consecutive chunks of at
    most ``page_size`` characters. Each chunk is stored under its zero-based
    page number, converted to a string key.

    Args:
        input_book_path: Path of the document to convert.
        page_size: Maximum number of characters per page. Defaults to 2000.

    Returns:
        A ``(json_book, metadata)`` tuple. ``json_book`` maps ``"0"``,
        ``"1"``, ... to the page text. ``metadata`` holds ``"pages"`` (the
        number of pages) and ``"book_name"`` (the file name without its
        extension).

    Raises:
        ValueError: If ``page_size`` is not positive.
    """
    if page_size <= 0:
        # A zero step would make range() fail with an unrelated message and a
        # negative step would silently produce an empty book.
        raise ValueError("page_size must be a positive integer")

    # splitext drops only the final extension, so a dotted name such as
    # "Vol.1.docx" keeps its full stem ("Vol.1") instead of being cut to "Vol".
    book_name = os.path.splitext(os.path.basename(input_book_path))[0]

    # NOTE(review): docx2txt reads the zip-based .docx format; legacy binary
    # .doc files routed here will presumably fail -- confirm before relying
    # on ".doc" support.
    book_data = docx2txt.process(input_book_path)

    json_book = {
        str(page_num): book_data[start:start + page_size]
        for page_num, start in enumerate(range(0, len(book_data), page_size))
    }
    metadata = {"pages": len(json_book), "book_name": book_name}
    return json_book, metadata

def epub_to_json(input_book_path):
metadata = {}
Expand Down
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,6 @@ ebooklib==0.17.1
beautifulsoup4==4.11.1
html2text==2020.1.16
mobi==0.3.3
docx2txt>=0.8
requests>=2.28.1
tqdm>=4.64.1