When active, the encoding (among other attributes) of every EPUB file processed by CWA is checked and, where necessary, fixed to ensure maximum compatibility with Amazon's Send-to-Kindle service (TL;DR: if you've ever had EPUB files that Amazon keeps rejecting for seemingly no reason, this should prevent that from happening again)
+
This tool was adapted from the kindle-epub-fix.netlify.app tool made by innocenat
+
+
Automatic Backup Settings
{% if cwa_settings['auto_backup_imports'] %}
diff --git a/root/etc/s6-overlay/s6-rc.d/cwa-auto-library/run b/root/etc/s6-overlay/s6-rc.d/cwa-auto-library/run
index d8f99d6..e7263ed 100644
--- a/root/etc/s6-overlay/s6-rc.d/cwa-auto-library/run
+++ b/root/etc/s6-overlay/s6-rc.d/cwa-auto-library/run
@@ -1,6 +1,6 @@
#!/bin/bash
-python3 /app/calibre-web-automated/scripts/auto-library.py
+python3 /app/calibre-web-automated/scripts/auto_library.py
if [[ $? == 1 ]]
then
diff --git a/root/etc/s6-overlay/s6-rc.d/cwa-auto-zipper/run b/root/etc/s6-overlay/s6-rc.d/cwa-auto-zipper/run
index 1b9ff0e..35d6ec8 100644
--- a/root/etc/s6-overlay/s6-rc.d/cwa-auto-zipper/run
+++ b/root/etc/s6-overlay/s6-rc.d/cwa-auto-zipper/run
@@ -40,7 +40,7 @@ do
echo "[cwa-auto-zipper] Next run in $SECS seconds."
sleep $SECS & # We sleep in the background to make the script interruptible via SIGTERM when running in docker
wait $!
- python3 /app/calibre-web-automated/scripts/auto-zip.py
+ python3 /app/calibre-web-automated/scripts/auto_zip.py
if [[ $? == 1 ]]
then
echo "[cwa-auto-zipper] Error occurred during script initialisation (see errors above)."
diff --git a/root/etc/s6-overlay/s6-rc.d/cwa-ingest-service/run b/root/etc/s6-overlay/s6-rc.d/cwa-ingest-service/run
index 07b9e80..5958eee 100644
--- a/root/etc/s6-overlay/s6-rc.d/cwa-ingest-service/run
+++ b/root/etc/s6-overlay/s6-rc.d/cwa-ingest-service/run
@@ -13,6 +13,6 @@ echo "[cwa-ingest-service]: Watching folder: $WATCH_FOLDER"
s6-setuidgid abc inotifywait -m -r --format="%e %w%f" -e close_write -e moved_to "$WATCH_FOLDER" |
while read -r events filepath ; do
echo "[cwa-ingest-service]: New files detected - $filepath - Starting Ingest Processor..."
- python3 /app/calibre-web-automated/scripts/ingest-processor.py "$filepath"
+ python3 /app/calibre-web-automated/scripts/ingest_processor.py "$filepath"
done
diff --git a/root/etc/s6-overlay/s6-rc.d/cwa-init-remove-locks/run b/root/etc/s6-overlay/s6-rc.d/cwa-init-remove-locks/run
index 9ffe7ce..1de2704 100644
--- a/root/etc/s6-overlay/s6-rc.d/cwa-init-remove-locks/run
+++ b/root/etc/s6-overlay/s6-rc.d/cwa-init-remove-locks/run
@@ -1,6 +1,6 @@
#!/bin/bash
-declare -a lockFiles=("ingest-processor.lock" "convert-library.lock" "cover_enforcer.lock")
+declare -a lockFiles=("ingest_processor.lock" "convert_library.lock" "cover_enforcer.lock")
echo "[cwa-init-remove-locks] Checking for leftover lock files from previous instance..."
diff --git a/scripts/auto-library.py b/scripts/auto_library.py
similarity index 100%
rename from scripts/auto-library.py
rename to scripts/auto_library.py
diff --git a/scripts/auto-zip.py b/scripts/auto_zip.py
similarity index 100%
rename from scripts/auto-zip.py
rename to scripts/auto_zip.py
diff --git a/scripts/convert-library.py b/scripts/convert_library.py
similarity index 93%
rename from scripts/convert-library.py
rename to scripts/convert_library.py
index 01591b1..ecd740b 100644
--- a/scripts/convert-library.py
+++ b/scripts/convert_library.py
@@ -11,6 +11,7 @@
import atexit
from cwa_db import CWA_DB
+from kindle_epub_fixer import EPUBFixer
logger = logging.getLogger(__name__)
@@ -25,7 +26,7 @@ def print_and_log(string) -> None:
# already running, then the script is closed, the user is notified and the program
# exits with code 2
try:
- lock = open(tempfile.gettempdir() + '/convert-library.lock', 'x')
+ lock = open(tempfile.gettempdir() + '/convert_library.lock', 'x')
lock.close()
except FileExistsError:
print_and_log("[convert-library]: CANCELLING... convert-library was initiated but is already running")
@@ -34,7 +35,7 @@ def print_and_log(string) -> None:
# Defining function to delete the lock on script exit
def removeLock():
- os.remove(tempfile.gettempdir() + '/convert-library.lock')
+ os.remove(tempfile.gettempdir() + '/convert_library.lock')
# Will automatically run when the script exits
atexit.register(removeLock)
@@ -60,9 +61,10 @@ def __init__(self) -> None: #args
self.cwa_settings = self.db.cwa_settings
self.target_format = self.cwa_settings['auto_convert_target_format']
self.convert_ignored_formats = self.cwa_settings['auto_convert_ignored_formats']
+ self.kindle_epub_fixer = self.cwa_settings['kindle_epub_fixer']
- self.supported_book_formats = ['azw', 'azw3', 'azw4', 'cbz', 'cbr', 'cb7', 'cbc', 'chm', 'djvu', 'docx', 'epub', 'fb2', 'fbz', 'html', 'htmlz', 'lit', 'lrf', 'mobi', 'odt', 'pdf', 'prc', 'pdb', 'pml', 'rb', 'rtf', 'snb', 'tcr', 'txt', 'txtz']
- self.hierarchy_of_success = ['epub', 'lit', 'mobi', 'azw', 'azw3', 'fb2', 'fbz', 'azw4', 'prc', 'odt', 'lrf', 'pdb', 'cbz', 'pml', 'rb', 'cbr', 'cb7', 'cbc', 'chm', 'djvu', 'snb', 'tcr', 'pdf', 'docx', 'rtf', 'html', 'htmlz', 'txtz', 'txt']
+ self.supported_book_formats = {'azw', 'azw3', 'azw4', 'cbz', 'cbr', 'cb7', 'cbc', 'chm', 'djvu', 'docx', 'epub', 'fb2', 'fbz', 'html', 'htmlz', 'lit', 'lrf', 'mobi', 'odt', 'pdf', 'prc', 'pdb', 'pml', 'rb', 'rtf', 'snb', 'tcr', 'txt', 'txtz'}
+ self.hierarchy_of_success = {'epub', 'lit', 'mobi', 'azw', 'azw3', 'fb2', 'fbz', 'azw4', 'prc', 'odt', 'lrf', 'pdb', 'cbz', 'pml', 'rb', 'cbr', 'cb7', 'cbc', 'chm', 'djvu', 'snb', 'tcr', 'pdf', 'docx', 'rtf', 'html', 'htmlz', 'txtz', 'txt'}
self.current_book = 1
self.ingest_folder, self.library_dir, self.tmp_conversion_dir = self.get_dirs('/app/calibre-web-automated/dirs.json')
@@ -128,8 +130,8 @@ def convert_library(self):
continue
if self.target_format == "kepub":
- successful, target_filepath = self.convert_to_kepub(filename, file_extension)
- if not successful:
+ convert_successful, target_filepath = self.convert_to_kepub(filename, file_extension)
+ if not convert_successful:
print_and_log(f"[convert-library]: Conversion of {os.path.basename(file)} was unsuccessful. See the following error:\n{e}")
self.current_book += 1
continue
@@ -152,7 +154,13 @@ def convert_library(self):
self.current_book += 1
continue
- try: # Import converted book to library. As of V3.0.0, "add_format" is used instead of add
+ if self.target_format == "epub" and self.kindle_epub_fixer:
+ try:
+ EPUBFixer(target_filepath).process()
+ except Exception as e:
+ print_and_log(f"[convert-library] An error occurred while processing {os.path.basename(target_filepath)} with the kindle-epub-fixer. See the following error:\n{e}")
+
+ try: # Import converted book to library. As of V3.0.0, "add_format" is used instead of "add"
subprocess.run(["calibredb", "add_format", book_id, target_filepath, f"--library-path={self.library_dir}"], check=True)
if self.cwa_settings['auto_backup_imports']:
diff --git a/scripts/cwa_db.py b/scripts/cwa_db.py
index 43d7499..cdbb230 100644
--- a/scripts/cwa_db.py
+++ b/scripts/cwa_db.py
@@ -34,7 +34,8 @@ def __init__(self, verbose=False):
"auto_convert_target_format": "epub",
"auto_convert_ignored_formats":"",
"auto_ingest_ignored_formats":"",
- "auto_metadata_enforcement":1}
+ "auto_metadata_enforcement":1,
+ "kindle_epub_fixer":1}
self.tables, self.schema = self.make_tables()
self.ensure_settings_schema_match()
diff --git a/scripts/cwa_schema.sql b/scripts/cwa_schema.sql
index 3e85ad3..5c85456 100644
--- a/scripts/cwa_schema.sql
+++ b/scripts/cwa_schema.sql
@@ -31,5 +31,6 @@ CREATE TABLE IF NOT EXISTS cwa_settings(
auto_convert_target_format TEXT DEFAULT "epub" NOT NULL,
auto_convert_ignored_formats TEXT DEFAULT "" NOT NULL,
auto_ingest_ignored_formats TEXT DEFAULT "" NOT NULL,
- auto_metadata_enforcement SMALLINT DEFAULT 1 NOT NULL
+ auto_metadata_enforcement SMALLINT DEFAULT 1 NOT NULL,
+ kindle_epub_fixer SMALLINT DEFAULT 1 NOT NULL
);
\ No newline at end of file
diff --git a/scripts/ingest-processor.py b/scripts/ingest_processor.py
similarity index 76%
rename from scripts/ingest-processor.py
rename to scripts/ingest_processor.py
index 7c8e939..09dacfd 100644
--- a/scripts/ingest-processor.py
+++ b/scripts/ingest_processor.py
@@ -9,13 +9,14 @@
from pathlib import Path
from cwa_db import CWA_DB
+from kindle_epub_fixer import EPUBFixer
# Creates a lock file unless one already exists meaning an instance of the script is
# already running, then the script is closed, the user is notified and the program
# exits with code 2
try:
- lock = open(tempfile.gettempdir() + '/ingest-processor.lock', 'x')
+ lock = open(tempfile.gettempdir() + '/ingest_processor.lock', 'x')
lock.close()
except FileExistsError:
print("[ingest-processor] CANCELLING... ingest-processor initiated but is already running")
@@ -23,7 +24,7 @@
# Defining function to delete the lock on script exit
def removeLock():
- os.remove(tempfile.gettempdir() + '/ingest-processor.lock')
+ os.remove(tempfile.gettempdir() + '/ingest_processor.lock')
# Will automatically run when the script exits
atexit.register(removeLock)
@@ -46,18 +47,20 @@ def __init__(self, filepath: str):
self.db = CWA_DB()
self.cwa_settings = self.db.cwa_settings
- self.auto_convert_on = self.db.cwa_settings['auto_convert']
- self.target_format = self.db.cwa_settings['auto_convert_target_format']
- self.ingest_ignored_formats = self.db.cwa_settings['auto_ingest_ignored_formats']
- self.convert_ignored_formats = self.db.cwa_settings['auto_convert_ignored_formats']
+ self.auto_convert_on = self.cwa_settings['auto_convert']
+ self.target_format = self.cwa_settings['auto_convert_target_format']
+ self.ingest_ignored_formats = self.cwa_settings['auto_ingest_ignored_formats']
+ self.convert_ignored_formats = self.cwa_settings['auto_convert_ignored_formats']
+ self.kindle_epub_fixer = self.cwa_settings['kindle_epub_fixer']
- self.supported_book_formats = ['azw', 'azw3', 'azw4', 'cbz', 'cbr', 'cb7', 'cbc', 'chm', 'djvu', 'docx', 'epub', 'fb2', 'fbz', 'html', 'htmlz', 'lit', 'lrf', 'mobi', 'odt', 'pdf', 'prc', 'pdb', 'pml', 'rb', 'rtf', 'snb', 'tcr', 'txtz', 'txt', 'kepub']
- # self.hierarchy_of_success = ['epub', 'lit', 'mobi', 'azw', 'epub', 'azw3', 'fb2', 'fbz', 'azw4', 'prc', 'odt', 'lrf', 'pdb', 'cbz', 'pml', 'rb', 'cbr', 'cb7', 'cbc', 'chm', 'djvu', 'snb', 'tcr', 'pdf', 'docx', 'rtf', 'html', 'htmlz', 'txtz', 'txt']
+ self.supported_book_formats = {'azw', 'azw3', 'azw4', 'cbz', 'cbr', 'cb7', 'cbc', 'chm', 'djvu', 'docx', 'epub', 'fb2', 'fbz', 'html', 'htmlz', 'lit', 'lrf', 'mobi', 'odt', 'pdf', 'prc', 'pdb', 'pml', 'rb', 'rtf', 'snb', 'tcr', 'txtz', 'txt', 'kepub'}
+ self.hierarchy_of_success = {'epub', 'lit', 'mobi', 'azw', 'epub', 'azw3', 'fb2', 'fbz', 'azw4', 'prc', 'odt', 'lrf', 'pdb', 'cbz', 'pml', 'rb', 'cbr', 'cb7', 'cbc', 'chm', 'djvu', 'snb', 'tcr', 'pdf', 'docx', 'rtf', 'html', 'htmlz', 'txtz', 'txt'}
self.ingest_folder, self.library_dir, self.tmp_conversion_dir = self.get_dirs("/app/calibre-web-automated/dirs.json")
self.filepath = filepath # path of the book we're targeting
self.filename = os.path.basename(filepath)
self.is_target_format = bool(self.filepath.endswith(self.target_format))
+ self.can_convert, self.input_format = self.can_convert_check()
def get_dirs(self, dirs_json_path: str) -> tuple[str, str, str]:
@@ -72,10 +75,20 @@ def get_dirs(self, dirs_json_path: str) -> tuple[str, str, str]:
return ingest_folder, library_dir, tmp_conversion_dir
- def convert_book(self, import_format: str, end_format: str=None) -> tuple[bool, str]:
+    def can_convert_check(self) -> tuple[bool, str]:
+        """Reports whether the file at self.filepath has an extension CWA is able to convert.
+
+        Returns a (can_convert, input_format) tuple: input_format is the file's suffix without the
+        leading dot, and can_convert is True when that suffix is listed in self.supported_book_formats"""
+        suffix = Path(self.filepath).suffix
+        input_format = suffix[1:]  # Drop the leading "." (an extension-less file gives "")
+        # The lookup is case-sensitive, exactly like the entries of supported_book_formats
+        return input_format in self.supported_book_formats, input_format
+
+
+ def convert_book(self, end_format: str=None) -> tuple[bool, str]:
"""Uses the following terminal command to convert the books provided using the calibre converter tool:\n\n--- ebook-convert myfile.input_format myfile.output_format\n\nAnd then saves the resulting files to the calibre-web import folder."""
print(f"\n[ingest-processor]: Starting conversion process for {self.filename}...", flush=True)
- print(f"[ingest-processor]: Converting file from {import_format} to {self.target_format} format...\n", flush=True)
+ print(f"[ingest-processor]: Converting file from {self.input_format} to {self.target_format} format...\n", flush=True)
print(f"[ingest-processor]: START_CON: Converting {self.filename}...\n", flush=True)
if end_format == None:
@@ -94,7 +107,7 @@ def convert_book(self, import_format: str, end_format: str=None) -> tuple[bool,
shutil.copy2(self.filepath, f"/config/processed_books/converted/{os.path.basename(original_filepath)}")
self.db.conversion_add_entry(original_filepath.stem,
- import_format,
+ self.input_format,
self.target_format,
str(self.cwa_settings["auto_backup_conversions"]))
@@ -106,20 +119,20 @@ def convert_book(self, import_format: str, end_format: str=None) -> tuple[bool,
return False, ""
- # Kepubify can only convert EPUBs to Kepubs.
- def convert_to_kepub(self, import_format: str) -> None:
+ # Kepubify can only convert EPUBs to Kepubs
+ def convert_to_kepub(self) -> tuple[bool, str]:
"""Kepubify is limited in that it can only convert from epub to kepub, therefore any files not already in epub need to first be converted to epub, and then to kepub"""
- if import_format == "epub":
+ if self.input_format == "epub":
print(f"[ingest-processor]: File in epub format, converting directly to kepub...", flush=True)
converted_filepath = self.filepath
- result = True
+ convert_successful = True
else:
print("\n[ingest-processor]: *** NOTICE TO USER: Kepubify is limited in that it can only convert from epubs. To get around this, CWA will automatically convert other"
"supported formats to epub using the Calibre's conversion tools & then use Kepubify to produce your desired kepubs. Obviously multi-step conversions aren't ideal"
"so if you notice issues with your converted files, bare in mind starting with epubs will ensure the best possible results***\n", flush=True)
- result, converted_filepath = self.convert_book(import_format, end_format="epub")
+ # convert_book() reads self.input_format itself now; also passing it positionally would bind it to end_format and raise a TypeError
+ convert_successful, converted_filepath = self.convert_book(end_format="epub")
- if result:
+ if convert_successful:
converted_filepath = Path(converted_filepath)
target_filepath = f"{self.tmp_conversion_dir}{converted_filepath.stem}.kepub"
try:
@@ -128,7 +141,7 @@ def convert_to_kepub(self, import_format: str) -> None:
shutil.copy2(self.filepath, f"/config/processed_books/converted/{os.path.basename(converted_filepath)}")
self.db.conversion_add_entry(converted_filepath.stem,
- import_format,
+ self.input_format,
self.target_format,
str(self.cwa_settings["auto_backup_conversions"]))
@@ -139,27 +152,20 @@ def convert_to_kepub(self, import_format: str) -> None:
shutil.copy2(converted_filepath, f"/config/processed_books/failed/{os.path.basename(original_filepath)}")
return False, ""
else:
- print(f"[ingest-processor]: An error occurred when converting the original {import_format} to epub. Cancelling kepub conversion...", flush=True)
+ print(f"[ingest-processor]: An error occurred when converting the original {self.input_format} to epub. Cancelling kepub conversion...", flush=True)
return False, ""
- def can_convert_check(self) -> tuple[bool, str]:
- """When the current filepath isn't of the target format, this function will check if the file is able to be converted to the target format,
- returning a can_convert bool with the answer"""
- can_convert = False
- import_format = Path(self.filepath).suffix[1:]
- if import_format in self.supported_book_formats:
- can_convert = True
- return can_convert, import_format
-
-
def delete_current_file(self) -> None:
"""Deletes file just processed from ingest folder"""
os.remove(self.filepath) # Removes processed file
subprocess.run(["find", f"{self.ingest_folder}", "-type", "d", "-empty", "-delete"]) # Removes any now empty folders
- def add_book_to_library(self, book_path) -> None:
+ def add_book_to_library(self, book_path:str) -> None:
+ # self.kindle_epub_fixer holds the on/off setting assigned in __init__, and that instance attribute hides the
+ # method of the same name, so the method is reached through the class (calling the attribute raises a TypeError).
+ # Only files that actually are EPUBs are handed to the fixer
+ if self.target_format == "epub" and self.kindle_epub_fixer and str(book_path).lower().endswith(".epub"):
+ type(self).kindle_epub_fixer(self, book_path)
+
print("[ingest-processor]: Importing new book to CWA...")
import_path = Path(book_path)
import_filename = os.path.basename(book_path)
@@ -178,6 +184,13 @@ def add_book_to_library(self, book_path) -> None:
shutil.copy2(book_path, f"/config/processed_books/failed/{import_filename}")
+ def kindle_epub_fixer(self, filepath:str) -> None:
+ """Runs EPUBFixer(filepath).process() on the given file. Any exception raised while doing so is caught and printed instead of being propagated.
+ NOTE(review): __init__ also assigns self.kindle_epub_fixer (the value of the 'kindle_epub_fixer' setting), which hides this method on instances - one of the two should be renamed"""
+ try:
+ EPUBFixer(filepath).process()
+ except Exception as e:
+ print(f"[ingest-processor] An error occurred while processing {os.path.basename(filepath)} with the kindle-epub-fixer. See the following error:\n{e}")
+
+
def empty_tmp_con_dir(self):
try:
files = os.listdir(self.tmp_conversion_dir)
@@ -215,28 +228,27 @@ def main(filepath=sys.argv[1]):
print(f"\n[ingest-processor]: No conversion needed for {nbp.filename}, importing now...", flush=True)
nbp.add_book_to_library(filepath)
else:
- can_convert, import_format = nbp.can_convert_check()
- if nbp.auto_convert_on and can_convert: # File can be converted to target format and Auto-Converter is on
+ if nbp.auto_convert_on and nbp.can_convert: # File can be converted to target format and Auto-Converter is on
- if import_format in nbp.convert_ignored_formats: # File could be converted & the converter is activated but the user has specified files of this format should not be converted
+ if nbp.input_format in nbp.convert_ignored_formats: # File could be converted & the converter is activated but the user has specified files of this format should not be converted
print(f"\n[ingest-processor]: {nbp.filename} not in target format but user has told CWA not to convert this format so importing the file anyway...", flush=True)
nbp.add_book_to_library(filepath)
- result = False
+ convert_successful = False
elif nbp.target_format == "kepub": # File is not in the convert ignore list and target is kepub, so we start the kepub conversion process
- result, converted_filepath = nbp.convert_to_kepub(import_format)
+ convert_successful, converted_filepath = nbp.convert_to_kepub()
else: # File is not in the convert ignore list and target is not kepub, so we start the regular conversion process
- result, converted_filepath = nbp.convert_book(import_format)
+ convert_successful, converted_filepath = nbp.convert_book()
- if result: # If previous conversion process was successful, remove tmp files and import into library
+ if convert_successful: # If previous conversion process was successful, remove tmp files and import into library
nbp.add_book_to_library(converted_filepath)
- nbp.empty_tmp_con_dir()
- elif can_convert and not nbp.auto_convert_on: # Books not in target format but Auto-Converter is off so files are imported anyway
+ elif nbp.can_convert and not nbp.auto_convert_on: # Books not in target format but Auto-Converter is off so files are imported anyway
print(f"\n[ingest-processor]: {nbp.filename} not in target format but CWA Auto-Convert is deactivated so importing the file anyway...", flush=True)
nbp.add_book_to_library(filepath)
else:
- print(f"[ingest-processor]: Cannot convert {nbp.filepath}. {import_format} is currently unsupported / is not a known ebook format.", flush=True)
+ print(f"[ingest-processor]: Cannot convert {nbp.filepath}. {nbp.input_format} is currently unsupported / is not a known ebook format.", flush=True)
+ nbp.empty_tmp_con_dir()
nbp.set_library_permissions()
nbp.delete_current_file()
del nbp # New in Version 2.0.0, should drastically reduce memory usage with large ingests
diff --git a/scripts/kindle_epub_fixer.py b/scripts/kindle_epub_fixer.py
new file mode 100644
index 0000000..b4dd98d
--- /dev/null
+++ b/scripts/kindle_epub_fixer.py
@@ -0,0 +1,115 @@
+import zipfile
+import os
+import re
+import xml.etree.ElementTree as ET
+
+### Code adapted from https://github.com/innocenat/kindle-epub-fix
+### Translated from Javascript to Python by community member tedderstar
+### & modified and integrated into CWA by CrocodileStick
+
+class EPUBFixer:
+    """Repairs common problems that make Amazon's Send-to-Kindle service reject an EPUB.
+
+    The archive at epub_path is loaded into memory, patched and, if anything changed, written back
+    to the same path. self.files maps archive member names to their content (str for text members
+    that decoded as UTF-8, bytes for everything else) and self.fixed_problems collects one
+    human-readable line per change made."""
+
+    # Members with these extensions are decoded to text when the archive is read
+    TEXT_EXTENSIONS = ('.html', '.xhtml', '.xml', '.css', '.opf', '.ncx', '.svg')
+    HTML_EXTENSIONS = ('.html', '.xhtml')
+    XML_DECLARATION = '<?xml version="1.0" encoding="utf-8"?>'
+
+    def __init__(self, epub_path):
+        self.epub_path = epub_path
+        self.files = {}
+        self.fixed_problems = []
+
+    def _html_files(self):
+        """Returns (name, text) pairs for every decoded HTML/XHTML member, as a list so self.files can be updated while looping"""
+        return [(name, content) for name, content in self.files.items()
+                if isinstance(content, str) and name.lower().endswith(self.HTML_EXTENSIONS)]
+
+    def read_epub(self):
+        """Loads every member of the archive into self.files, decoding text members as UTF-8"""
+        self.files = {}
+        with zipfile.ZipFile(self.epub_path, 'r') as zip_ref:
+            for file in zip_ref.namelist():
+                data = zip_ref.read(file)
+                if file.lower().endswith(self.TEXT_EXTENSIONS):
+                    try:
+                        data = data.decode('utf-8')
+                    except UnicodeDecodeError:
+                        # Not valid UTF-8: keep the raw bytes so the member is written back untouched
+                        # instead of aborting the whole book
+                        pass
+                self.files[file] = data
+
+    def fix_encoding(self):
+        """Prepends a UTF-8 XML declaration to every HTML/XHTML member that doesn't start with an XML declaration"""
+        xml_declaration_pattern = re.compile(r'^<\?xml.*?\?>', re.IGNORECASE)
+
+        for filename, content in self._html_files():
+            # A declaration only counts at the very start, so a BOM / leading whitespace is ignored (and dropped)
+            stripped = content.lstrip('\ufeff \t\r\n')
+            if not xml_declaration_pattern.match(stripped):
+                self.files[filename] = f"{self.XML_DECLARATION}\n{stripped}"
+                self.fixed_problems.append(f"Fixed encoding for file {filename}")
+
+    def fix_language(self):
+        """Sets the OPF's dc:language to 'en' when it is missing or not one of the allowed languages"""
+        allowed_languages = {# ISO 639-1
+            'af', 'gsw', 'ar', 'eu', 'nb', 'br', 'ca', 'zh', 'kw', 'co', 'da', 'nl', 'stq', 'en', 'fi', 'fr', 'fy', 'gl',
+            'de', 'gu', 'hi', 'is', 'ga', 'it', 'ja', 'lb', 'mr', 'ml', 'gv', 'frr', 'nb', 'nn', 'pl', 'pt', 'oc', 'rm',
+            'sco', 'gd', 'es', 'sv', 'ta', 'cy',
+            # ISO 639-2
+            'afr', 'ara', 'eus', 'baq', 'nob', 'bre', 'cat', 'zho', 'chi', 'cor', 'cos', 'dan', 'nld', 'dut', 'eng', 'fin',
+            'fra', 'fre', 'fry', 'glg', 'deu', 'ger', 'guj', 'hin', 'isl', 'ice', 'gle', 'ita', 'jpn', 'ltz', 'mar', 'mal',
+            'glv', 'nor', 'nno', 'por', 'oci', 'roh', 'gla', 'spa', 'swe', 'tam', 'cym', 'wel'}
+        opf_file = next((f for f in self.files if f.lower().endswith('.opf')), None)
+
+        # Nothing to do without an OPF, or when it could not be decoded as text
+        if not opf_file or not isinstance(self.files[opf_file], str):
+            return
+
+        root = ET.fromstring(self.files[opf_file])
+        lang_tag = root.find(".//{http://purl.org/dc/elements/1.1/}language")
+        current_lang = lang_tag.text if lang_tag is not None else 'undefined'
+
+        # Regional variants such as "en-US" or "pt_BR" are judged by their primary subtag, case-insensitively
+        primary_lang = re.split(r'[-_]', (current_lang or '').strip(), maxsplit=1)[0].lower()
+        if primary_lang in allowed_languages:
+            return
+
+        new_lang = "en" # Automatically set to 'en' for unsupported languages
+
+        if lang_tag is None:
+            metadata = root.find(".//{http://www.idpf.org/2007/opf}metadata")
+            if metadata is None:
+                return # No <metadata> element to attach a language to
+            lang_tag = ET.SubElement(metadata, "{http://purl.org/dc/elements/1.1/}language")
+        lang_tag.text = new_lang
+
+        # NOTE(review): ElementTree re-serialises the whole OPF, so the original namespace prefixes,
+        # XML declaration and comments are not preserved
+        self.files[opf_file] = ET.tostring(root, encoding='unicode')
+        self.fixed_problems.append(f"Updated language from {current_lang} to {new_lang}")
+
+    def fix_stray_images(self):
+        """Removes <img> tags that have no src attribute from every HTML/XHTML member"""
+        img_tag_pattern = re.compile(r'<img([^>]*)>', re.IGNORECASE)
+        src_pattern = re.compile(r'src\s*=\s*[\'"].+?[\'"]', re.IGNORECASE)
+
+        def keep_if_sourced(match):
+            return match.group(0) if src_pattern.search(match.group(1)) else ''
+
+        for filename, content in self._html_files():
+            cleaned = img_tag_pattern.sub(keep_if_sourced, content)
+            if cleaned != content:
+                self.files[filename] = cleaned
+                self.fixed_problems.append(f"Removed stray images in {filename}")
+
+    def write_epub(self):
+        """Writes self.files back to epub_path via a temporary file, so a failed write cannot destroy the book"""
+        tmp_path = f"{self.epub_path}.tmp"
+        try:
+            with zipfile.ZipFile(tmp_path, 'w', compression=zipfile.ZIP_DEFLATED) as zip_out:
+                # The EPUB container format wants 'mimetype' as the first member, stored uncompressed.
+                # sorted() is stable, so every other member keeps its original position
+                for filename in sorted(self.files, key=lambda name: name != 'mimetype'):
+                    content = self.files[filename]
+                    data = content.encode('utf-8') if isinstance(content, str) else content
+                    method = zipfile.ZIP_STORED if filename == 'mimetype' else zipfile.ZIP_DEFLATED
+                    zip_out.writestr(filename, data, compress_type=method)
+            os.replace(tmp_path, self.epub_path)
+        finally:
+            # Only still present when something went wrong before the replace
+            if os.path.exists(tmp_path):
+                os.remove(tmp_path)
+
+    def process(self):
+        """Reads the EPUB, applies every fix and rewrites the file only when something was changed"""
+        self.read_epub()
+        self.fix_encoding()
+        self.fix_language()
+        self.fix_stray_images()
+        if self.fixed_problems:
+            self.write_epub()
+        print("[cwa-kindle-epub-fixer] Processing completed.")
+        if self.fixed_problems:
+            for problem in self.fixed_problems:
+                print(f"[cwa-kindle-epub-fixer] {problem}")
+        else:
+            print("[cwa-kindle-epub-fixer] No issues found!")
+
+
+if __name__ == "__main__":
+    import sys
+
+    # Exactly one argument, the path of the EPUB to fix, is expected
+    if len(sys.argv) != 2:
+        print("[cwa-kindle-epub-fixer] Usage: python kindle_epub_fixer.py <input_file>")
+        sys.exit(1)
+
+    input_file = sys.argv[1]
+
+    # Check if the input file is an EPUB file
+    if not input_file.lower().endswith('.epub'):
+        print("[cwa-kindle-epub-fixer] Error: The input file must be an EPUB file with a .epub extension.")
+        sys.exit(1)
+
+    EPUBFixer(input_file).process()
\ No newline at end of file
diff --git a/scripts/setup-cwa.sh b/scripts/setup-cwa.sh
index 35215af..ccb2f0c 100644
--- a/scripts/setup-cwa.sh
+++ b/scripts/setup-cwa.sh
@@ -39,7 +39,7 @@ add_aliases () {
echo "}" | cat >> ~/.bashrc
echo "convert-library () {" | cat >> ~/.bashrc
- echo ' python3 /app/calibre-web-automated/scripts/convert-library.py "$@"' | cat >> ~/.bashrc
+ echo ' python3 /app/calibre-web-automated/scripts/convert_library.py "$@"' | cat >> ~/.bashrc
echo "}" | cat >> ~/.bashrc
source ~/.bashrc