Skip to content

Commit

Permalink
Added multi-format support to convert-library:
Browse files Browse the repository at this point in the history
- Now instead of deleting the original file after conversion, the orignal is preserved alongside the original as CWA now supports multiple formats for every book
- Files in a format that the user has specifed should be excluded from conversion, are no longer converted
- Files will now be converted to the users specified target format in the CWA Settings panel, no longer just epub
  • Loading branch information
crocodilestick committed Dec 10, 2024
1 parent ed5a801 commit dffd892
Show file tree
Hide file tree
Showing 2 changed files with 122 additions and 47 deletions.
167 changes: 121 additions & 46 deletions scripts/convert-library.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,35 +52,23 @@ def removeLock():
Path(directory).mkdir(exist_ok=True)
os.system(f"chown -R abc:abc {directory}")


class LibraryConverter:
def __init__(self) -> None: #args
# self.args = args

self.supported_book_formats = ['azw', 'azw3', 'azw4', 'cbz', 'cbr', 'cb7', 'cbc', 'chm', 'djvu', 'docx', 'epub', 'fb2', 'fbz', 'html', 'htmlz', 'lit', 'lrf', 'mobi', 'odt', 'pdf', 'prc', 'pdb', 'pml', 'rb', 'rtf', 'snb', 'tcr', 'txt', 'txtz']
self.hierarchy_of_success = ['lit', 'mobi', 'azw', 'azw3', 'fb2', 'fbz', 'azw4', 'prc', 'odt', 'lrf', 'pdb', 'cbz', 'pml', 'rb', 'cbr', 'cb7', 'cbc', 'chm', 'djvu', 'snb', 'tcr', 'pdf', 'docx', 'rtf', 'html', 'htmlz', 'txtz', 'txt']
self.hierarchy_of_success = ['epub', 'lit', 'mobi', 'azw', 'azw3', 'fb2', 'fbz', 'azw4', 'prc', 'odt', 'lrf', 'pdb', 'cbz', 'pml', 'rb', 'cbr', 'cb7', 'cbc', 'chm', 'djvu', 'snb', 'tcr', 'pdf', 'docx', 'rtf', 'html', 'htmlz', 'txtz', 'txt']

self.ingest_folder, self.library_dir, self.tmp_conversion_dir = self.get_dirs('/app/calibre-web-automated/dirs.json')
self.epubs, self.to_convert = self.get_library_books()
self.to_convert = self.get_books_to_convert()
self.current_book = 1

self.db = CWA_DB()
self.cwa_settings = self.db.cwa_settings
self.target_format = self.cwa_settings['auto_convert_target_format']
self.convert_ignored_formats = self.cwa_settings['auto_convert_ignored_formats']

def get_library_books(self):
library_files = [os.path.join(dirpath,f) for (dirpath, dirnames, filenames) in os.walk(self.library_dir) for f in filenames]
epub_files = [f for f in library_files if f.endswith('.epub')]
dupe_list = []
to_convert = []
for format in self.hierarchy_of_success:
format_files = [f for f in library_files if f.endswith(f'.{format}')]
if len(format_files) > 0:
for file in format_files:
filename, file_extension = os.path.splitext(file)
if filename not in dupe_list:
to_convert.append(file)
dupe_list.append(filename)

return epub_files, to_convert

def get_dirs(self, dirs_json_path: str) -> tuple[str, str, str]:
dirs = {}
Expand All @@ -93,13 +81,39 @@ def get_dirs(self, dirs_json_path: str) -> tuple[str, str, str]:

return ingest_folder, library_dir, tmp_conversion_dir


def get_books_to_convert(self):
library_files = [os.path.join(dirpath,f) for (dirpath, dirnames, filenames) in os.walk(self.library_dir) for f in filenames]

exclusion_list = [] # If multiple formats for a book exist, only the one with the highest success rate will be converted and the rest will be left alone
files_already_in_target_format = [f for f in library_files if f.endswith(f'.{self.target_format}')]
for file in files_already_in_target_format:
filename, file_extension = os.path.splitext(file)
exclusion_list.append(filename) # Adding books with a file already in the target format to the exclusion list

to_convert = [] # Will only contain a single filepath for each book without an existing file in the target format in the format with the highest available conversion success rate, where that filepath is allow to be converted
for format in self.hierarchy_of_success:
if format in self.convert_ignored_formats:
print_and_log(f"{format} in list of user-defined ignored formats for conversion. To change this, navigate to the CWA Settings panel from the Settings page in the Web UI.")
continue
files_in_format = [f for f in library_files if f.endswith(f'.{format}')]
if len(files_in_format) > 0:
for file in files_in_format:
filename, file_extension = os.path.splitext(file)
if filename not in exclusion_list:
to_convert.append(file)
exclusion_list.append(filename)

return to_convert


def convert_library(self):
for file in self.to_convert:
print_and_log(f"[convert-library]: ({self.current_book}/{len(self.to_convert)}) Converting {os.path.basename(file)}...")

filename = os.path.basename(file)
file_extension = Path(file).suffix

print_and_log(f"[convert-library]: ({self.current_book}/{len(self.to_convert)}) Converting {filename} from {file_extension} format to {self.target_format} format...")

try: # Get Calibre Library Book ID
book_id = (re.search(r'\(\d*\)', file).group(0))[1:-1]
except Exception as e:
Expand All @@ -114,25 +128,33 @@ def convert_library(self):
self.current_book += 1
continue

try: # Convert Book
target_filepath = f"{self.tmp_conversion_dir}{Path(file).stem}.epub"
subprocess.run(["ebook-convert", file, target_filepath], check=True)

if self.cwa_settings['auto_backup_conversions']:
shutil.copyfile(file, f"/config/processed_books/converted/{os.path.basename(file)}")

self.db.conversion_add_entry(os.path.basename(target_filepath),
Path(file).suffix,
str(self.cwa_settings["auto_backup_conversions"]))

print_and_log(f"[convert-library]: Conversion of {os.path.basename(file)} successful! Removing old version from library...")
except subprocess.CalledProcessError as e:
print_and_log(f"[convert-library]: Conversion of {os.path.basename(file)} was unsuccessful. See the following error:\n{e}")
self.current_book += 1
continue

try: # Import converted book to library
subprocess.run(["calibredb", "add", target_filepath, f"--library-path={self.library_dir}"], check=True)
if self.target_format == "kepub":
successful, target_filepath = self.convert_to_kepub(filename, file_extension)
if not successful:
print_and_log(f"[convert-library]: Conversion of {os.path.basename(file)} was unsuccessful. See the following error:\n{e}")
self.current_book += 1
continue
else:
try: # Convert Book to target format (target is not kepub)
target_filepath = f"{self.tmp_conversion_dir}{Path(file).stem}.{self.target_format}"
subprocess.run(["ebook-convert", file, target_filepath], check=True)

if self.cwa_settings['auto_backup_conversions']:
shutil.copyfile(file, f"/config/processed_books/converted/{os.path.basename(file)}")

self.db.conversion_add_entry(os.path.basename(target_filepath),
Path(file).suffix,
self.target_format,
str(self.cwa_settings["auto_backup_conversions"]))

print_and_log(f"[convert-library]: Conversion of {os.path.basename(file)} to {self.target_format} format successful!") # Removed as of V3.0.0 - Removing old version from library...
except subprocess.CalledProcessError as e:
print_and_log(f"[convert-library]: Conversion of {os.path.basename(file)} was unsuccessful. See the following error:\n{e}")
self.current_book += 1
continue

try: # Import converted book to library. As of V3.0.0, "add_format" is used instead of add
subprocess.run(["calibredb", "add_format", book_id, target_filepath, f"--library-path={self.library_dir}"], check=True)

if self.cwa_settings['auto_backup_imports']:
shutil.copyfile(target_filepath, f"/config/processed_books/imported/{os.path.basename(target_filepath)}")
Expand All @@ -147,19 +169,71 @@ def convert_library(self):
self.current_book += 1
continue

try: # Remove Book from Existing Library
subprocess.run(["calibredb", "remove", book_id, "--permanent", "--with-library", self.library_dir], check=True)
### As of Version 3.0.0, CWA will no longer remove the originals of converted files as CWA now supports multiple formats for each book ###

print_and_log(f"[convert-library]: Non-epub version of {Path(file).stem} (Book ID: {book_id}) was successfully removed from library.\nAdding converted version to library...")
except subprocess.CalledProcessError as e:
print_and_log(f"[convert-library]: Non-epub version of {Path(file).stem} couldn't be successfully removed from library. See the following error:\n{e}")
self.current_book += 1
continue
# try: # Remove Book from Existing Library
# subprocess.run(["calibredb", "remove", book_id, "--permanent", "--with-library", self.library_dir], check=True)

# print_and_log(f"[convert-library]: Non-epub version of {Path(file).stem} (Book ID: {book_id}) was successfully removed from library.\nAdding converted version to library...")
# except subprocess.CalledProcessError as e:
# print_and_log(f"[convert-library]: Non-epub version of {Path(file).stem} couldn't be successfully removed from library. See the following error:\n{e}")
# self.current_book += 1
# continue

self.set_library_permissions()
self.empty_tmp_con_dir()
self.current_book += 1
continue


def convert_to_kepub(self, filepath:str ,import_format:str) -> tuple[bool, str]:
"""Kepubify is limited in that it can only convert from epub to kepub, therefore any files not already in epub need to first be converted to epub, and then to kepub"""
if import_format == "epub":
print_and_log(f"[convert-library]: File in epub format, converting directly to kepub...")

if self.cwa_settings['auto_backup_conversions']:
shutil.copyfile(file, f"/config/processed_books/converted/{os.path.basename(filepath)}")

epub_filepath = filepath
epub_ready = True
else:
print_and_log("\n[convert-library]: *** NOTICE TO USER: Kepubify is limited in that it can only convert from epubs. To get around this, CWA will automatically convert other supported formats to epub using the Calibre's conversion tools & then use Kepubify to produce your desired kepubs. Obviously multi-step conversions aren't ideal so if you notice issues with your converted files, bare in mind starting with epubs will ensure the best possible results***\n")
try: # Convert book to epub format so it can then be converted to kepub
epub_filepath = f"{self.tmp_conversion_dir}{Path(filepath).stem}.epub"
subprocess.run(["ebook-convert", filepath, epub_filepath], check=True)

if self.cwa_settings['auto_backup_conversions']:
shutil.copyfile(file, f"/config/processed_books/converted/{os.path.basename(filepath)}")

print_and_log(f"[convert-library]: Intermediate conversion of {os.path.basename(filepath)} to epub from {import_format} successful, now converting to kepub...")
epub_ready = True
except subprocess.CalledProcessError as e:
print_and_log(f"[convert-library]: Intermediate conversion of {os.path.basename(filepath)} to epub was unsuccessful. Cancelling kepub conversion and moving on to next file. See the following error:\n{e}")
return False, ""

if epub_ready:
epub_filepath = Path(epub_filepath)
target_filepath = f"{self.tmp_conversion_dir}{epub_filepath.stem}.kepub"
try:
subprocess.run(['kepubify', '--inplace', '--calibre', '--output', self.tmp_conversion_dir, epub_filepath], check=True)
if self.cwa_settings['auto_backup_conversions']:
shutil.copy2(filepath, f"/config/processed_books/converted")

self.db.conversion_add_entry(epub_filepath.stem,
import_format,
self.target_format,
str(self.cwa_settings["auto_backup_conversions"]))

return True, target_filepath
except subprocess.CalledProcessError as e:
print_and_log(f"[convert-library]: CON_ERROR: {os.path.basename(filepath)} could not be converted to kepub due to the following error:\nEXIT/ERROR CODE: {e.returncode}\n{e.stderr}")
shutil.copy2(epub_filepath, f"/config/processed_books/failed")
return False, ""
else:
print_and_log(f"[convert-library]: An error occurred when converting the original {import_format} to epub. Cancelling kepub conversion and moving on to next file...")
return False, ""


def empty_tmp_con_dir(self):
try:
files = os.listdir(self.tmp_conversion_dir)
Expand All @@ -170,6 +244,7 @@ def empty_tmp_con_dir(self):
except OSError:
print_and_log(f"[convert-library] An error occurred while emptying {self.tmp_conversion_dir}.")


def set_library_permissions(self):
try:
subprocess.run(["chown", "-R", "abc:abc", self.library_dir], check=True)
Expand All @@ -195,7 +270,7 @@ def main():
if len(converter.to_convert) > 0:
converter.convert_library()
else:
print_and_log("[convert-library] No non-epubs found in library. Exiting now...")
print_and_log("[convert-library] No books found in library without a copy in the target format. Exiting now...")
logging.info("FIN")
sys.exit(0)

Expand Down
2 changes: 1 addition & 1 deletion scripts/ingest-processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def __init__(self, filepath: str):
self.convert_ignored_formats = self.db.cwa_settings['auto_convert_ignored_formats']

self.supported_book_formats = ['azw', 'azw3', 'azw4', 'cbz', 'cbr', 'cb7', 'cbc', 'chm', 'djvu', 'docx', 'epub', 'fb2', 'fbz', 'html', 'htmlz', 'lit', 'lrf', 'mobi', 'odt', 'pdf', 'prc', 'pdb', 'pml', 'rb', 'rtf', 'snb', 'tcr', 'txtz', 'txt', 'kepub']
# self.hierarchy_of_success = ['lit', 'mobi', 'azw', 'epub', 'azw3', 'fb2', 'fbz', 'azw4', 'prc', 'odt', 'lrf', 'pdb', 'cbz', 'pml', 'rb', 'cbr', 'cb7', 'cbc', 'chm', 'djvu', 'snb', 'tcr', 'pdf', 'docx', 'rtf', 'html', 'htmlz', 'txtz', 'txt']
# self.hierarchy_of_success = ['epub', 'lit', 'mobi', 'azw', 'epub', 'azw3', 'fb2', 'fbz', 'azw4', 'prc', 'odt', 'lrf', 'pdb', 'cbz', 'pml', 'rb', 'cbr', 'cb7', 'cbc', 'chm', 'djvu', 'snb', 'tcr', 'pdf', 'docx', 'rtf', 'html', 'htmlz', 'txtz', 'txt']
self.ingest_folder, self.library_dir, self.tmp_conversion_dir = self.get_dirs("/app/calibre-web-automated/dirs.json")

self.filepath = filepath # path of the book we're targeting
Expand Down

0 comments on commit dffd892

Please sign in to comment.