From cfb942178c97e09d9cb98525c951d0b79616da21 Mon Sep 17 00:00:00 2001 From: undaunt <31376520+undaunt@users.noreply.github.com> Date: Tue, 26 Nov 2024 11:54:13 -0800 Subject: [PATCH 1/8] Allow ISO 8601, RFC 2822, or unknown date formats Allows different formats (ISO 8601, RFC 2822) for recording date, date range, and date range for folder naming without throwing a typescript error on a non-match --- bulldozer | 0 classes/file_analyzer.py | 104 +++++++++++++++++++++++++++++--------- classes/file_organizer.py | 75 +++++++++++++++++++-------- 3 files changed, 136 insertions(+), 43 deletions(-) mode change 100755 => 100644 bulldozer diff --git a/bulldozer b/bulldozer old mode 100755 new mode 100644 diff --git a/classes/file_analyzer.py b/classes/file_analyzer.py index 29c6875..0cd680f 100644 --- a/classes/file_analyzer.py +++ b/classes/file_analyzer.py @@ -1,6 +1,9 @@ # file_analyzer.py import mutagen +import re from collections import defaultdict +from datetime import datetime +from email.utils import parsedate_to_datetime from mutagen.mp3 import MP3 from mutagen.mp4 import MP4 from mutagen.mp3 import BitrateMode @@ -93,8 +96,6 @@ def analyze_audio_file(self, file_path, trailer_patterns): def get_date_range(self): """ Get the date range of the audio files. - - :return: The date range as a tuple of the earliest and latest dates. 
""" self.file_dates = {k: v for k, v in self.file_dates.items() if v} self.earliest_year = None @@ -104,43 +105,100 @@ def get_date_range(self): self.real_last_episode_date = None for date_str in self.file_dates.keys(): - year = int(str(date_str)[:4]) - if self.earliest_year is None or (year and year < self.earliest_year): - self.earliest_year = year - if self.first_episode_date is None or date_str < self.first_episode_date: - self.real_first_episode_date = self.first_episode_date = date_str - if self.last_episode_date is None or date_str > self.last_episode_date: - self.real_last_episode_date = self.last_episode_date = date_str + if date_str != "Unknown": + try: + year = int(str(date_str)[:4]) + except ValueError: + log(f"Invalid date string '{date_str}' encountered.", "warning") + continue + + if self.earliest_year is None or (year and year < self.earliest_year): + self.earliest_year = year + if self.first_episode_date is None or date_str < self.first_episode_date: + self.real_first_episode_date = self.first_episode_date = date_str + if self.last_episode_date is None or date_str > self.last_episode_date: + self.real_last_episode_date = self.last_episode_date = date_str + else: + # Handle files with unknown dates separately if needed + log(f"Encountered file with unknown date.", "warning") + continue + # If we have original files (from previous runs), update real first and last dates if self.original_files: for date_str in self.original_files.keys(): - year = int(str(date_str)[:4]) - if self.real_first_episode_date is None or (date_str and date_str < self.real_first_episode_date): - self.real_first_episode_date = date_str - if self.real_last_episode_date is None or (date_str and date_str > self.real_last_episode_date): - self.real_last_episode_date = date_str + if date_str != "Unknown": + if self.real_first_episode_date is None or (date_str and date_str < self.real_first_episode_date): + self.real_first_episode_date = date_str + if self.real_last_episode_date is 
None or (date_str and date_str > self.real_last_episode_date): + self.real_last_episode_date = date_str def process_metadata(self, metadata, file_path): """ Process the metadata of an audio file. - + :param metadata: The metadata of the audio file. :param file_path: The path to the audio file. """ recording_date = metadata.get('recording_date') + date_str = "Unknown" + year = None + if recording_date: - year = int(str(recording_date)[:4]) - date_str = str(recording_date) + date_str_raw = str(recording_date) + parsed = False + + # Try ISO 8601 format first + try: + date_obj = datetime.strptime(date_str_raw, '%Y-%m-%d') + parsed = True + except ValueError: + pass + + # If ISO 8601 parsing fails, try RFC 2822 format + if not parsed: + try: + date_obj = parsedate_to_datetime(date_str_raw) + parsed = True + except (TypeError, ValueError, IndexError): + pass + + # If parsing succeeds, extract the year and formatted date string + if parsed: + year = date_obj.year + date_str = date_obj.strftime('%Y-%m-%d') + else: + log(f"Invalid recording date format for file {file_path}: '{date_str_raw}'", "warning") + date_str = "Unknown" else: - log(f"Failed to get recording date for: {file_path}", "error") - year = None - date_str = "Unknown" + # Try to extract date from file name + date_pattern = re.compile(r'\b(\d{4}-\d{2}-\d{2})\b') + match = date_pattern.search(file_path.name) + if match: + date_str = match.group(1) + try: + date_obj = datetime.strptime(date_str, '%Y-%m-%d') + year = date_obj.year + except ValueError: + log(f"Invalid date in file name for file {file_path}: '{date_str}'", "warning") + date_str = "Unknown" + else: + # Use file modification date as a last resort + try: + timestamp = file_path.stat().st_mtime + date_obj = datetime.fromtimestamp(timestamp) + date_str = date_obj.strftime('%Y-%m-%d') + year = date_obj.year + log(f"Using file modification date for {file_path}: '{date_str}'", "info") + except Exception as e: + log(f"Failed to get file modification date for 
{file_path}: {e}", "error") + date_str = "Unknown" + # Continue processing even if date is unknown self.file_dates[date_str].append(file_path) - bitrate = metadata['bitrate'] - bitrate_mode = metadata['bitrate_mode'] - bitrate_str = "VBR" if "vbr" in bitrate_mode.lower() else f"{bitrate} kbps" + bitrate = metadata.get('bitrate', None) + bitrate_mode = metadata.get('bitrate_mode', 'Unknown') + bitrate_str = "VBR" if "vbr" in bitrate_mode.lower() else f"{bitrate} kbps" if bitrate else "Unknown" self.bitrates[bitrate_str].append(file_path) file_format = file_path.suffix.lower()[1:] diff --git a/classes/file_organizer.py b/classes/file_organizer.py index 108a845..18cd10e 100644 --- a/classes/file_organizer.py +++ b/classes/file_organizer.py @@ -390,40 +390,75 @@ def organize_files(self): def rename_folder(self): """ - Rename the podcast folder based on the podcast name and last episode date. + Rename the podcast folder based on the podcast name and date information. """ if '(' in self.podcast.folder_path.name: return + date_format_short = self.config.get('date_format_short', '%Y-%m-%d') date_format_long = self.config.get('date_format_long', '%B %d %Y') + completed_threshold_days = self.config.get('completed_threshold_days', 365) + + # Get date strings and handle None or "Unknown" start_year_str = str(self.podcast.analyzer.earliest_year) if self.podcast.analyzer.earliest_year else "Unknown" real_start_year_str = str(self.podcast.analyzer.real_first_episode_date)[:4] if self.podcast.analyzer.real_first_episode_date else "Unknown" - first_episode_date_str = format_last_date(self.podcast.analyzer.first_episode_date, date_format_long) if self.podcast.analyzer.first_episode_date else "Unknown" - last_episode_date_str = format_last_date(self.podcast.analyzer.last_episode_date, date_format_long) if self.podcast.analyzer.last_episode_date else "Unknown" - last_episode_date_dt = datetime.strptime(self.podcast.analyzer.last_episode_date, date_format_short) if 
self.podcast.analyzer.last_episode_date != "Unknown" else None - real_last_episode_date_dt = datetime.strptime(self.podcast.analyzer.real_last_episode_date, date_format_short) if self.podcast.analyzer.real_last_episode_date != "Unknown" else None + first_episode_date_str = format_last_date(self.podcast.analyzer.first_episode_date, date_format_long) if self.podcast.analyzer.first_episode_date and self.podcast.analyzer.first_episode_date != "Unknown" else "Unknown" + last_episode_date_str = format_last_date(self.podcast.analyzer.last_episode_date, date_format_long) if self.podcast.analyzer.last_episode_date and self.podcast.analyzer.last_episode_date != "Unknown" else "Unknown" + + # Initialize datetime objects + last_episode_date_dt = None + real_last_episode_date_dt = None + + # Safely parse last_episode_date + if self.podcast.analyzer.last_episode_date and self.podcast.analyzer.last_episode_date != "Unknown": + try: + last_episode_date_dt = datetime.strptime(self.podcast.analyzer.last_episode_date, date_format_short) + except ValueError: + log(f"Invalid last_episode_date format: {self.podcast.analyzer.last_episode_date}", "warning") + last_episode_date_dt = None + + # Safely parse real_last_episode_date + if self.podcast.analyzer.real_last_episode_date and self.podcast.analyzer.real_last_episode_date != "Unknown": + try: + real_last_episode_date_dt = datetime.strptime(self.podcast.analyzer.real_last_episode_date, date_format_short) + except ValueError: + log(f"Invalid real_last_episode_date format: {self.podcast.analyzer.real_last_episode_date}", "warning") + real_last_episode_date_dt = None + last_year_str = str(last_episode_date_dt.year) if last_episode_date_dt else "Unknown" new_folder_name = None - if real_last_episode_date_dt != last_episode_date_dt: - if ask_yes_no(f'Would you like to rename the folder to {self.podcast.name} ({start_year_str}-{last_year_str})'): - new_folder_name = f"{self.podcast.name} ({start_year_str}-{last_year_str})" - if not 
new_folder_name and start_year_str != real_start_year_str: - if ask_yes_no(f'Would you like to rename the folder to {self.podcast.name} ({first_episode_date_str}-{last_episode_date_str})'): - new_folder_name = f"{self.podcast.name} ({first_episode_date_str}-{last_episode_date_str})" - if not new_folder_name and last_episode_date_dt and datetime.now() - last_episode_date_dt > timedelta(days=self.config.get('completed_threshold_days', 365)): - if ask_yes_no(f'Would you like to rename the folder to {self.podcast.name} (Complete)'): - new_folder_name = f"{self.podcast.name} (Complete)" + + # Decision logic for renaming the folder + if real_last_episode_date_dt and last_episode_date_dt and real_last_episode_date_dt != last_episode_date_dt: + prompt_name = f"{self.podcast.name} ({start_year_str}-{last_year_str})" + if ask_yes_no(f'Would you like to rename the folder to {prompt_name}?'): + new_folder_name = prompt_name + + if not new_folder_name and start_year_str != real_start_year_str and first_episode_date_str != "Unknown" and last_episode_date_str != "Unknown": + prompt_name = f"{self.podcast.name} ({first_episode_date_str}-{last_episode_date_str})" + if ask_yes_no(f'Would you like to rename the folder to {prompt_name}?'): + new_folder_name = prompt_name + + if not new_folder_name and last_episode_date_dt and (datetime.now() - last_episode_date_dt > timedelta(days=completed_threshold_days)): + prompt_name = f"{self.podcast.name} (Complete)" + if ask_yes_no(f'Would you like to rename the folder to {prompt_name}?'): + new_folder_name = prompt_name self.podcast.completed = True + + if not new_folder_name and start_year_str != "Unknown" and last_episode_date_str != "Unknown": + prompt_name = f"{self.podcast.name} ({start_year_str}-{last_episode_date_str})" + if ask_yes_no(f'Would you like to rename the folder to {prompt_name}?'): + new_folder_name = prompt_name + if not new_folder_name: - if ask_yes_no(f'Would you like to rename the folder to {self.podcast.name} 
({start_year_str}-{last_episode_date_str})'): - new_folder_name = f"{self.podcast.name} ({start_year_str}-{last_episode_date_str})" - if not new_folder_name: - new_folder_name = take_input(f'Enter a custom name for the folder (blank skips)') + custom_name = take_input(f'Enter a custom name for the folder (blank skips): ') + if custom_name: + new_folder_name = custom_name if new_folder_name: new_folder_path = self.podcast.folder_path.parent / new_folder_name - log(f"Renaming folder {self.podcast.folder_path} to {new_folder_path}", "debug") + log(f"Renaming folder '{self.podcast.folder_path}' to '{new_folder_path}'", "debug") self.podcast.folder_path.rename(new_folder_path) self.podcast.folder_path = new_folder_path - + return From c7e1fb84307eea63916719a6e1dadbd6eb9e27fd Mon Sep 17 00:00:00 2001 From: lewler Date: Tue, 26 Nov 2024 21:46:58 +0100 Subject: [PATCH 2/8] Refactor title handling in RSS class and add folder name fixing utility --- classes/rss.py | 7 +++---- classes/utils.py | 4 ++++ config.default.yaml | 8 ++++++++ 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/classes/rss.py b/classes/rss.py index f9df21a..0bdeb4a 100644 --- a/classes/rss.py +++ b/classes/rss.py @@ -4,9 +4,8 @@ import shutil import xml.etree.ElementTree as ET from pathlib import Path -from titlecase import titlecase from .utils import spinner, get_metadata_directory, log, find_case_insensitive_files, copy_file, download_file -from .utils import special_capitalization, archive_metadata, ask_yes_no, announce, perform_replacements +from .utils import archive_metadata, ask_yes_no, announce, fix_folder_name class Rss: def __init__(self, podcast, source_rss_file, config, censor_rss): @@ -65,8 +64,8 @@ def extract_folder_name(self): if channel is not None: title = channel.find('title') if title is not None: - new_title = perform_replacements(title.text, self.config.get('title_replacements', [])).strip() - return titlecase(new_title, callback=lambda word, **kwargs: 
special_capitalization(word, self.config, None, **kwargs)) + return fix_folder_name(title.text) + return None def get_episode_count_from(self): diff --git a/classes/utils.py b/classes/utils.py index a6ed2f6..2512ea6 100644 --- a/classes/utils.py +++ b/classes/utils.py @@ -493,3 +493,7 @@ def download_file(url, target_path): return False return True + +def fix_folder_name(name): + new_name = perform_replacements(name, config.get('title_replacements', [])).strip() + return titlecase(new_name, callback=lambda word, **kwargs: special_capitalization(word, config, None, **kwargs)) \ No newline at end of file diff --git a/config.default.yaml b/config.default.yaml index 16e117b..151b2bf 100644 --- a/config.default.yaml +++ b/config.default.yaml @@ -43,6 +43,12 @@ dupecheck_url: https://tracker.com/api/torrents/filter # How the RSS feed should be censored. Can be 'delete', 'edit' rss_censor_mode: delete +# Should the script offer to move the folder +move: + active: false # Whether to move the folder + ask: false # Whether to ask before moving + path: /home/user/torrents # The path to move the folder to + ## Cache settings cache: directory: /home/user/podcasts/cache # The directory to store the cache @@ -228,6 +234,8 @@ title_replacements: replacement: '' - pattern: '\s*-\s*$' replacement: '' + - pattern: '\s_\s$' + replacement: '' # Used when splitting the title to add episode numbers title_split_pattern: '- (?=[^-]*$)' From c6506b7190696135a2f938ebcb96b71382684494 Mon Sep 17 00:00:00 2001 From: undaunt <31376520+undaunt@users.noreply.github.com> Date: Tue, 26 Nov 2024 12:48:06 -0800 Subject: [PATCH 3/8] Add additional date format support Allows more date formats and fixes TDRC mp3 extraction data --- classes/file_analyzer.py | 67 +++++++++++++++++++++++++++------------- 1 file changed, 46 insertions(+), 21 deletions(-) diff --git a/classes/file_analyzer.py b/classes/file_analyzer.py index 0cd680f..16e04d2 100644 --- a/classes/file_analyzer.py +++ b/classes/file_analyzer.py 
@@ -77,7 +77,11 @@ def analyze_audio_file(self, file_path, trailer_patterns): metadata = {} if isinstance(audiofile, MP3): - metadata['recording_date'] = audiofile.get("TDRC") + tdrc = audiofile.get("TDRC") + if tdrc: + metadata['recording_date'] = tdrc.text[0] # Extract the text value + else: + metadata['recording_date'] = None metadata['bitrate'] = round(audiofile.info.bitrate / 1000) metadata['bitrate_mode'] = "VBR" if audiofile.info.bitrate_mode == BitrateMode.VBR else "CBR" elif isinstance(audiofile, MP4): @@ -132,42 +136,63 @@ def get_date_range(self): if self.real_last_episode_date is None or (date_str and date_str > self.real_last_episode_date): self.real_last_episode_date = date_str - def process_metadata(self, metadata, file_path): - """ - Process the metadata of an audio file. - - :param metadata: The metadata of the audio file. - :param file_path: The path to the audio file. - """ - recording_date = metadata.get('recording_date') - date_str = "Unknown" - year = None + def process_metadata(self, metadata, file_path): + """ + Process the metadata of an audio file. + :param metadata: The metadata of the audio file. + :param file_path: The path to the audio file. 
+ """ if recording_date: date_str_raw = str(recording_date) parsed = False + date_obj = None + + # Add debug logging + log(f"Processing recording_date: '{date_str_raw}' for file '{file_path.name}'", "debug") + + # Define possible date formats + date_formats = [ + '%Y-%m-%d', + '%Y', + '%a, %d %b %Y %H:%M:%S %z', # 'Thu, 02 Nov 2023 16:31:53 -0000' + '%a, %d %b %Y %H:%M:%S %Z', + '%d %b %Y %H:%M:%S %z', + '%d %b %Y %H:%M:%S %Z', + ] + + # Try parsing with the defined formats + for fmt in date_formats: + try: + date_obj = datetime.strptime(date_str_raw, fmt) + parsed = True + break + except ValueError: + continue - # Try ISO 8601 format first - try: - date_obj = datetime.strptime(date_str_raw, '%Y-%m-%d') - parsed = True - except ValueError: - pass + if not parsed: + try: + # Use dateutil.parser.parse for flexible parsing + from dateutil.parser import parse + date_obj = parse(date_str_raw) + parsed = True + except (ImportError, ValueError): + pass - # If ISO 8601 parsing fails, try RFC 2822 format if not parsed: try: + # Fallback to parsedate_to_datetime date_obj = parsedate_to_datetime(date_str_raw) parsed = True except (TypeError, ValueError, IndexError): pass - # If parsing succeeds, extract the year and formatted date string - if parsed: + if parsed and date_obj: year = date_obj.year date_str = date_obj.strftime('%Y-%m-%d') + log(f"Parsed date: '{date_str}'", "debug") else: - log(f"Invalid recording date format for file {file_path}: '{date_str_raw}'", "warning") + log(f"Invalid recording date format for file '{file_path.name}': '{date_str_raw}'", "warning") date_str = "Unknown" else: # Try to extract date from file name From 971214255b7434c0cfbc1e495acecec1afde514c Mon Sep 17 00:00:00 2001 From: undaunt <31376520+undaunt@users.noreply.github.com> Date: Tue, 26 Nov 2024 13:27:59 -0800 Subject: [PATCH 4/8] add extra date extraction support Scans TDC and TXXX tags to help detect proper recording dates in mp3s --- classes/file_analyzer.py | 103 
++++++++++++++++++++++++++++++++------- requirements.txt | 1 + 2 files changed, 87 insertions(+), 17 deletions(-) diff --git a/classes/file_analyzer.py b/classes/file_analyzer.py index 16e04d2..b078ada 100644 --- a/classes/file_analyzer.py +++ b/classes/file_analyzer.py @@ -1,4 +1,5 @@ # file_analyzer.py + import mutagen import re from collections import defaultdict @@ -7,6 +8,9 @@ from mutagen.mp3 import MP3 from mutagen.mp4 import MP4 from mutagen.mp3 import BitrateMode +from mutagen.easyid3 import EasyID3 +from dateutil.parser import parse # Import dateutil parser +from mutagen.id3 import TXXX # Import TXXX for accessing custom tags from .utils import spinner, log class FileAnalyzer: @@ -61,7 +65,7 @@ def analyze_files(self): def analyze_audio_file(self, file_path, trailer_patterns): """ Analyze an individual audio file and extract metadata. - + :param file_path: The path to the audio file. :return: The metadata of the audio file. """ @@ -77,13 +81,34 @@ def analyze_audio_file(self, file_path, trailer_patterns): metadata = {} if isinstance(audiofile, MP3): - tdrc = audiofile.get("TDRC") - if tdrc: - metadata['recording_date'] = tdrc.text[0] # Extract the text value - else: + # Access 'TDRC' frame directly for recording date + try: + log(f"MP3 tags for '{file_path.name}': {audiofile.tags.pprint()}", "debug") + if ('TDRC' in audiofile.tags and + audiofile.tags['TDRC'].text and + audiofile.tags['TDRC'].text[0]): + + tdrc = audiofile.tags['TDRC'] + date_value = tdrc.text[0] + # Convert date_value to string before stripping + date_str = str(date_value).strip() + + if date_str: + metadata['recording_date'] = date_str + log(f"Found 'TDRC' tag: '{date_str}' in '{file_path.name}'", "debug") + else: + # Proceed to fallback methods + metadata['recording_date'] = self.mp3_date_extract_alternatives(audiofile, file_path) + else: + # Proceed to fallback methods + metadata['recording_date'] = self.mp3_date_extract_alternatives(audiofile, file_path) + except Exception as e: + 
log(f"Error reading tags from '{file_path.name}': {e}", "error") metadata['recording_date'] = None + metadata['bitrate'] = round(audiofile.info.bitrate / 1000) metadata['bitrate_mode'] = "VBR" if audiofile.info.bitrate_mode == BitrateMode.VBR else "CBR" + elif isinstance(audiofile, MP4): metadata['recording_date'] = audiofile.tags.get("\xa9day", [None])[0] metadata['bitrate'] = round(audiofile.info.bitrate / 1000) @@ -91,11 +116,51 @@ def analyze_audio_file(self, file_path, trailer_patterns): else: log(f"Unsupported audio format, skipping: {file_path}", "warning") return None - + if metadata['bitrate_mode'] != "VBR": self.all_vbr = False return metadata + + def mp3_date_extract_alternatives(self, audiofile, file_path): + """ + Extract the recording date from alternative tags. + + :param audiofile: The audio file object. + :param file_path: The path to the audio file. + :return: The extracted date as a string, or None if not found. + """ + try: + easy_tags = EasyID3(file_path) + log(f"EasyID3 tags for '{file_path.name}': {easy_tags.pprint()}", "debug") + # Try to get date from different tags + date = easy_tags.get('date', [None])[0] + if not date: + date = easy_tags.get('originaldate', [None])[0] + if not date: + date = easy_tags.get('year', [None])[0] + if date: + log(f"Found date in EasyID3 tags: '{date}' in '{file_path.name}'", "debug") + return date + else: + # Look for 'releasedate' in TXXX frames + txxx_tags = audiofile.tags.getall('TXXX') + releasedate = None + for tag in txxx_tags: + log(f"TXXX tag: desc='{tag.desc}', text='{tag.text}'", "debug") + if 'releasedate' in tag.desc.lower(): + releasedate = tag.text[0].strip() + log(f"Found 'releasedate' in TXXX tags: '{releasedate}' in '{file_path.name}'", "debug") + break + if releasedate: + log(f"Set 'recording_date' to 'releasedate': '{releasedate}' for '{file_path.name}'", "debug") + return releasedate + else: + log(f"No date tag found in '{file_path.name}'", "warning") + return None + except Exception as e: 
+ log(f"Error reading EasyID3 tags from '{file_path.name}': {e}", "error") + return None def get_date_range(self): """ @@ -136,13 +201,17 @@ def get_date_range(self): if self.real_last_episode_date is None or (date_str and date_str > self.real_last_episode_date): self.real_last_episode_date = date_str - def process_metadata(self, metadata, file_path): - """ - Process the metadata of an audio file. + def process_metadata(self, metadata, file_path): + """ + Process the metadata of an audio file. + + :param metadata: The metadata of the audio file. + :param file_path: The path to the audio file. + """ + recording_date = metadata.get('recording_date') + date_str = "Unknown" + year = None - :param metadata: The metadata of the audio file. - :param file_path: The path to the audio file. - """ if recording_date: date_str_raw = str(recording_date) parsed = False @@ -173,17 +242,17 @@ def process_metadata(self, metadata, file_path): if not parsed: try: # Use dateutil.parser.parse for flexible parsing - from dateutil.parser import parse date_obj = parse(date_str_raw) parsed = True - except (ImportError, ValueError): - pass + except (ValueError) as e: + log(f"Failed to parse date '{date_str_raw}' using dateutil: {e}", "warning") if not parsed: try: # Fallback to parsedate_to_datetime date_obj = parsedate_to_datetime(date_str_raw) - parsed = True + if date_obj is not None: + parsed = True except (TypeError, ValueError, IndexError): pass @@ -204,7 +273,7 @@ def process_metadata(self, metadata, file_path): date_obj = datetime.strptime(date_str, '%Y-%m-%d') year = date_obj.year except ValueError: - log(f"Invalid date in file name for file {file_path}: '{date_str}'", "warning") + log(f"Invalid date in file name for file '{file_path}': '{date_str}'", "warning") date_str = "Unknown" else: # Use file modification date as a last resort diff --git a/requirements.txt b/requirements.txt index ff5c2b7..8b3ba2a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,7 @@ yaspin 
requests mutagen titlecase +python-dateutil pyyaml pillow pillow-avif-plugin From b030b54b08f050c4fae00a54f09dc7765e102370 Mon Sep 17 00:00:00 2001 From: lewler Date: Tue, 26 Nov 2024 22:28:36 +0100 Subject: [PATCH 5/8] Implement folder renaming utility and streamline RSS file renaming process --- classes/rss.py | 23 +++++------------------ classes/utils.py | 20 +++++++++++++++++++- 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/classes/rss.py b/classes/rss.py index 0bdeb4a..abe7a82 100644 --- a/classes/rss.py +++ b/classes/rss.py @@ -5,7 +5,7 @@ import xml.etree.ElementTree as ET from pathlib import Path from .utils import spinner, get_metadata_directory, log, find_case_insensitive_files, copy_file, download_file -from .utils import archive_metadata, ask_yes_no, announce, fix_folder_name +from .utils import archive_metadata, ask_yes_no, announce, fix_folder_name, rename_folder class Rss: def __init__(self, podcast, source_rss_file, config, censor_rss): @@ -86,9 +86,9 @@ def rename(self): """ Rename the RSS file to the podcast name. 
""" - old_file_path = get_metadata_directory(self.podcast.folder_path, self.config) / f'podcast.rss' - if not old_file_path.exists(): - log(f"RSS file {old_file_path} does not exist, can't rename", "error") + old_file_path = self.get_file_path() + if not old_file_path: + log(f"RSS file {old_file_path} does not exist, can't rename", "warning") return new_file_path = get_metadata_directory(self.podcast.folder_path, self.config) / f'{self.podcast.name}.rss' log(f"Renaming RSS file from {old_file_path} to {new_file_path}", "debug") @@ -115,20 +115,7 @@ def get_metadata_rename_folder(self): exit(1) if self.podcast.name == 'unknown podcast': - new_folder_path = self.podcast.folder_path.parent / f'{self.metadata['name']}' - if new_folder_path.exists(): - spin.fail("✖") - log(f"Folder {new_folder_path} already exists", "critical") - if not ask_yes_no("Folder already exists, do you want to overwrite it?"): - announce("Exiting, cya later!", "info") - exit(1) - - shutil.rmtree(new_folder_path) - - self.podcast.folder_path.rename(new_folder_path) - log(f"Folder renamed to {new_folder_path}", "debug") - self.podcast.folder_path = new_folder_path - self.podcast.name = self.metadata['name'] + rename_folder(self.podcast, self.metadata['name'], spin) self.rename() self.metadata['total_episodes'] = self.get_episode_count_from() diff --git a/classes/utils.py b/classes/utils.py index 2512ea6..33d23a0 100644 --- a/classes/utils.py +++ b/classes/utils.py @@ -496,4 +496,22 @@ def download_file(url, target_path): def fix_folder_name(name): new_name = perform_replacements(name, config.get('title_replacements', [])).strip() - return titlecase(new_name, callback=lambda word, **kwargs: special_capitalization(word, config, None, **kwargs)) \ No newline at end of file + return titlecase(new_name, callback=lambda word, **kwargs: special_capitalization(word, config, None, **kwargs)) + +def rename_folder(podcast, name, spin=None): + new_folder_path = podcast.folder_path.parent / name + if 
new_folder_path.exists(): + if spin: + spin.fail("✖") + log(f"Folder {new_folder_path} already exists", "critical") + if not ask_yes_no("Folder already exists, do you want to overwrite it?"): + announce("Exiting, cya later!", "info") + exit(1) + if spin: + spin = spinner("Renaming folder") + shutil.rmtree(new_folder_path) + + podcast.folder_path.rename(new_folder_path) + log(f"Folder renamed to {new_folder_path}", "debug") + podcast.folder_path = new_folder_path + podcast.name = name \ No newline at end of file From c38912de6a778026867027583b0ea63dc7ce2826 Mon Sep 17 00:00:00 2001 From: lewler Date: Tue, 26 Nov 2024 22:44:57 +0100 Subject: [PATCH 6/8] Add folder renaming and moving functionality in main process --- bulldozer | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/bulldozer b/bulldozer index 6e08f44..0d36130 100755 --- a/bulldozer +++ b/bulldozer @@ -7,7 +7,8 @@ from pathlib import Path from classes.dupe_checker import DupeChecker from classes.podcast import Podcast from classes.torrent_creator import TorrentCreator -from classes.utils import load_config, setup_logging, announce, ask_yes_no, check_config, log +from classes.utils import load_config, setup_logging, announce, ask_yes_no, check_config, log, fix_folder_name +from classes.utils import rename_folder from classes.report import Report config = load_config() @@ -20,7 +21,7 @@ def main(input, censor_rss, report_only=False, search_term=None, download_only=F :param censor_rss: Whether to censor the RSS feed or not """ global config - print("· • —– ++ ---| Bulldozer v0.6.3 |--- ++ —– • ·") + print("· • —– ++ ---| Bulldozer v0.6.4 |--- ++ —– • ·") database_active = config.get("database", {}).get("active", True) if os.path.isdir(input): @@ -29,6 +30,11 @@ def main(input, censor_rss, report_only=False, search_term=None, download_only=F name = folder_path.name check_duplicates = not report_only podcast = Podcast(name, folder_path, config, censor_rss=censor_rss, 
check_duplicates=check_duplicates, search_term=search_term) + new_name = fix_folder_name(name) + if new_name != name and ask_yes_no(f"Would you like to rename the folder to {new_name}"): + rename_folder(podcast, new_name) + podcast.name = name = new_name + podcast.rss.rename() else: source_rss_file = input output_dir = config.get("output_dir", ".") @@ -86,6 +92,21 @@ def main(input, censor_rss, report_only=False, search_term=None, download_only=F tracker_source = config.get("tracker_source", None) create_torrent(podcast, announce_url, base_dir, tracker_source) podcast.db.close() + move_folder = config.get("move", {}).get("active", False) + move_folder_path = config.get("move", {}).get("path", None) + if move_folder and move_folder_path: + ask_to_move = config.get("move", {}).get("ask", True) + if ask_to_move and ask_yes_no(f"Would you like to move the folder to {move_folder_path}"): + move_folder_path = Path(move_folder_path) + if move_folder_path.exists() and move_folder_path.is_dir(): + new_folder_path = move_folder_path / folder_path.name + if new_folder_path.exists(): + announce(f"Folder {new_folder_path} already exists, skipping move", "warning") + else: + folder_path.rename(new_folder_path) + announce(f"Moved folder to {new_folder_path}", "info") + else: + announce(f"Move folder {move_folder_path} does not exist", "error") announce(f"All done, enjoy!", "celebrate") def check_files(input): From 045260be259c3170b6dc44c4b8aaff461eab84ba Mon Sep 17 00:00:00 2001 From: lewler Date: Thu, 5 Dec 2024 12:53:32 +0100 Subject: [PATCH 7/8] Update podcast split configuration and logging for clarity --- classes/file_organizer.py | 6 +++--- config.default.yaml | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/classes/file_organizer.py b/classes/file_organizer.py index 108a845..f2ac56d 100644 --- a/classes/file_organizer.py +++ b/classes/file_organizer.py @@ -321,9 +321,9 @@ def check_split(self): log("Skipping split check, podcast is marked as 
complete", "debug") return - full_years_only = self.config.get('full_years_only', False) - if not full_years_only: - log("Skipping split check, full_years_only is false", "debug") + split = self.config.get('split', False) + if not split: + log("Skipping split check, split is false", "debug") return start_year = int(self.podcast.analyzer.earliest_year) diff --git a/config.default.yaml b/config.default.yaml index 151b2bf..4500b08 100644 --- a/config.default.yaml +++ b/config.default.yaml @@ -57,8 +57,8 @@ cache: # The cutoff for using mixed labels cutoff: .7 -# If true, uploads of active podcasts will be split on current year -full_years_only: false +# How should the podcast be split. Possible values are 'last_full_year', 'yearly', 'false' +split: last_full_year ## API Settings # The Podchaser API settings From cc18cfe13ca80eb3f635de5c490420547f24ba22 Mon Sep 17 00:00:00 2001 From: lewler Date: Thu, 5 Dec 2024 14:07:53 +0100 Subject: [PATCH 8/8] Enhance podcast folder splitting functionality with split modes for current and yearly organization --- classes/file_organizer.py | 111 +++++++++++++++++++++++++++----------- 1 file changed, 80 insertions(+), 31 deletions(-) diff --git a/classes/file_organizer.py b/classes/file_organizer.py index ed7f22a..bb6c608 100644 --- a/classes/file_organizer.py +++ b/classes/file_organizer.py @@ -1,6 +1,5 @@ # file_organizer.py import fnmatch -import mutagen import re from datetime import datetime, timedelta from pathlib import Path @@ -314,8 +313,10 @@ def check_numbering(self): def check_split(self): """ Check if the podcast is active, and if it is and spans multiple years, - split it into multiple folders -- one folder from the start up until the - last full year, and another folder for the current year. + split it into multiple folders based on the split mode. + + If split is "last_full_year", split into a "--CURRENT--" folder for the current year. 
+ If split is "yearly", create one folder per year with the year appended inside parentheses. """ if self.podcast.completed: log("Skipping split check, podcast is marked as complete", "debug") @@ -334,40 +335,88 @@ def check_split(self): log("Skipping split check, podcast does not span multiple years", "debug") return - current_folder = self.podcast.folder_path.parent / f"{self.podcast.name} --CURRENT--" + if split == "last_full_year": + current_folder = self.podcast.folder_path.parent / f"{self.podcast.name} --CURRENT--" - if current_folder.exists(): - log(f"Current year folder '{current_folder}' already exists", "debug") - if not ask_yes_no(f"'{current_folder.name}' already exists, proceed with split anyway?"): - log("Skipping split check, user chose not to proceed - folder exists", "debug") - return - - if not current_folder.exists(): - current_folder.mkdir() + if current_folder.exists(): + log(f"Current year folder '{current_folder}' already exists", "debug") + if not ask_yes_no(f"'{current_folder.name}' already exists, proceed with split anyway?"): + log("Skipping split check, user chose not to proceed - folder exists", "debug") + return - for date, year_list in self.podcast.analyzer.file_dates.items(): - year = int(date[:4]) - if year == current_year: - for file_path in year_list[:]: - if not file_path.exists(): - log(f"File '{file_path}' does not exist", "debug") - continue - new_path = current_folder / file_path.name - if new_path.exists(): - log(f"File '{new_path}' already exists", "debug") - if not ask_yes_no(f"'{new_path.name}' already exists, overwritet?"): - log("Skipping file", "debug") + if not current_folder.exists(): + current_folder.mkdir() + + for date, year_list in self.podcast.analyzer.file_dates.items(): + year = int(date[:4]) if date[:4].isdecimal() else None + if year == current_year: + for file_path in year_list[:]: + if not file_path.exists(): + log(f"File '{file_path}' does not exist", "debug") continue - log("Deleting file", "debug") - new_path.unlink() + new_path = 
current_folder / file_path.name + if new_path.exists(): + log(f"File '{new_path}' already exists", "debug") + if not ask_yes_no(f"'{new_path.name}' already exists, overwrite?"): + log("Skipping file", "debug") + continue + log("Deleting file", "debug") + new_path.unlink() - file_path.rename(new_path) - log(f"Moved '{file_path}' to '{new_path}'", "debug") - self.podcast.analyzer.remove_file(file_path) + file_path.rename(new_path) + log(f"Moved '{file_path}' to '{new_path}'", "debug") + self.podcast.analyzer.remove_file(file_path) + + self.duplicate_metadata(current_folder) + announce(f"Podcast split into two folders, current year is in folder appended with --CURRENT--", "info") + + elif split == "yearly": + new_name = f"{self.podcast.name} ({start_year})" + new_folder_path = self.podcast.folder_path.parent / new_name + self.podcast.folder_path.rename(new_folder_path) + self.podcast.folder_path = new_folder_path + log(f"Renamed original folder to '{new_name}'", "debug") - self.duplicate_metadata(current_folder) + self.podcast.analyze_files() + + for year in range(start_year + 1, last_year + 1): + year_folder = self.podcast.folder_path.parent / f"{self.podcast.name} ({year})" + + if year_folder.exists(): + log(f"Year folder '{year_folder}' already exists", "debug") + if not ask_yes_no(f"'{year_folder.name}' already exists, proceed with split anyway?"): + log("Skipping split check, user chose not to proceed - folder exists", "debug") + continue - announce(f"Podcast split into two folders, current year is in folder appended with --CURRENT--", "info") + if not year_folder.exists(): + year_folder.mkdir() + + for date, year_list in self.podcast.analyzer.file_dates.items(): + file_year = int(date[:4]) if date[:4].isdecimal() else None + if file_year == year: + for file_path in year_list[:]: + if not file_path.exists(): + log(f"File '{file_path}' does not exist", "debug") + continue + new_path = year_folder / file_path.name + if new_path.exists(): + log(f"File '{new_path}' already exists", "debug") + if not 
ask_yes_no(f"'{new_path.name}' already exists, overwrite?"): + log("Skipping file", "debug") + continue + log("Deleting file", "debug") + new_path.unlink() + + file_path.rename(new_path) + log(f"Moved '{file_path}' to '{new_path}'", "debug") + self.podcast.analyzer.remove_file(file_path) + + announce(f"Podcast split into folder for year {start_year}", "info") + + for year in range(start_year + 1, last_year + 1): + year_folder = self.podcast.folder_path.parent / f"{self.podcast.name} ({year})" + self.duplicate_metadata(year_folder) + announce(f"Podcast split into folder for year {year}", "info") def duplicate_metadata(self, new_folder): self.podcast.metadata.duplicate(new_folder)