Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v0.6.4 #35

Merged
merged 9 commits into from
Dec 5, 2024
25 changes: 23 additions & 2 deletions bulldozer
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ from pathlib import Path
from classes.dupe_checker import DupeChecker
from classes.podcast import Podcast
from classes.torrent_creator import TorrentCreator
from classes.utils import load_config, setup_logging, announce, ask_yes_no, check_config, log
from classes.utils import load_config, setup_logging, announce, ask_yes_no, check_config, log, fix_folder_name
from classes.utils import rename_folder
from classes.report import Report

config = load_config()
Expand All @@ -20,7 +21,7 @@ def main(input, censor_rss, report_only=False, search_term=None, download_only=F
:param censor_rss: Whether to censor the RSS feed or not
"""
global config
print("· • —– ++ ---| Bulldozer v0.6.3 |--- ++ —– • ·")
print("· • —– ++ ---| Bulldozer v0.6.4 |--- ++ —– • ·")
database_active = config.get("database", {}).get("active", True)

if os.path.isdir(input):
Expand All @@ -29,6 +30,11 @@ def main(input, censor_rss, report_only=False, search_term=None, download_only=F
name = folder_path.name
check_duplicates = not report_only
podcast = Podcast(name, folder_path, config, censor_rss=censor_rss, check_duplicates=check_duplicates, search_term=search_term)
new_name = fix_folder_name(name)
if new_name != name and ask_yes_no(f"Would you like to rename the folder to {new_name}"):
rename_folder(podcast, new_name)
podcast.name = name = new_name
podcast.rss.rename()
else:
source_rss_file = input
output_dir = config.get("output_dir", ".")
Expand Down Expand Up @@ -86,6 +92,21 @@ def main(input, censor_rss, report_only=False, search_term=None, download_only=F
tracker_source = config.get("tracker_source", None)
create_torrent(podcast, announce_url, base_dir, tracker_source)
podcast.db.close()
move_folder = config.get("move", {}).get("active", False)
move_folder_path = config.get("move", {}).get("path", None)
if move_folder and move_folder_path:
ask_to_move = config.get("move", {}).get("ask", True)
if ask_to_move and ask_yes_no(f"Would you like to move the folder to {move_folder_path}"):
move_folder_path = Path(move_folder_path)
if move_folder_path.exists() and move_folder_path.is_dir():
new_folder_path = move_folder_path / folder_path.name
if new_folder_path.exists():
announce(f"Folder {new_folder_path} already exists, skipping move", "warning")
else:
folder_path.rename(new_folder_path)
announce(f"Moved folder to {new_folder_path}", "info")
else:
announce(f"Move folder {move_folder_path} does not exist", "error")
announce(f"All done, enjoy!", "celebrate")

def check_files(input):
Expand Down
204 changes: 178 additions & 26 deletions classes/file_analyzer.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
# file_analyzer.py

import mutagen
import re
from collections import defaultdict
from datetime import datetime
from email.utils import parsedate_to_datetime
from mutagen.mp3 import MP3
from mutagen.mp4 import MP4
from mutagen.mp3 import BitrateMode
from mutagen.easyid3 import EasyID3
from dateutil.parser import parse # Import dateutil parser
from mutagen.id3 import TXXX # Import TXXX for accessing custom tags
from .utils import spinner, log

class FileAnalyzer:
Expand Down Expand Up @@ -58,7 +65,7 @@ def analyze_files(self):
def analyze_audio_file(self, file_path, trailer_patterns):
"""
Analyze an individual audio file and extract metadata.

:param file_path: The path to the audio file.
:return: The metadata of the audio file.
"""
Expand All @@ -74,27 +81,90 @@ def analyze_audio_file(self, file_path, trailer_patterns):

metadata = {}
if isinstance(audiofile, MP3):
metadata['recording_date'] = audiofile.get("TDRC")
# Access 'TDRC' frame directly for recording date
try:
log(f"MP3 tags for '{file_path.name}': {audiofile.tags.pprint()}", "debug")
if ('TDRC' in audiofile.tags and
audiofile.tags['TDRC'].text and
audiofile.tags['TDRC'].text[0]):

tdrc = audiofile.tags['TDRC']
date_value = tdrc.text[0]
# Convert date_value to string before stripping
date_str = str(date_value).strip()

if date_str:
metadata['recording_date'] = date_str
log(f"Found 'TDRC' tag: '{date_str}' in '{file_path.name}'", "debug")
else:
# Proceed to fallback methods
metadata['recording_date'] = self.mp3_date_extract_alternatives(audiofile, file_path)
else:
# Proceed to fallback methods
metadata['recording_date'] = self.mp3_date_extract_alternatives(audiofile, file_path)
except Exception as e:
log(f"Error reading tags from '{file_path.name}': {e}", "error")
metadata['recording_date'] = None

metadata['bitrate'] = round(audiofile.info.bitrate / 1000)
metadata['bitrate_mode'] = "VBR" if audiofile.info.bitrate_mode == BitrateMode.VBR else "CBR"

elif isinstance(audiofile, MP4):
metadata['recording_date'] = audiofile.tags.get("\xa9day", [None])[0]
metadata['bitrate'] = round(audiofile.info.bitrate / 1000)
metadata['bitrate_mode'] = "CBR" if metadata['bitrate'] else "VBR"
else:
log(f"Unsupported audio format, skipping: {file_path}", "warning")
return None

if metadata['bitrate_mode'] != "VBR":
self.all_vbr = False

return metadata

def mp3_date_extract_alternatives(self, audiofile, file_path):
    """
    Extract the recording date from alternative tags.

    Sources are tried in order and the first non-empty value wins:

    1. The EasyID3 keys 'date', 'originaldate' and 'year'.
    2. The first TXXX frame whose description contains 'releasedate'
       (case-insensitive) and whose first text value is non-blank.

    Each source is read independently, so a failure in the EasyID3 lookup
    does not prevent the TXXX frames from being checked. This method is
    best-effort: errors are logged and never propagated to the caller.

    :param audiofile: The audio file object.
    :param file_path: The path to the audio file.
    :return: The extracted date as a string, or None if not found.
    """
    # Source 1: EasyID3. Kept in its own try block so that an unreadable
    # or missing ID3 header still lets the TXXX fallback below run.
    try:
        easy_tags = EasyID3(file_path)
        log(f"EasyID3 tags for '{file_path.name}': {easy_tags.pprint()}", "debug")
        for key in ('date', 'originaldate', 'year'):
            values = easy_tags.get(key, [None])
            # A key may be present with an empty value list; treat that as missing
            # instead of letting values[0] raise IndexError.
            date = values[0] if values else None
            if date:
                log(f"Found date in EasyID3 tags: '{date}' in '{file_path.name}'", "debug")
                return date
    except Exception as e:
        log(f"Error reading EasyID3 tags from '{file_path.name}': {e}", "error")

    # Source 2: 'releasedate' stored in a TXXX frame.
    try:
        tags = getattr(audiofile, 'tags', None)
        # Guard against an audio file object that carries no tag container at all.
        txxx_tags = tags.getall('TXXX') if tags is not None else []
        for tag in txxx_tags:
            log(f"TXXX tag: desc='{tag.desc}', text='{tag.text}'", "debug")
            if 'releasedate' not in tag.desc.lower() or not tag.text:
                continue
            releasedate = str(tag.text[0]).strip()
            if not releasedate:
                # Blank value: keep looking in case another matching frame has a date.
                continue
            log(f"Found 'releasedate' in TXXX tags: '{releasedate}' in '{file_path.name}'", "debug")
            log(f"Set 'recording_date' to 'releasedate': '{releasedate}' for '{file_path.name}'", "debug")
            return releasedate
    except Exception as e:
        log(f"Error reading TXXX tags from '{file_path.name}': {e}", "error")
        return None

    log(f"No date tag found in '{file_path.name}'", "warning")
    return None

def get_date_range(self):
"""
Get the date range of the audio files.

:return: The date range as a tuple of the earliest and latest dates.
"""
self.file_dates = {k: v for k, v in self.file_dates.items() if v}
self.earliest_year = None
Expand All @@ -104,43 +174,125 @@ def get_date_range(self):
self.real_last_episode_date = None

for date_str in self.file_dates.keys():
year = int(str(date_str)[:4])
if self.earliest_year is None or (year and year < self.earliest_year):
self.earliest_year = year
if self.first_episode_date is None or date_str < self.first_episode_date:
self.real_first_episode_date = self.first_episode_date = date_str
if self.last_episode_date is None or date_str > self.last_episode_date:
self.real_last_episode_date = self.last_episode_date = date_str
if date_str != "Unknown":
try:
year = int(str(date_str)[:4])
except ValueError:
log(f"Invalid date string '{date_str}' encountered.", "warning")
continue

if self.earliest_year is None or (year and year < self.earliest_year):
self.earliest_year = year
if self.first_episode_date is None or date_str < self.first_episode_date:
self.real_first_episode_date = self.first_episode_date = date_str
if self.last_episode_date is None or date_str > self.last_episode_date:
self.real_last_episode_date = self.last_episode_date = date_str
else:
# Handle files with unknown dates separately if needed
log(f"Encountered file with unknown date.", "warning")
continue

# If we have original files (from previous runs), update real first and last dates
if self.original_files:
for date_str in self.original_files.keys():
year = int(str(date_str)[:4])
if self.real_first_episode_date is None or (date_str and date_str < self.real_first_episode_date):
self.real_first_episode_date = date_str
if self.real_last_episode_date is None or (date_str and date_str > self.real_last_episode_date):
self.real_last_episode_date = date_str
if date_str != "Unknown":
if self.real_first_episode_date is None or (date_str and date_str < self.real_first_episode_date):
self.real_first_episode_date = date_str
if self.real_last_episode_date is None or (date_str and date_str > self.real_last_episode_date):
self.real_last_episode_date = date_str

def process_metadata(self, metadata, file_path):
"""
Process the metadata of an audio file.

:param metadata: The metadata of the audio file.
:param file_path: The path to the audio file.
"""
recording_date = metadata.get('recording_date')
date_str = "Unknown"
year = None

if recording_date:
year = int(str(recording_date)[:4])
date_str = str(recording_date)
date_str_raw = str(recording_date)
parsed = False
date_obj = None

# Add debug logging
log(f"Processing recording_date: '{date_str_raw}' for file '{file_path.name}'", "debug")

# Define possible date formats
date_formats = [
'%Y-%m-%d',
'%Y',
'%a, %d %b %Y %H:%M:%S %z', # 'Thu, 02 Nov 2023 16:31:53 -0000'
'%a, %d %b %Y %H:%M:%S %Z',
'%d %b %Y %H:%M:%S %z',
'%d %b %Y %H:%M:%S %Z',
]

# Try parsing with the defined formats
for fmt in date_formats:
try:
date_obj = datetime.strptime(date_str_raw, fmt)
parsed = True
break
except ValueError:
continue

if not parsed:
try:
# Use dateutil.parser.parse for flexible parsing
date_obj = parse(date_str_raw)
parsed = True
except (ValueError) as e:
log(f"Failed to parse date '{date_str_raw}' using dateutil: {e}", "warning")

if not parsed:
try:
# Fallback to parsedate_to_datetime
date_obj = parsedate_to_datetime(date_str_raw)
if date_obj is not None:
parsed = True
except (TypeError, ValueError, IndexError):
pass

if parsed and date_obj:
year = date_obj.year
date_str = date_obj.strftime('%Y-%m-%d')
log(f"Parsed date: '{date_str}'", "debug")
else:
log(f"Invalid recording date format for file '{file_path.name}': '{date_str_raw}'", "warning")
date_str = "Unknown"
else:
log(f"Failed to get recording date for: {file_path}", "error")
year = None
date_str = "Unknown"
# Try to extract date from file name
date_pattern = re.compile(r'\b(\d{4}-\d{2}-\d{2})\b')
match = date_pattern.search(file_path.name)
if match:
date_str = match.group(1)
try:
date_obj = datetime.strptime(date_str, '%Y-%m-%d')
year = date_obj.year
except ValueError:
log(f"Invalid date in file name for file '{file_path}': '{date_str}'", "warning")
date_str = "Unknown"
else:
# Use file modification date as a last resort
try:
timestamp = file_path.stat().st_mtime
date_obj = datetime.fromtimestamp(timestamp)
date_str = date_obj.strftime('%Y-%m-%d')
year = date_obj.year
log(f"Using file modification date for {file_path}: '{date_str}'", "info")
except Exception as e:
log(f"Failed to get file modification date for {file_path}: {e}", "error")
date_str = "Unknown"

# Continue processing even if date is unknown
self.file_dates[date_str].append(file_path)

bitrate = metadata['bitrate']
bitrate_mode = metadata['bitrate_mode']
bitrate_str = "VBR" if "vbr" in bitrate_mode.lower() else f"{bitrate} kbps"
bitrate = metadata.get('bitrate', None)
bitrate_mode = metadata.get('bitrate_mode', 'Unknown')
bitrate_str = "VBR" if "vbr" in bitrate_mode.lower() else f"{bitrate} kbps" if bitrate else "Unknown"
self.bitrates[bitrate_str].append(file_path)

file_format = file_path.suffix.lower()[1:]
Expand Down
Loading