Initial commit

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Todd
2026-03-29 22:42:55 -04:00
commit 0d7b2b1aab
389 changed files with 280296 additions and 0 deletions

473
modules/date_utils.py Executable file
View File

@@ -0,0 +1,473 @@
#!/usr/bin/env python3
"""
Shared date utilities module for media downloaders
Provides comprehensive date extraction and timestamp updating
Features:
- Extract dates from text/titles (multiple formats)
- Extract TV show season/episode info and lookup air dates via OMDB
- Update filesystem timestamps (mtime, atime)
- Update creation time (platform-specific)
- Update EXIF metadata for images
- Update video metadata
"""
import os
import re
import platform
import subprocess
import requests
from datetime import datetime
from pathlib import Path
from typing import Optional, Union, Tuple
from modules.universal_logger import get_logger
logger = get_logger('DateUtils')
class DateHandler:
    """Comprehensive date extraction and timestamp updating.

    All functionality is exposed through classmethods; the class only holds
    shared configuration (OMDB API key, regex tables, month lookup).
    """

    # OMDB API key (should be set by the user via set_omdb_api_key)
    OMDB_API_KEY = None

    # OMDB endpoint. HTTPS is used because the API key travels in the query
    # string and must not be sent in clear text.
    OMDB_URL = "https://www.omdbapi.com/"

    # Per-request timeout (seconds) for OMDB calls
    OMDB_TIMEOUT = 5

    # TV show season/episode patterns (group 1 = season, group 2 = episode)
    TV_PATTERNS = [
        r'S(\d{1,2})E(\d{1,2})',                       # S01E01
        r'Season\s+(\d{1,2})\s+Episode\s+(\d{1,2})',   # Season 1 Episode 1
        # 1x01 -- the digit look-arounds keep resolutions such as
        # "1920x1080" from being read as season 20, episode 10
        r'(?<!\d)(\d{1,2})x(\d{1,2})(?!\d)',
        r's(\d{1,2})\s*e(\d{1,2})',                    # s01 e01 or s01e01
    ]

    # Year pattern for fallback
    YEAR_PATTERN = r'\b(19\d{2}|20\d{2})\b'

    # Regex alternations shared by the named-month date patterns below
    _FULL_MONTHS = ('January|February|March|April|May|June|July|August|'
                    'September|October|November|December')
    _ABBR_MONTHS = 'Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec'

    # Date patterns for extraction from text, tried in order; the second
    # tuple element tells _date_from_match how to read the groups.
    DATE_PATTERNS = [
        # Instagram filename format: YYYYMMDD_HHMMSS (e.g., "20251027_155842")
        (r'(\d{4})(\d{2})(\d{2})_(\d{2})(\d{2})(\d{2})', 'instagram'),
        # DD.MM.YYYY or DD/MM/YYYY or DD-MM-YYYY or DD_MM_YYYY (underscore for forum titles)
        (r'(\d{1,2})[\.\/\-_](\d{1,2})[\.\/\-_](\d{4})', 'dmy'),
        # YYYY-MM-DD or YYYY/MM/DD or YYYY_MM_DD
        (r'(\d{4})[\-\/_](\d{1,2})[\-\/_](\d{1,2})', 'ymd'),
        # Month DD, YYYY (e.g., "August 15, 2025")
        (rf'({_FULL_MONTHS})\s+(\d{{1,2}}),?\s+(\d{{4}})', 'mdy_name'),
        # DD Month YYYY (e.g., "15 August 2025"); must precede the month-only
        # pattern, which would otherwise discard the day
        (rf'(\d{{1,2}})\s+({_FULL_MONTHS}),?\s+(\d{{4}})', 'dmy_name'),
        # Month YYYY (e.g., "April 2025") - use first day of month
        (rf'({_FULL_MONTHS})\s+(\d{{4}})', 'my_name'),
        # DD Mon YYYY (e.g., "15 Aug 2025")
        (rf'(\d{{1,2}})\s+({_ABBR_MONTHS})\s+(\d{{4}})', 'dmy_abbr'),
        # Mon DD, YYYY (e.g., "Aug 15, 2025")
        (rf'({_ABBR_MONTHS})\s+(\d{{1,2}}),?\s+(\d{{4}})', 'mdy_abbr'),
        # Mon YYYY (e.g., "Apr 2025") - use first day of month
        (rf'({_ABBR_MONTHS})\s+(\d{{4}})', 'my_abbr'),
    ]

    # Month name / abbreviation -> month number. Keys are capitalised;
    # lookups normalise the matched text with str.capitalize().
    MONTH_MAP = {
        'January': 1, 'February': 2, 'March': 3, 'April': 4,
        'May': 5, 'June': 6, 'July': 7, 'August': 8,
        'September': 9, 'October': 10, 'November': 11, 'December': 12,
        'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4,
        'Jun': 6, 'Jul': 7, 'Aug': 8,
        'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12,
    }

    # File suffixes (lower-case) routed to exiftool / ffmpeg respectively
    _IMAGE_SUFFIXES = ('.jpg', '.jpeg', '.png', '.tiff', '.bmp', '.gif')
    _VIDEO_SUFFIXES = ('.mp4', '.mov', '.avi', '.mkv', '.webm', '.m4v')

    @classmethod
    def set_omdb_api_key(cls, api_key: str):
        """Set OMDB API key for TV show lookups"""
        cls.OMDB_API_KEY = api_key

    @classmethod
    def extract_tv_info(cls, text: str) -> Optional[Tuple[str, int, int]]:
        """
        Extract TV show name, season, and episode from text

        Args:
            text: Title or filename that may contain a season/episode marker

        Returns:
            Tuple of (show_name, season, episode) or None
        """
        if not text:
            return None

        for pattern in cls.TV_PATTERNS:
            match = re.search(pattern, text, re.IGNORECASE)
            if not match:
                continue

            season = int(match.group(1))
            episode = int(match.group(2))

            # Show name is everything before the season/episode marker
            show_part = text[:match.start()].strip()

            # Common pattern: "Actor Name & Actor Name - Show Name S01E01"
            if ' - ' in show_part:
                # Split on dash and take the last part as show name
                show_name = show_part.split(' - ')[-1].strip()
            else:
                # Clean up common separators
                show_name = re.sub(r'[-_.]', ' ', show_part)
                show_name = re.sub(r'\s+', ' ', show_name).strip()

            # Remove trailing "Season" or similar words. flags= must be a
            # keyword: the 4th positional argument of re.sub is `count`.
            show_name = re.sub(r'\s+(Season|Series|S)\s*$', '', show_name,
                               flags=re.IGNORECASE)

            if show_name:
                return (show_name, season, episode)

        return None

    @classmethod
    def _omdb_query(cls, params: dict) -> Optional[dict]:
        """Run one OMDB GET request; return the JSON payload or None.

        None is returned for a non-200 status or a payload whose
        'Response' field is not 'True'. Network/JSON errors propagate to
        the caller.
        """
        query = dict(params, apikey=cls.OMDB_API_KEY)
        response = requests.get(cls.OMDB_URL, params=query, timeout=cls.OMDB_TIMEOUT)
        if response.status_code != 200:
            return None
        payload = response.json()
        if not isinstance(payload, dict) or payload.get('Response') != 'True':
            return None
        return payload

    @classmethod
    def lookup_tv_episode_date(cls, show_name: str, season: int, episode: int) -> Optional[datetime]:
        """
        Lookup TV episode air date using OMDB API

        Args:
            show_name: Name of the TV show
            season: Season number
            episode: Episode number

        Returns:
            Air date of the episode or None (no API key, show/episode not
            found, unparseable date, or any request error)
        """
        if not cls.OMDB_API_KEY:
            logger.debug("OMDB API key not set")
            return None

        try:
            # First, search for the show to obtain its IMDB ID
            show_data = cls._omdb_query({'t': show_name, 'type': 'series'})
            if not show_data:
                return None
            imdb_id = show_data.get('imdbID')
            if not imdb_id:
                return None

            # Now get the specific episode
            episode_data = cls._omdb_query({
                'i': imdb_id,
                'Season': season,
                'Episode': episode,
            })
            if not episode_data:
                return None

            # Parse the release date
            release_date = episode_data.get('Released')
            if release_date and release_date != 'N/A':
                # Try different date formats
                for fmt in ('%d %b %Y', '%Y-%m-%d', '%d %B %Y'):
                    try:
                        return datetime.strptime(release_date, fmt)
                    except ValueError:
                        continue
        except Exception as e:
            # Best-effort lookup: never let a network/JSON problem escape
            logger.debug(f"OMDB lookup failed: {e}")

        return None

    @classmethod
    def _date_from_match(cls, match, format_type: str) -> Optional[datetime]:
        """Build a datetime from a DATE_PATTERNS match.

        Returns None when the format type is unknown or the month name is
        not in MONTH_MAP. Raises ValueError for impossible dates
        (e.g. month 13); the caller handles that.
        """
        groups = match.groups()

        if format_type == 'instagram':
            # YYYYMMDD_HHMMSS -> six numeric fields in datetime() order
            return datetime(*(int(part) for part in groups))

        if format_type == 'ymd':
            year, month, day = (int(part) for part in groups)
            return datetime(year, month, day)

        if format_type == 'dmy':
            first, second, year = (int(part) for part in groups)
            if '.' in match.group(0):
                # European format with dots: DD.MM.YYYY
                return datetime(year, second, first)
            if first > 12:
                # First number cannot be a month, so it is the day
                return datetime(year, second, first)
            # Ambiguous (both <= 12) or clearly month-first:
            # assume MM/DD/YYYY for US format
            return datetime(year, first, second)

        if format_type in ('mdy_name', 'mdy_abbr'):
            month_str, day, year = groups
        elif format_type in ('dmy_name', 'dmy_abbr'):
            day, month_str, year = groups
        elif format_type in ('my_name', 'my_abbr'):
            # Month YYYY (no day) - use first day of month
            month_str, year = groups
            day = 1
        else:
            return None

        # Patterns match case-insensitively, MONTH_MAP keys are capitalised
        month = cls.MONTH_MAP.get(month_str.capitalize(), 0)
        if not month:
            return None
        return datetime(int(year), month, int(day))

    @classmethod
    def extract_date_from_text(cls, text: str, fallback_date: Optional[datetime] = None, use_omdb: bool = True) -> Optional[datetime]:
        """
        Extract date from text using multiple format patterns

        Args:
            text: Text to search for dates (e.g., post title, caption)
            fallback_date: Date to use if no date found in text
            use_omdb: Whether to try OMDB lookup for TV shows

        Returns:
            Extracted datetime or fallback_date if no date found
        """
        if not text:
            return fallback_date

        # First, try TV show lookup if enabled
        if use_omdb:
            tv_info = cls.extract_tv_info(text)
            if tv_info:
                show_name, season, episode = tv_info
                tv_date = cls.lookup_tv_episode_date(show_name, season, episode)
                if tv_date:
                    logger.info(f"Found TV episode date via OMDB: {show_name} S{season:02d}E{episode:02d} -> {tv_date}")
                    return tv_date

        # Try standard date patterns
        for pattern, format_type in cls.DATE_PATTERNS:
            match = re.search(pattern, text, re.IGNORECASE)
            if not match:
                continue
            try:
                parsed = cls._date_from_match(match, format_type)
            except (ValueError, IndexError) as e:
                logger.debug(f"Failed to parse date from pattern {pattern}: {e}")
                continue
            if parsed is not None:
                return parsed

        # Don't use year-only as fallback - it's too unreliable
        # Examples: "Moments of 2025" shouldn't default to Jan 1, 2025
        # Instead, use the actual post date from the forum
        return fallback_date

    @classmethod
    def update_file_timestamps(cls, filepath: Union[str, Path], date: datetime) -> bool:
        """
        Update all timestamps for a file: filesystem, creation time, and EXIF data

        Args:
            filepath: Path to the file to update
            date: DateTime to set

        Returns:
            True if the filesystem timestamps (mtime/atime) were set, False
            if the file is missing, no date was given, or os.utime failed.
            Metadata and creation-time failures are logged but do not
            affect the result.
        """
        filepath = Path(filepath)
        if not filepath.exists():
            logger.error(f"File not found: {filepath}")
            return False
        if not date:
            logger.warning(f"No date provided for {filepath}")
            return False

        success = True
        suffix = filepath.suffix.lower()

        # 1. Update EXIF data for images FIRST (this modifies the file)
        if suffix in cls._IMAGE_SUFFIXES:
            try:
                cls._update_exif_data(filepath, date)
            except Exception as e:
                # Don't mark as failure since not all images support EXIF
                logger.debug(f"Failed to update EXIF data: {e}")

        # 2. Update video metadata SECOND (this also modifies the file)
        if suffix in cls._VIDEO_SUFFIXES:
            try:
                cls._update_video_metadata(filepath, date)
            except Exception as e:
                # Don't mark as failure since this requires ffmpeg
                logger.debug(f"Failed to update video metadata: {e}")

        # 3. Update creation time (platform-specific)
        try:
            system = platform.system()
            if system == 'Darwin':  # macOS
                cls._update_macos_creation_time(filepath, date)
            elif system == 'Windows':
                cls._update_windows_creation_time(filepath, date)
            # Linux doesn't have a reliable way to set creation time
        except Exception as e:
            # Don't mark as failure since this is platform-specific
            logger.debug(f"Failed to update creation time: {e}")

        # 4. Update filesystem timestamps LAST (mtime and atime)
        # This must be last because EXIF/video updates modify the file and change mtime
        try:
            timestamp = date.timestamp()
            os.utime(filepath, (timestamp, timestamp))
            logger.debug(f"Updated filesystem timestamps for {filepath}")
        except Exception as e:
            logger.error(f"Failed to update filesystem timestamps: {e}")
            success = False

        return success

    @classmethod
    def _update_macos_creation_time(cls, filepath: Path, date: datetime):
        """Update creation time on macOS using SetFile"""
        date_str = date.strftime("%m/%d/%Y %H:%M:%S")
        try:
            result = subprocess.run(
                ['SetFile', '-d', date_str, str(filepath)],
                capture_output=True,
                text=True,
                check=False
            )
            if result.returncode == 0:
                logger.debug(f"Updated macOS creation time for {filepath}")
            else:
                logger.debug(f"SetFile failed: {result.stderr}")
        except FileNotFoundError:
            logger.debug("SetFile not found (Xcode Command Line Tools not installed)")

    @classmethod
    def _update_windows_creation_time(cls, filepath: Path, date: datetime):
        """Update creation time on Windows using PowerShell

        The path and timestamp are handed to PowerShell through environment
        variables instead of being interpolated into the script, so
        characters such as quotes, `$` or backticks in a file name cannot
        be interpreted as PowerShell code.
        """
        date_str = date.strftime("%Y-%m-%d %H:%M:%S")
        ps_command = (
            # Make a failing Get-Item produce a non-zero exit code
            "$ErrorActionPreference = 'Stop'\n"
            "$file = Get-Item -LiteralPath $env:DATE_UTILS_TARGET_PATH\n"
            "$file.CreationTime = [datetime]::ParseExact("
            "$env:DATE_UTILS_TARGET_TIME, 'yyyy-MM-dd HH:mm:ss', "
            "[System.Globalization.CultureInfo]::InvariantCulture)\n"
        )
        child_env = dict(
            os.environ,
            DATE_UTILS_TARGET_PATH=str(filepath),
            DATE_UTILS_TARGET_TIME=date_str,
        )
        try:
            result = subprocess.run(
                ['powershell', '-NoProfile', '-Command', ps_command],
                capture_output=True,
                text=True,
                check=False,
                env=child_env
            )
            if result.returncode == 0:
                logger.debug(f"Updated Windows creation time for {filepath}")
            else:
                logger.debug(f"PowerShell failed: {result.stderr}")
        except FileNotFoundError:
            logger.debug("PowerShell not available")

    @classmethod
    def _update_exif_data(cls, filepath: Path, date: datetime):
        """Update EXIF metadata using exiftool

        Sets all date fields comprehensively to ensure consistent timestamps
        across all metadata readers (including Immich):
        - AllDates (DateTimeOriginal, CreateDate, ModifyDate)
        - MetadataDate (used by some photo managers)
        - FileModifyDate (filesystem modification time)
        - Clears HistoryWhen to avoid conflicting timestamps
        """
        date_str = date.strftime("%Y:%m:%d %H:%M:%S")
        try:
            result = subprocess.run([
                'exiftool',
                '-overwrite_original',
                f'-AllDates={date_str}',
                f'-MetadataDate={date_str}',
                '-HistoryWhen=',
                f'-FileModifyDate={date_str}',
                str(filepath)
            ], capture_output=True, text=True, check=False)
            if result.returncode == 0:
                logger.debug(f"Updated EXIF data for {filepath}")
            else:
                logger.debug(f"exiftool failed: {result.stderr}")
        except FileNotFoundError:
            logger.debug("exiftool not found")

    @classmethod
    def _update_video_metadata(cls, filepath: Path, date: datetime):
        """Update video metadata using ffmpeg

        ffmpeg stream-copies into a sibling temp file with the new
        creation_time tag; the temp file replaces the original only when
        ffmpeg succeeds, otherwise it is removed.
        """
        date_str = date.strftime("%Y-%m-%d %H:%M:%S")
        # e.g. "clip.mp4" -> "clip.tmp.mp4" (keeps the container suffix for ffmpeg)
        temp_file = filepath.with_suffix('.tmp' + filepath.suffix)
        try:
            result = subprocess.run([
                'ffmpeg', '-i', str(filepath),
                '-c', 'copy',
                '-metadata', f'creation_time={date_str}',
                '-y', str(temp_file)
            ], capture_output=True, text=True, check=False)
            if result.returncode == 0 and temp_file.exists():
                # Replace original with updated file
                temp_file.replace(filepath)
                logger.debug(f"Updated video metadata for {filepath}")
            else:
                if temp_file.exists():
                    temp_file.unlink()
                logger.debug(f"ffmpeg failed: {result.stderr}")
        except FileNotFoundError:
            logger.debug("ffmpeg not found")
        except Exception as e:
            if temp_file.exists():
                temp_file.unlink()
            logger.debug(f"Video metadata update failed: {e}")
# Convenience functions for direct use
def extract_date(text: str, fallback: Optional[datetime] = None, use_omdb: bool = True) -> Optional[datetime]:
    """Extract date from text

    Thin wrapper around DateHandler.extract_date_from_text.

    Args:
        text: Text to search for dates (e.g., post title, caption)
        fallback: Date returned when no date is found in text
        use_omdb: Whether to try the OMDB TV-episode lookup first
            (forwarded unchanged; defaults to the handler's own default)

    Returns:
        Extracted datetime, or fallback if no date was found
    """
    return DateHandler.extract_date_from_text(text, fallback, use_omdb)
def update_timestamps(filepath: Union[str, Path], date: datetime) -> bool:
    """Update all timestamps for a file

    Thin wrapper around DateHandler.update_file_timestamps.

    Args:
        filepath: Path to the file to update
        date: DateTime to set

    Returns:
        The boolean result of DateHandler.update_file_timestamps
    """
    return DateHandler.update_file_timestamps(filepath, date)
if __name__ == "__main__":
    # Manual smoke test: print what extract_date finds in a few sample titles
    test_texts = [
        "Eva Longoria - 15.08.2025 Event Photos",
        "Photos from 08/15/2025",
        "August 15, 2025 - Red Carpet",
        "15 Aug 2025 Photoshoot",
        "Event 2025-08-15",
    ]
    print("Date extraction tests:")
    for text in test_texts:
        extracted = extract_date(text)
        print(f" '{text}' -> {extracted}")

    # Test file timestamp update (only runs when the sample file is present
    # in the current working directory)
    test_file = Path("test_image.jpg")
    if test_file.exists():
        test_date = datetime(2025, 8, 15, 18, 30, 0)
        if update_timestamps(test_file, test_date):
            print(f"\nSuccessfully updated timestamps for {test_file}")