473 lines
19 KiB
Python
Executable File
473 lines
19 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Shared date utilities module for media downloaders
|
|
Provides comprehensive date extraction and timestamp updating
|
|
|
|
Features:
|
|
- Extract dates from text/titles (multiple formats)
|
|
- Extract TV show season/episode info and lookup air dates via OMDB
|
|
- Update filesystem timestamps (mtime, atime)
|
|
- Update creation time (platform-specific)
|
|
- Update EXIF metadata for images
|
|
- Update video metadata
|
|
"""
|
|
|
|
import os
|
|
import re
|
|
import platform
|
|
import subprocess
|
|
import requests
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from typing import Optional, Union, Tuple
|
|
from modules.universal_logger import get_logger
|
|
|
|
logger = get_logger('DateUtils')
|
|
|
|
|
|
class DateHandler:
    """Comprehensive date extraction and timestamp updating.

    Every operation is a classmethod (or staticmethod); the class acts as a
    namespace holding the regex tables and the optional OMDB API key.
    """

    # OMDB API key (should be set by user, see set_omdb_api_key)
    OMDB_API_KEY = None

    # TV show season/episode patterns (searched case-insensitively, in order)
    TV_PATTERNS = [
        r'S(\d{1,2})E(\d{1,2})',  # S01E01
        r'Season\s+(\d{1,2})\s+Episode\s+(\d{1,2})',  # Season 1 Episode 1
        # 1x01 -- the digit guards keep resolutions such as "1920x1080"
        # from being read as season 20, episode 10.
        r'(?<!\d)(\d{1,2})x(\d{1,2})(?!\d)',
        r's(\d{1,2})\s*e(\d{1,2})',  # s01 e01 or s01e01
    ]

    # Year pattern for fallback
    YEAR_PATTERN = r'\b(19\d{2}|20\d{2})\b'

    # Date patterns for extraction from text, tried in this order.
    # Each entry is (regex, format tag understood by _date_from_match).
    DATE_PATTERNS = [
        # Instagram filename format: YYYYMMDD_HHMMSS (e.g., "20251027_155842")
        (r'(\d{4})(\d{2})(\d{2})_(\d{2})(\d{2})(\d{2})', 'instagram'),
        # DD.MM.YYYY or DD/MM/YYYY or DD-MM-YYYY or DD_MM_YYYY (underscore for forum titles)
        (r'(\d{1,2})[\.\/\-_](\d{1,2})[\.\/\-_](\d{4})', 'dmy'),
        # YYYY-MM-DD or YYYY/MM/DD or YYYY_MM_DD
        (r'(\d{4})[\-\/_](\d{1,2})[\-\/_](\d{1,2})', 'ymd'),
        # Month DD, YYYY (e.g., "August 15, 2025")
        (r'(January|February|March|April|May|June|July|August|September|October|November|December)\s+(\d{1,2}),?\s+(\d{4})', 'mdy_name'),
        # Month YYYY (e.g., "April 2025") - use first day of month
        (r'(January|February|March|April|May|June|July|August|September|October|November|December)\s+(\d{4})', 'my_name'),
        # DD Mon YYYY (e.g., "15 Aug 2025")
        (r'(\d{1,2})\s+(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+(\d{4})', 'dmy_abbr'),
        # Mon DD, YYYY (e.g., "Aug 15, 2025")
        (r'(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+(\d{1,2}),?\s+(\d{4})', 'mdy_abbr'),
        # Mon YYYY (e.g., "Apr 2025") - use first day of month
        (r'(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+(\d{4})', 'my_abbr'),
    ]

    # Month name -> month number. "May" is both the full and abbreviated form.
    MONTH_MAP = {
        'January': 1, 'February': 2, 'March': 3, 'April': 4,
        'May': 5, 'June': 6, 'July': 7, 'August': 8,
        'September': 9, 'October': 10, 'November': 11, 'December': 12,
        'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4,
        'Jun': 6, 'Jul': 7, 'Aug': 8,
        'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12,
    }

    @classmethod
    def set_omdb_api_key(cls, api_key: str):
        """Set OMDB API key for TV show lookups"""
        cls.OMDB_API_KEY = api_key

    @classmethod
    def extract_tv_info(cls, text: str) -> Optional[Tuple[str, int, int]]:
        """
        Extract TV show name, season, and episode from text

        The show name is whatever precedes the season/episode marker. If that
        prefix contains " - " only the part after the last " - " is kept
        (e.g. "Actor Name - Show Name S01E01"); otherwise '-', '_' and '.'
        are turned into spaces. A trailing "Season"/"Series"/"S" word is
        removed case-insensitively.

        Returns:
            Tuple of (show_name, season, episode) or None
        """
        for pattern in cls.TV_PATTERNS:
            match = re.search(pattern, text, re.IGNORECASE)
            if not match:
                continue

            season = int(match.group(1))
            episode = int(match.group(2))

            # Everything before the season/episode marker is the name candidate.
            show_part = text[:match.start()].strip()

            if ' - ' in show_part:
                # "Actor Name & Actor Name - Show Name": keep the last part.
                show_name = show_part.split(' - ')[-1].strip()
            else:
                # Clean up common separators
                show_name = re.sub(r'[-_.]', ' ', show_part)
                show_name = re.sub(r'\s+', ' ', show_name).strip()

            # Remove trailing "Season" or similar words. `flags=` must be a
            # keyword: the fourth positional argument of re.sub is `count`.
            show_name = re.sub(
                r'\s+(Season|Series|S)\s*$', '', show_name, flags=re.IGNORECASE
            )

            if show_name:
                return (show_name, season, episode)

        return None

    @classmethod
    def lookup_tv_episode_date(cls, show_name: str, season: int, episode: int) -> Optional[datetime]:
        """
        Lookup TV episode air date using OMDB API

        Two requests are made: one to resolve the show title to an IMDB id,
        one to fetch the episode. Any failure (no API key, HTTP status other
        than 200, negative API response, unparseable date, exception) yields
        None; exceptions are logged at debug level, never raised.

        Args:
            show_name: Name of the TV show
            season: Season number
            episode: Episode number

        Returns:
            Air date of the episode or None
        """
        if not cls.OMDB_API_KEY:
            logger.debug("OMDB API key not set")
            return None

        try:
            # HTTPS so the API key in the query string is not sent in clear text.
            search_url = "https://www.omdbapi.com/"

            # First, search for the show
            params = {
                'apikey': cls.OMDB_API_KEY,
                't': show_name,
                'type': 'series'
            }
            response = requests.get(search_url, params=params, timeout=5)
            if response.status_code != 200:
                return None

            show_data = response.json()
            if show_data.get('Response') != 'True':
                return None

            # Get the IMDB ID
            imdb_id = show_data.get('imdbID')
            if not imdb_id:
                return None

            # Now get the specific episode
            episode_params = {
                'apikey': cls.OMDB_API_KEY,
                'i': imdb_id,
                'Season': season,
                'Episode': episode
            }
            episode_response = requests.get(search_url, params=episode_params, timeout=5)
            if episode_response.status_code != 200:
                return None

            episode_data = episode_response.json()
            if episode_data.get('Response') != 'True':
                return None

            # Parse the release date
            release_date = episode_data.get('Released')
            if release_date and release_date != 'N/A':
                # Try different date formats
                for fmt in ['%d %b %Y', '%Y-%m-%d', '%d %B %Y']:
                    try:
                        return datetime.strptime(release_date, fmt)
                    except ValueError:
                        continue

        except Exception as e:
            # Best-effort lookup: a failed request must not break date extraction.
            logger.debug(f"OMDB lookup failed: {e}")

        return None

    @classmethod
    def _month_number(cls, name: str) -> int:
        """Return the month number for a full or abbreviated English month
        name in any letter case, or 0 when the name is unknown."""
        # The date regexes run with re.IGNORECASE, so "august" / "AUG" reach
        # this point; MONTH_MAP keys are capitalised.
        return cls.MONTH_MAP.get(name.capitalize(), 0)

    @classmethod
    def _date_from_match(cls, match, format_type: str) -> Optional[datetime]:
        """Build a datetime from a DATE_PATTERNS match.

        Returns None when the format tag or month name is unknown. Raises
        ValueError when the captured numbers do not form a valid date.
        """
        if format_type == 'instagram':
            # Instagram format: YYYYMMDD_HHMMSS
            year, month, day, hour, minute, second = (int(g) for g in match.groups())
            return datetime(year, month, day, hour, minute, second)

        if format_type == 'dmy':
            first, second, year = (int(g) for g in match.groups())
            # Dotted dates are European (DD.MM.YYYY); a first number above 12
            # can only be a day. Everything else is read as US MM/DD/YYYY.
            if '.' in match.group(0) or first > 12:
                return datetime(year, second, first)
            return datetime(year, first, second)

        if format_type == 'ymd':
            year, month, day = (int(g) for g in match.groups())
            return datetime(year, month, day)

        if format_type in ('mdy_name', 'mdy_abbr'):
            month = cls._month_number(match.group(1))
            day, year = int(match.group(2)), int(match.group(3))
            return datetime(year, month, day) if month else None

        if format_type in ('my_name', 'my_abbr'):
            # Month YYYY (no day) - use first day of month
            month = cls._month_number(match.group(1))
            year = int(match.group(2))
            return datetime(year, month, 1) if month else None

        if format_type == 'dmy_abbr':
            day, year = int(match.group(1)), int(match.group(3))
            month = cls._month_number(match.group(2))
            return datetime(year, month, day) if month else None

        return None

    @classmethod
    def extract_date_from_text(cls, text: str, fallback_date: Optional[datetime] = None, use_omdb: bool = True) -> Optional[datetime]:
        """
        Extract date from text using multiple format patterns

        When use_omdb is true and the text looks like a TV episode, the OMDB
        air date wins. Otherwise DATE_PATTERNS are tried in order; only the
        first occurrence of each pattern is examined, and a pattern whose
        match is not a valid date is skipped.

        Args:
            text: Text to search for dates (e.g., post title, caption)
            fallback_date: Date to use if no date found in text
            use_omdb: Whether to try OMDB lookup for TV shows

        Returns:
            Extracted datetime or fallback_date if no date found
        """
        if not text:
            return fallback_date

        # First, try TV show lookup if enabled
        if use_omdb:
            tv_info = cls.extract_tv_info(text)
            if tv_info:
                show_name, season, episode = tv_info
                tv_date = cls.lookup_tv_episode_date(show_name, season, episode)
                if tv_date:
                    logger.info(f"Found TV episode date via OMDB: {show_name} S{season:02d}E{episode:02d} -> {tv_date}")
                    return tv_date

        # Try standard date patterns
        for pattern, format_type in cls.DATE_PATTERNS:
            match = re.search(pattern, text, re.IGNORECASE)
            if not match:
                continue
            try:
                parsed = cls._date_from_match(match, format_type)
            except (ValueError, IndexError) as e:
                logger.debug(f"Failed to parse date from pattern {pattern}: {e}")
                continue
            if parsed is not None:
                return parsed

        # Don't use year-only as fallback - it's too unreliable
        # Examples: "Moments of 2025" shouldn't default to Jan 1, 2025
        # Instead, use the actual post date from the forum
        return fallback_date

    @classmethod
    def update_file_timestamps(cls, filepath: Union[str, Path], date: datetime) -> bool:
        """
        Update all timestamps for a file: filesystem, creation time, and EXIF data

        EXIF, video-metadata and creation-time updates are best effort: their
        failures are logged at debug level and do not affect the result. Only
        a failure to set mtime/atime makes the call return False.

        Args:
            filepath: Path to the file to update
            date: DateTime to set

        Returns:
            True if successful, False otherwise (also False when the file
            does not exist or no date is given)
        """
        filepath = Path(filepath)
        if not filepath.exists():
            logger.error(f"File not found: {filepath}")
            return False

        if not date:
            logger.warning(f"No date provided for {filepath}")
            return False

        success = True
        suffix = filepath.suffix.lower()

        # 1. Update EXIF data for images FIRST (this modifies the file)
        if suffix in ['.jpg', '.jpeg', '.png', '.tiff', '.bmp', '.gif']:
            try:
                cls._update_exif_data(filepath, date)
            except Exception as e:
                # Don't mark as failure since not all images support EXIF
                logger.debug(f"Failed to update EXIF data: {e}")

        # 2. Update video metadata SECOND (this also modifies the file)
        if suffix in ['.mp4', '.mov', '.avi', '.mkv', '.webm', '.m4v']:
            try:
                cls._update_video_metadata(filepath, date)
            except Exception as e:
                # Don't mark as failure since this requires ffmpeg
                logger.debug(f"Failed to update video metadata: {e}")

        # 3. Update creation time (platform-specific)
        try:
            system = platform.system()
            if system == 'Darwin':  # macOS
                cls._update_macos_creation_time(filepath, date)
            elif system == 'Windows':
                cls._update_windows_creation_time(filepath, date)
            # Linux doesn't have a reliable way to set creation time
        except Exception as e:
            # Don't mark as failure since this is platform-specific
            logger.debug(f"Failed to update creation time: {e}")

        # 4. Update filesystem timestamps LAST (mtime and atime)
        # This must be last because EXIF/video updates modify the file and change mtime
        try:
            timestamp = date.timestamp()
            os.utime(filepath, (timestamp, timestamp))
            logger.debug(f"Updated filesystem timestamps for {filepath}")
        except Exception as e:
            logger.error(f"Failed to update filesystem timestamps: {e}")
            success = False

        return success

    @classmethod
    def _update_macos_creation_time(cls, filepath: Path, date: datetime):
        """Update creation time on macOS using SetFile"""
        date_str = date.strftime("%m/%d/%Y %H:%M:%S")
        try:
            result = subprocess.run(
                ['SetFile', '-d', date_str, str(filepath)],
                capture_output=True,
                text=True,
                check=False
            )
            if result.returncode == 0:
                logger.debug(f"Updated macOS creation time for {filepath}")
            else:
                logger.debug(f"SetFile failed: {result.stderr}")
        except FileNotFoundError:
            logger.debug("SetFile not found (Xcode Command Line Tools not installed)")

    @staticmethod
    def _powershell_quote(value: str) -> str:
        """Return value as a single-quoted PowerShell string literal.

        Inside single quotes PowerShell expands nothing; the only escape is
        doubling the quote character. PowerShell also accepts the typographic
        quotes U+2018, U+2019, U+201A and U+201B as single-quote delimiters,
        so those are doubled as well.
        """
        return "'" + re.sub("(['\u2018\u2019\u201a\u201b])", r"\1\1", value) + "'"

    @classmethod
    def _update_windows_creation_time(cls, filepath: Path, date: datetime):
        """Update creation time on Windows using PowerShell"""
        date_str = date.strftime("%Y-%m-%d %H:%M:%S")
        # The path is quoted literally and passed with -LiteralPath so that
        # '$', backticks, quotes and '[' / ']' in a file name are neither
        # expanded nor treated as wildcards.
        ps_command = (
            f"$file = Get-Item -LiteralPath {cls._powershell_quote(str(filepath))}\n"
            f"$file.CreationTime = {cls._powershell_quote(date_str)}\n"
        )
        try:
            result = subprocess.run(
                ['powershell', '-Command', ps_command],
                capture_output=True,
                text=True,
                check=False
            )
            if result.returncode == 0:
                logger.debug(f"Updated Windows creation time for {filepath}")
            else:
                logger.debug(f"PowerShell failed: {result.stderr}")
        except FileNotFoundError:
            logger.debug("PowerShell not available")

    @classmethod
    def _update_exif_data(cls, filepath: Path, date: datetime):
        """Update EXIF metadata using exiftool

        Sets all date fields comprehensively to ensure consistent timestamps
        across all metadata readers (including Immich):
        - AllDates (DateTimeOriginal, CreateDate, ModifyDate)
        - MetadataDate (used by some photo managers)
        - FileModifyDate (filesystem modification time)
        - Clears HistoryWhen to avoid conflicting timestamps

        A missing exiftool binary or a non-zero exit status is only logged.
        """
        date_str = date.strftime("%Y:%m:%d %H:%M:%S")
        try:
            result = subprocess.run([
                'exiftool',
                '-overwrite_original',
                f'-AllDates={date_str}',
                f'-MetadataDate={date_str}',
                '-HistoryWhen=',
                f'-FileModifyDate={date_str}',
                str(filepath)
            ], capture_output=True, text=True, check=False)

            if result.returncode == 0:
                logger.debug(f"Updated EXIF data for {filepath}")
            else:
                logger.debug(f"exiftool failed: {result.stderr}")
        except FileNotFoundError:
            logger.debug("exiftool not found")

    @classmethod
    def _update_video_metadata(cls, filepath: Path, date: datetime):
        """Update video metadata using ffmpeg

        ffmpeg writes a stream-copied file with the new creation_time to a
        sibling "<name>.tmp<ext>" path, which then replaces the original. On
        any failure the temporary file is removed and the original is left
        in place.
        """
        date_str = date.strftime("%Y-%m-%d %H:%M:%S")
        # Keep the real extension last so ffmpeg can infer the container.
        temp_file = filepath.with_suffix('.tmp' + filepath.suffix)

        try:
            result = subprocess.run([
                'ffmpeg', '-i', str(filepath),
                '-c', 'copy',
                '-metadata', f'creation_time={date_str}',
                '-y', str(temp_file)
            ], capture_output=True, text=True, check=False)

            if result.returncode == 0 and temp_file.exists():
                # Replace original with updated file
                temp_file.replace(filepath)
                logger.debug(f"Updated video metadata for {filepath}")
            else:
                if temp_file.exists():
                    temp_file.unlink()
                logger.debug(f"ffmpeg failed: {result.stderr}")
        except FileNotFoundError:
            logger.debug("ffmpeg not found")
        except Exception as e:
            if temp_file.exists():
                temp_file.unlink()
            logger.debug(f"Video metadata update failed: {e}")
|
|
|
|
|
|
# Convenience functions for direct use
|
|
def extract_date(text: str, fallback: Optional[datetime] = None) -> Optional[datetime]:
    """Module-level shortcut for DateHandler.extract_date_from_text.

    Args:
        text: Text to search for a date.
        fallback: Value handed to the handler as its fallback date.

    Returns:
        Whatever DateHandler.extract_date_from_text returns for these
        arguments (its remaining parameters keep their defaults).
    """
    handler = DateHandler
    return handler.extract_date_from_text(text, fallback_date=fallback)
|
|
|
|
|
|
def update_timestamps(filepath: Union[str, Path], date: datetime) -> bool:
    """Module-level shortcut for DateHandler.update_file_timestamps.

    Args:
        filepath: File whose timestamps should be changed.
        date: Datetime to apply.

    Returns:
        The boolean result of DateHandler.update_file_timestamps.
    """
    handler = DateHandler
    return handler.update_file_timestamps(filepath=filepath, date=date)
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: print what extract_date finds in a few sample
    # titles, then re-stamp test_image.jpg if it exists in the working dir.
    samples = (
        "Eva Longoria - 15.08.2025 Event Photos",
        "Photos from 08/15/2025",
        "August 15, 2025 - Red Carpet",
        "15 Aug 2025 Photoshoot",
        "Event 2025-08-15",
    )

    print("Date extraction tests:")
    for sample in samples:
        found = extract_date(sample)
        print(f" '{sample}' -> {found}")

    # Test file timestamp update
    demo_image = Path("test_image.jpg")
    if demo_image.exists() and update_timestamps(demo_image, datetime(2025, 8, 15, 18, 30, 0)):
        print(f"\nSuccessfully updated timestamps for {demo_image}")