473
modules/date_utils.py
Executable file
473
modules/date_utils.py
Executable file
@@ -0,0 +1,473 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
Shared date utilities module for media downloaders.

Provides comprehensive date extraction and timestamp updating.

Features:
- Extract dates from text/titles (multiple formats)
- Extract TV show season/episode info and look up air dates via OMDB
- Update filesystem timestamps (mtime, atime)
- Update creation time (platform-specific)
- Update EXIF metadata for images
- Update video metadata
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import platform
|
||||
import subprocess
|
||||
import requests
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Optional, Union, Tuple
|
||||
from modules.universal_logger import get_logger
|
||||
|
||||
logger = get_logger('DateUtils')
|
||||
|
||||
|
||||
class DateHandler:
    """Comprehensive date extraction and timestamp updating.

    Every operation is a classmethod; the class only carries configuration
    (the OMDB API key) and the regex/lookup tables used for parsing.
    """

    # OMDB API key (should be set by user, see set_omdb_api_key)
    OMDB_API_KEY = None

    # TV show season/episode patterns (searched case-insensitively, in order)
    TV_PATTERNS = [
        r'S(\d{1,2})E(\d{1,2})',  # S01E01
        r'Season\s+(\d{1,2})\s+Episode\s+(\d{1,2})',  # Season 1 Episode 1
        r'(\d{1,2})x(\d{1,2})',  # 1x01
        r's(\d{1,2})\s*e(\d{1,2})',  # s01 e01 or s01e01
    ]

    # Year pattern for fallback
    YEAR_PATTERN = r'\b(19\d{2}|20\d{2})\b'

    # Date patterns for extraction from text, tried in order.
    # Patterns that START with a month name carry a (?<![A-Za-z]) lookbehind:
    # matching is case-insensitive, so without it the tail of an ordinary
    # word ("Myanmar 2025", "dismay 2025") would be read as a month.
    DATE_PATTERNS = [
        # Instagram filename format: YYYYMMDD_HHMMSS (e.g., "20251027_155842")
        (r'(\d{4})(\d{2})(\d{2})_(\d{2})(\d{2})(\d{2})', 'instagram'),
        # DD.MM.YYYY or DD/MM/YYYY or DD-MM-YYYY or DD_MM_YYYY (underscore for forum titles)
        (r'(\d{1,2})[\.\/\-_](\d{1,2})[\.\/\-_](\d{4})', 'dmy'),
        # YYYY-MM-DD or YYYY/MM/DD or YYYY_MM_DD
        (r'(\d{4})[\-\/_](\d{1,2})[\-\/_](\d{1,2})', 'ymd'),
        # Month DD, YYYY (e.g., "August 15, 2025")
        (r'(?<![A-Za-z])(January|February|March|April|May|June|July|August|September|October|November|December)\s+(\d{1,2}),?\s+(\d{4})', 'mdy_name'),
        # Month YYYY (e.g., "April 2025") - use first day of month
        (r'(?<![A-Za-z])(January|February|March|April|May|June|July|August|September|October|November|December)\s+(\d{4})', 'my_name'),
        # DD Mon YYYY (e.g., "15 Aug 2025")
        (r'(\d{1,2})\s+(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+(\d{4})', 'dmy_abbr'),
        # Mon DD, YYYY (e.g., "Aug 15, 2025")
        (r'(?<![A-Za-z])(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+(\d{1,2}),?\s+(\d{4})', 'mdy_abbr'),
        # Mon YYYY (e.g., "Apr 2025") - use first day of month
        (r'(?<![A-Za-z])(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+(\d{4})', 'my_abbr'),
    ]

    # Month name -> month number. Keys are Title-case; lookups normalise the
    # matched text with str.capitalize() first. "May" is both the full name
    # and the abbreviation, so it appears only once.
    MONTH_MAP = {
        'January': 1, 'February': 2, 'March': 3, 'April': 4,
        'May': 5, 'June': 6, 'July': 7, 'August': 8,
        'September': 9, 'October': 10, 'November': 11, 'December': 12,
        'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4,
        'Jun': 6, 'Jul': 7, 'Aug': 8,
        'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12,
    }

    @classmethod
    def set_omdb_api_key(cls, api_key: str):
        """Set OMDB API key for TV show lookups"""
        cls.OMDB_API_KEY = api_key

    @classmethod
    def extract_tv_info(cls, text: str) -> Optional[Tuple[str, int, int]]:
        """
        Extract TV show name, season, and episode from text

        The TV_PATTERNS are tried in order; the first one that matches AND
        leaves a non-empty show name in front of it wins.

        Args:
            text: Title or filename to inspect

        Returns:
            Tuple of (show_name, season, episode) or None
        """
        for pattern in cls.TV_PATTERNS:
            match = re.search(pattern, text, re.IGNORECASE)
            if not match:
                continue

            season = int(match.group(1))
            episode = int(match.group(2))

            # Show name is whatever precedes the season/episode marker
            show_part = text[:match.start()].strip()

            # Common pattern: "Actor Name & Actor Name - Show Name S01E01"
            if ' - ' in show_part:
                # Split on dash and take the last part as show name
                show_name = show_part.split(' - ')[-1].strip()
            else:
                # Clean up common separators
                show_name = re.sub(r'[-_.]', ' ', show_part)
                show_name = re.sub(r'\s+', ' ', show_name).strip()

            # Remove trailing "Season" or similar words.
            # flags= must be passed by keyword: the 4th positional argument
            # of re.sub is `count`, not `flags`.
            show_name = re.sub(r'\s+(Season|Series|S)\s*$', '', show_name,
                               flags=re.IGNORECASE)

            if show_name:
                return (show_name, season, episode)

        return None

    @classmethod
    def lookup_tv_episode_date(cls, show_name: str, season: int, episode: int) -> Optional[datetime]:
        """
        Lookup TV episode air date using OMDB API

        Performs two requests: a title search restricted to series (to get
        the IMDB id) and an episode lookup by id/season/episode. Any failure
        (missing key, HTTP error, negative API response, unparseable date,
        exception) results in None; exceptions are logged at debug level.

        Args:
            show_name: Name of the TV show
            season: Season number
            episode: Episode number

        Returns:
            Air date of the episode or None
        """
        if not cls.OMDB_API_KEY:
            logger.debug("OMDB API key not set")
            return None

        try:
            # HTTPS so the API key is not sent in clear text
            search_url = "https://www.omdbapi.com/"

            # First, search for the show
            params = {
                'apikey': cls.OMDB_API_KEY,
                't': show_name,
                'type': 'series'
            }
            response = requests.get(search_url, params=params, timeout=5)
            if response.status_code != 200:
                return None

            show_data = response.json()
            if show_data.get('Response') != 'True':
                return None

            # Get the IMDB ID
            imdb_id = show_data.get('imdbID')
            if not imdb_id:
                return None

            # Now get the specific episode
            episode_params = {
                'apikey': cls.OMDB_API_KEY,
                'i': imdb_id,
                'Season': season,
                'Episode': episode
            }
            episode_response = requests.get(search_url, params=episode_params, timeout=5)
            if episode_response.status_code != 200:
                return None

            episode_data = episode_response.json()
            if episode_data.get('Response') != 'True':
                return None

            # Parse the release date
            release_date = episode_data.get('Released')
            if release_date and release_date != 'N/A':
                # Try different date formats
                for fmt in ['%d %b %Y', '%Y-%m-%d', '%d %B %Y']:
                    try:
                        return datetime.strptime(release_date, fmt)
                    except ValueError:
                        continue

        except Exception as e:
            # Best-effort lookup: never let a network/JSON problem propagate
            logger.debug(f"OMDB lookup failed: {e}")

        return None

    @classmethod
    def extract_date_from_text(cls, text: str, fallback_date: Optional[datetime] = None, use_omdb: bool = True) -> Optional[datetime]:
        """
        Extract date from text using multiple format patterns

        Order of attempts: OMDB episode air date (when use_omdb is set and
        the text looks like a TV episode), then each entry of DATE_PATTERNS.
        A pattern whose match does not form a valid calendar date is skipped
        and the next pattern is tried.

        Args:
            text: Text to search for dates (e.g., post title, caption)
            fallback_date: Date to use if no date found in text
            use_omdb: Whether to try OMDB lookup for TV shows

        Returns:
            Extracted datetime or fallback_date if no date found
        """
        if not text:
            return fallback_date

        # First, try TV show lookup if enabled
        if use_omdb:
            tv_info = cls.extract_tv_info(text)
            if tv_info:
                show_name, season, episode = tv_info
                tv_date = cls.lookup_tv_episode_date(show_name, season, episode)
                if tv_date:
                    logger.info(f"Found TV episode date via OMDB: {show_name} S{season:02d}E{episode:02d} -> {tv_date}")
                    return tv_date

        # Try standard date patterns
        for pattern, format_type in cls.DATE_PATTERNS:
            match = re.search(pattern, text, re.IGNORECASE)
            if not match:
                continue
            try:
                parsed = cls._date_from_match(match, format_type)
            except (ValueError, IndexError) as e:
                logger.debug(f"Failed to parse date from pattern {pattern}: {e}")
                continue
            if parsed is not None:
                return parsed

        # Don't use year-only as fallback - it's too unreliable
        # Examples: "Moments of 2025" shouldn't default to Jan 1, 2025
        # Instead, use the actual post date from the forum
        return fallback_date

    @classmethod
    def _date_from_match(cls, match, format_type: str) -> Optional[datetime]:
        """Turn one DATE_PATTERNS regex match into a datetime.

        Args:
            match: Match object produced by the pattern paired with format_type
            format_type: Tag from DATE_PATTERNS describing the group layout

        Returns:
            The parsed datetime, or None for an unknown tag / unknown month

        Raises:
            ValueError: If the captured numbers are not a valid calendar date
        """
        if format_type == 'instagram':
            # Instagram format: YYYYMMDD_HHMMSS
            year, month, day, hour, minute, second = (int(g) for g in match.groups())
            return datetime(year, month, day, hour, minute, second)

        if format_type == 'dmy':
            first, second, year = (int(g) for g in match.groups())
            if '.' in match.group(0):
                # European format with dots: DD.MM.YYYY
                return datetime(year, second, first)
            if first > 12:
                # First number cannot be a month, so it is the day (DD/MM)
                return datetime(year, second, first)
            # Either ambiguous (both <= 12) or the second number must be the
            # day: assume MM/DD/YYYY for US format
            return datetime(year, first, second)

        if format_type == 'ymd':
            year, month, day = (int(g) for g in match.groups())
            return datetime(year, month, day)

        if format_type in ('mdy_name', 'mdy_abbr'):
            month_str, day, year = match.group(1), int(match.group(2)), int(match.group(3))
        elif format_type == 'dmy_abbr':
            day, month_str, year = int(match.group(1)), match.group(2), int(match.group(3))
        elif format_type in ('my_name', 'my_abbr'):
            # Month YYYY (no day) - use first day of month
            month_str, day, year = match.group(1), 1, int(match.group(2))
        else:
            return None

        # The regex search is case-insensitive, MONTH_MAP keys are Title-case
        month = cls.MONTH_MAP.get(month_str.capitalize(), 0)
        if not month:
            return None
        return datetime(year, month, day)

    @classmethod
    def update_file_timestamps(cls, filepath: Union[str, Path], date: datetime) -> bool:
        """
        Update all timestamps for a file: filesystem, creation time, and EXIF data

        Embedded metadata (EXIF / video) and creation time are best-effort:
        their failures are logged at debug level and do not affect the
        result. Only a failure to set mtime/atime makes this return False.

        Args:
            filepath: Path to the file to update
            date: DateTime to set

        Returns:
            True if successful, False otherwise
        """
        filepath = Path(filepath)
        if not filepath.exists():
            logger.error(f"File not found: {filepath}")
            return False

        if not date:
            logger.warning(f"No date provided for {filepath}")
            return False

        success = True
        suffix = filepath.suffix.lower()

        # 1. Update EXIF data for images FIRST (this modifies the file)
        if suffix in ['.jpg', '.jpeg', '.png', '.tiff', '.bmp', '.gif']:
            try:
                cls._update_exif_data(filepath, date)
            except Exception as e:
                logger.debug(f"Failed to update EXIF data: {e}")
                # Don't mark as failure since not all images support EXIF

        # 2. Update video metadata SECOND (this also modifies the file)
        if suffix in ['.mp4', '.mov', '.avi', '.mkv', '.webm', '.m4v']:
            try:
                cls._update_video_metadata(filepath, date)
            except Exception as e:
                logger.debug(f"Failed to update video metadata: {e}")
                # Don't mark as failure since this requires ffmpeg

        # 3. Update creation time (platform-specific)
        try:
            system = platform.system()
            if system == 'Darwin':  # macOS
                cls._update_macos_creation_time(filepath, date)
            elif system == 'Windows':
                cls._update_windows_creation_time(filepath, date)
            # Linux doesn't have a reliable way to set creation time
        except Exception as e:
            logger.debug(f"Failed to update creation time: {e}")
            # Don't mark as failure since this is platform-specific

        # 4. Update filesystem timestamps LAST (mtime and atime)
        # This must be last because EXIF/video updates modify the file and change mtime
        try:
            timestamp = date.timestamp()
            os.utime(filepath, (timestamp, timestamp))
            logger.debug(f"Updated filesystem timestamps for {filepath}")
        except Exception as e:
            logger.error(f"Failed to update filesystem timestamps: {e}")
            success = False

        return success

    @classmethod
    def _update_macos_creation_time(cls, filepath: Path, date: datetime):
        """Update creation time on macOS using SetFile

        A missing SetFile binary or a non-zero exit status is only logged.
        """
        date_str = date.strftime("%m/%d/%Y %H:%M:%S")
        try:
            result = subprocess.run(
                ['SetFile', '-d', date_str, str(filepath)],
                capture_output=True,
                text=True,
                check=False
            )
            if result.returncode == 0:
                logger.debug(f"Updated macOS creation time for {filepath}")
            else:
                logger.debug(f"SetFile failed: {result.stderr}")
        except FileNotFoundError:
            logger.debug("SetFile not found (Xcode Command Line Tools not installed)")

    @classmethod
    def _update_windows_creation_time(cls, filepath: Path, date: datetime):
        """Update creation time on Windows using PowerShell

        The path is handed to PowerShell through an environment variable and
        read with -LiteralPath, so characters such as $, `, ", [ and ] in
        (possibly downloaded) file names are never interpreted as PowerShell
        syntax or wildcards. A missing PowerShell binary or a non-zero exit
        status is only logged.
        """
        date_str = date.strftime("%Y-%m-%d %H:%M:%S")
        # date_str is produced by strftime above and contains no quotes
        ps_command = (
            "$file = Get-Item -LiteralPath $env:DATE_UTILS_TARGET; "
            f"$file.CreationTime = '{date_str}'"
        )
        env = dict(os.environ, DATE_UTILS_TARGET=str(filepath))
        try:
            result = subprocess.run(
                ['powershell', '-Command', ps_command],
                capture_output=True,
                text=True,
                check=False,
                env=env
            )
            if result.returncode == 0:
                logger.debug(f"Updated Windows creation time for {filepath}")
            else:
                logger.debug(f"PowerShell failed: {result.stderr}")
        except FileNotFoundError:
            logger.debug("PowerShell not available")

    @classmethod
    def _update_exif_data(cls, filepath: Path, date: datetime):
        """Update EXIF metadata using exiftool

        Sets all date fields comprehensively to ensure consistent timestamps
        across all metadata readers (including Immich):
        - AllDates (DateTimeOriginal, CreateDate, ModifyDate)
        - MetadataDate (used by some photo managers)
        - FileModifyDate (filesystem modification time)
        - Clears HistoryWhen to avoid conflicting timestamps

        The file is rewritten in place (-overwrite_original). A missing
        exiftool binary or a non-zero exit status is only logged.
        """
        date_str = date.strftime("%Y:%m:%d %H:%M:%S")
        try:
            result = subprocess.run([
                'exiftool',
                '-overwrite_original',
                f'-AllDates={date_str}',
                f'-MetadataDate={date_str}',
                '-HistoryWhen=',
                f'-FileModifyDate={date_str}',
                str(filepath)
            ], capture_output=True, text=True, check=False)

            if result.returncode == 0:
                logger.debug(f"Updated EXIF data for {filepath}")
            else:
                logger.debug(f"exiftool failed: {result.stderr}")
        except FileNotFoundError:
            logger.debug("exiftool not found")

    @classmethod
    def _update_video_metadata(cls, filepath: Path, date: datetime):
        """Update video metadata using ffmpeg

        ffmpeg stream-copies the input into a sibling temp file
        ("name.tmp.ext") with the creation_time tag set; the temp file
        replaces the original only if ffmpeg exits successfully. On any
        failure the temp file is removed and the original is left untouched.
        """
        date_str = date.strftime("%Y-%m-%d %H:%M:%S")
        temp_file = filepath.with_suffix('.tmp' + filepath.suffix)

        try:
            result = subprocess.run([
                'ffmpeg', '-i', str(filepath),
                '-c', 'copy',
                '-metadata', f'creation_time={date_str}',
                '-y', str(temp_file)
            ], capture_output=True, text=True, check=False)

            if result.returncode == 0 and temp_file.exists():
                # Replace original with updated file
                temp_file.replace(filepath)
                logger.debug(f"Updated video metadata for {filepath}")
            else:
                if temp_file.exists():
                    temp_file.unlink()
                logger.debug(f"ffmpeg failed: {result.stderr}")
        except FileNotFoundError:
            logger.debug("ffmpeg not found")
        except Exception as e:
            if temp_file.exists():
                temp_file.unlink()
            logger.debug(f"Video metadata update failed: {e}")
|
||||
|
||||
|
||||
# Convenience functions for direct use
|
||||
def extract_date(text: str, fallback: Optional[datetime] = None) -> Optional[datetime]:
    """Module-level shortcut for DateHandler.extract_date_from_text.

    Args:
        text: Text to search for a date
        fallback: Value returned when no date is found in text

    Returns:
        The extracted datetime, or fallback
    """
    result = DateHandler.extract_date_from_text(text, fallback)
    return result
|
||||
|
||||
|
||||
def update_timestamps(filepath: Union[str, Path], date: datetime) -> bool:
    """Module-level shortcut for DateHandler.update_file_timestamps.

    Args:
        filepath: File whose timestamps should be rewritten
        date: Date/time to apply

    Returns:
        Whatever DateHandler.update_file_timestamps reports (True on success)
    """
    outcome = DateHandler.update_file_timestamps(filepath, date)
    return outcome
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: print what gets extracted from a few sample titles
    samples = (
        "Eva Longoria - 15.08.2025 Event Photos",
        "Photos from 08/15/2025",
        "August 15, 2025 - Red Carpet",
        "15 Aug 2025 Photoshoot",
        "Event 2025-08-15",
    )

    print("Date extraction tests:")
    for text in samples:
        print(f" '{text}' -> {extract_date(text)}")

    # Timestamp update is only exercised when a sample image is present
    # in the current working directory
    test_file = Path("test_image.jpg")
    if test_file.exists():
        updated = update_timestamps(test_file, datetime(2025, 8, 15, 18, 30, 0))
        if updated:
            print(f"\nSuccessfully updated timestamps for {test_file}")
|
||||
Reference in New Issue
Block a user