#!/usr/bin/env python3
"""
Shared date utilities module for media downloaders
Provides comprehensive date extraction and timestamp updating

Features:
- Extract dates from text/titles (multiple formats)
- Extract TV show season/episode info and lookup air dates via OMDB
- Update filesystem timestamps (mtime, atime)
- Update creation time (platform-specific)
- Update EXIF metadata for images
- Update video metadata
"""

import logging
import os
import re
import platform
import subprocess
from datetime import datetime
from pathlib import Path
from typing import Optional, Union, Tuple

try:
    import requests
except ImportError:
    # Only the OMDB lookup needs requests; date parsing and timestamp
    # updates must keep working without it.
    requests = None

try:
    from modules.universal_logger import get_logger
except ImportError:
    # Fall back to the standard library logger when the project logger is
    # not importable (e.g. when this file is executed directly as a script).
    get_logger = logging.getLogger

logger = get_logger('DateUtils')


class DateHandler:
    """Comprehensive date extraction and timestamp updating"""

    # OMDB API key (should be set by user)
    OMDB_API_KEY = None

    # TV show season/episode patterns
    TV_PATTERNS = [
        r'S(\d{1,2})E(\d{1,2})',  # S01E01
        r'Season\s+(\d{1,2})\s+Episode\s+(\d{1,2})',  # Season 1 Episode 1
        r'(\d{1,2})x(\d{1,2})',  # 1x01
        r's(\d{1,2})\s*e(\d{1,2})',  # s01 e01 or s01e01
    ]

    # Year pattern for fallback
    YEAR_PATTERN = r'\b(19\d{2}|20\d{2})\b'

    # Date patterns for extraction from text, tried in this order.
    # Each entry is (regex, format tag understood by _date_from_match).
    DATE_PATTERNS = [
        # Instagram filename format: YYYYMMDD_HHMMSS (e.g., "20251027_155842")
        (r'(\d{4})(\d{2})(\d{2})_(\d{2})(\d{2})(\d{2})', 'instagram'),
        # DD.MM.YYYY or DD/MM/YYYY or DD-MM-YYYY or DD_MM_YYYY (underscore for forum titles)
        (r'(\d{1,2})[\.\/\-_](\d{1,2})[\.\/\-_](\d{4})', 'dmy'),
        # YYYY-MM-DD or YYYY/MM/DD or YYYY_MM_DD
        (r'(\d{4})[\-\/_](\d{1,2})[\-\/_](\d{1,2})', 'ymd'),
        # Month DD, YYYY (e.g., "August 15, 2025")
        (r'(January|February|March|April|May|June|July|August|September|October|November|December)\s+(\d{1,2}),?\s+(\d{4})', 'mdy_name'),
        # Month YYYY (e.g., "April 2025") - use first day of month
        (r'(January|February|March|April|May|June|July|August|September|October|November|December)\s+(\d{4})', 'my_name'),
        # DD Mon YYYY (e.g., "15 Aug 2025")
        (r'(\d{1,2})\s+(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+(\d{4})', 'dmy_abbr'),
        # Mon DD, YYYY (e.g., "Aug 15, 2025")
        (r'(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+(\d{1,2}),?\s+(\d{4})', 'mdy_abbr'),
        # Mon YYYY (e.g., "Apr 2025") - use first day of month
        (r'(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+(\d{4})', 'my_abbr'),
    ]

    # Month name -> month number. Keys are capitalised; lookups normalise the
    # matched text with str.capitalize() because the patterns are applied
    # case-insensitively.
    MONTH_MAP = {
        'January': 1, 'February': 2, 'March': 3, 'April': 4,
        'May': 5, 'June': 6, 'July': 7, 'August': 8,
        'September': 9, 'October': 10, 'November': 11, 'December': 12,
        # 'May' is its own abbreviation and is already listed above
        'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4,
        'Jun': 6, 'Jul': 7, 'Aug': 8,
        'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12,
    }

    @classmethod
    def set_omdb_api_key(cls, api_key: str):
        """Set OMDB API key for TV show lookups"""
        cls.OMDB_API_KEY = api_key

    @classmethod
    def extract_tv_info(cls, text: str) -> Optional[Tuple[str, int, int]]:
        """
        Extract TV show name, season, and episode from text

        The patterns in TV_PATTERNS are tried in order (case-insensitively);
        the first one that matches AND leaves a non-empty show name wins.

        Args:
            text: Text to search (e.g., a post title)

        Returns:
            Tuple of (show_name, season, episode) or None
        """
        for pattern in cls.TV_PATTERNS:
            match = re.search(pattern, text, re.IGNORECASE)
            if not match:
                continue

            season = int(match.group(1))
            episode = int(match.group(2))

            # Extract show name (everything before the season/episode)
            show_part = text[:match.start()].strip()

            # Common pattern: "Actor Name & Actor Name - Show Name S01E01"
            if ' - ' in show_part:
                # Split on dash and take the last part as show name
                show_name = show_part.split(' - ')[-1].strip()
            else:
                # Clean up common separators
                show_name = re.sub(r'[-_.]', ' ', show_part)
                show_name = re.sub(r'\s+', ' ', show_name).strip()

            # Remove trailing "Season" or similar words.
            # NOTE: flags must be passed by keyword; the fourth positional
            # argument of re.sub is `count`, not `flags`.
            show_name = re.sub(
                r'\s+(Season|Series|S)\s*$', '', show_name, flags=re.IGNORECASE
            )

            if show_name:
                return (show_name, season, episode)

        return None

    @classmethod
    def lookup_tv_episode_date(cls, show_name: str, season: int, episode: int) -> Optional[datetime]:
        """
        Lookup TV episode air date using OMDB API

        Performs two HTTP requests: one to resolve the show title to an IMDB
        ID, and one to fetch the specific episode. Any failure (network,
        bad response, unparseable date) is logged at debug level and results
        in None; this method does not raise.

        Args:
            show_name: Name of the TV show
            season: Season number
            episode: Episode number

        Returns:
            Air date of the episode or None
        """
        if not cls.OMDB_API_KEY:
            logger.debug("OMDB API key not set")
            return None

        if requests is None:
            logger.debug("requests is not installed; skipping OMDB lookup")
            return None

        try:
            # HTTPS so the API key is not sent in clear text
            search_url = "https://www.omdbapi.com/"

            # First, search for the show
            params = {
                'apikey': cls.OMDB_API_KEY,
                't': show_name,
                'type': 'series'
            }
            response = requests.get(search_url, params=params, timeout=5)
            if response.status_code != 200:
                return None

            show_data = response.json()
            if show_data.get('Response') != 'True':
                return None

            # Get the IMDB ID
            imdb_id = show_data.get('imdbID')
            if not imdb_id:
                return None

            # Now get the specific episode
            episode_params = {
                'apikey': cls.OMDB_API_KEY,
                'i': imdb_id,
                'Season': season,
                'Episode': episode
            }
            episode_response = requests.get(search_url, params=episode_params, timeout=5)
            if episode_response.status_code != 200:
                return None

            episode_data = episode_response.json()
            if episode_data.get('Response') != 'True':
                return None

            # Parse the release date
            release_date = episode_data.get('Released')
            if release_date and release_date != 'N/A':
                # Try different date formats
                for fmt in ('%d %b %Y', '%Y-%m-%d', '%d %B %Y'):
                    try:
                        return datetime.strptime(release_date, fmt)
                    except ValueError:
                        continue

        except Exception as e:
            # Deliberately broad: the lookup is best-effort and must never
            # break date extraction.
            logger.debug(f"OMDB lookup failed: {e}")

        return None

    @classmethod
    def _date_from_match(cls, match, format_type: str) -> Optional[datetime]:
        """
        Build a datetime from a DATE_PATTERNS match.

        Args:
            match: Match object produced by the pattern tagged `format_type`
            format_type: Format tag from DATE_PATTERNS

        Returns:
            The parsed datetime, or None if the month name or the format tag
            is not recognised

        Raises:
            ValueError: If the matched numbers do not form a valid date
        """
        if format_type == 'instagram':
            # Instagram format: YYYYMMDD_HHMMSS
            year, month, day, hour, minute, second = (int(g) for g in match.groups())
            return datetime(year, month, day, hour, minute, second)

        if format_type == 'dmy':
            first, second, year = (int(g) for g in match.groups())
            if '.' in match.group(0):
                # European format with dots: DD.MM.YYYY
                return datetime(year, second, first)
            if first > 12:
                # First number cannot be a month, so it must be the day
                return datetime(year, second, first)
            # Ambiguous (or second number > 12): assume MM/DD/YYYY for US format
            return datetime(year, first, second)

        if format_type == 'ymd':
            year, month, day = (int(g) for g in match.groups())
            return datetime(year, month, day)

        # Remaining formats carry a month name. Patterns are matched
        # case-insensitively, so normalise before the MONTH_MAP lookup.
        if format_type in ('mdy_name', 'mdy_abbr'):
            month_str, day, year = match.group(1), int(match.group(2)), int(match.group(3))
        elif format_type == 'dmy_abbr':
            day, month_str, year = int(match.group(1)), match.group(2), int(match.group(3))
        elif format_type in ('my_name', 'my_abbr'):
            # Month YYYY (no day) - use first day of month
            month_str, day, year = match.group(1), 1, int(match.group(2))
        else:
            return None

        month = cls.MONTH_MAP.get(month_str.capitalize(), 0)
        if not month:
            return None
        return datetime(year, month, day)

    @classmethod
    def extract_date_from_text(cls, text: str, fallback_date: Optional[datetime] = None,
                               use_omdb: bool = True) -> Optional[datetime]:
        """
        Extract date from text using multiple format patterns

        If `use_omdb` is set and the text looks like a TV episode title, the
        OMDB air date is tried first. Otherwise (or if that fails) the
        DATE_PATTERNS are tried in order and the first valid date wins.
        Month names are matched case-insensitively.

        Args:
            text: Text to search for dates (e.g., post title, caption)
            fallback_date: Date to use if no date found in text
            use_omdb: Whether to try OMDB lookup for TV shows

        Returns:
            Extracted datetime or fallback_date if no date found
        """
        if not text:
            return fallback_date

        # First, try TV show lookup if enabled
        if use_omdb:
            tv_info = cls.extract_tv_info(text)
            if tv_info:
                show_name, season, episode = tv_info
                tv_date = cls.lookup_tv_episode_date(show_name, season, episode)
                if tv_date:
                    logger.info(f"Found TV episode date via OMDB: {show_name} S{season:02d}E{episode:02d} -> {tv_date}")
                    return tv_date

        # Try standard date patterns
        for pattern, format_type in cls.DATE_PATTERNS:
            match = re.search(pattern, text, re.IGNORECASE)
            if not match:
                continue
            try:
                parsed = cls._date_from_match(match, format_type)
            except (ValueError, IndexError) as e:
                # Matched digits were not a real date (e.g. month 13);
                # let a later pattern have a go.
                logger.debug(f"Failed to parse date from pattern {pattern}: {e}")
                continue
            if parsed is not None:
                return parsed

        # Don't use year-only as fallback - it's too unreliable
        # Examples: "Moments of 2025" shouldn't default to Jan 1, 2025
        # Instead, use the actual post date from the forum
        return fallback_date

    @classmethod
    def update_file_timestamps(cls, filepath: Union[str, Path], date: datetime) -> bool:
        """
        Update all timestamps for a file: filesystem, creation time, and EXIF data

        Only a failure to set the filesystem mtime/atime is reported as
        failure; EXIF, video-metadata and creation-time updates are
        best-effort because they depend on external tools and the platform.

        Args:
            filepath: Path to the file to update
            date: DateTime to set

        Returns:
            True if successful, False otherwise
        """
        filepath = Path(filepath)

        if not filepath.exists():
            logger.error(f"File not found: {filepath}")
            return False

        if not date:
            logger.warning(f"No date provided for {filepath}")
            return False

        success = True
        suffix = filepath.suffix.lower()

        # 1. Update EXIF data for images FIRST (this modifies the file)
        if suffix in ('.jpg', '.jpeg', '.png', '.tiff', '.bmp', '.gif'):
            try:
                cls._update_exif_data(filepath, date)
            except Exception as e:
                # Don't mark as failure since not all images support EXIF
                logger.debug(f"Failed to update EXIF data: {e}")

        # 2. Update video metadata SECOND (this also modifies the file)
        if suffix in ('.mp4', '.mov', '.avi', '.mkv', '.webm', '.m4v'):
            try:
                cls._update_video_metadata(filepath, date)
            except Exception as e:
                # Don't mark as failure since this requires ffmpeg
                logger.debug(f"Failed to update video metadata: {e}")

        # 3. Update creation time (platform-specific)
        try:
            system = platform.system()
            if system == 'Darwin':  # macOS
                cls._update_macos_creation_time(filepath, date)
            elif system == 'Windows':
                cls._update_windows_creation_time(filepath, date)
            # Linux doesn't have a reliable way to set creation time
        except Exception as e:
            # Don't mark as failure since this is platform-specific
            logger.debug(f"Failed to update creation time: {e}")

        # 4. Update filesystem timestamps LAST (mtime and atime)
        # This must be last because EXIF/video updates modify the file and change mtime
        try:
            timestamp = date.timestamp()
            os.utime(filepath, (timestamp, timestamp))
            logger.debug(f"Updated filesystem timestamps for {filepath}")
        except Exception as e:
            logger.error(f"Failed to update filesystem timestamps: {e}")
            success = False

        return success

    @classmethod
    def _update_macos_creation_time(cls, filepath: Path, date: datetime):
        """Update creation time on macOS using SetFile"""
        date_str = date.strftime("%m/%d/%Y %H:%M:%S")
        try:
            result = subprocess.run(
                ['SetFile', '-d', date_str, str(filepath)],
                capture_output=True,
                text=True,
                check=False
            )
            if result.returncode == 0:
                logger.debug(f"Updated macOS creation time for {filepath}")
            else:
                logger.debug(f"SetFile failed: {result.stderr}")
        except FileNotFoundError:
            logger.debug("SetFile not found (Xcode Command Line Tools not installed)")

    @classmethod
    def _update_windows_creation_time(cls, filepath: Path, date: datetime):
        """Update creation time on Windows using PowerShell

        The path and date are handed to PowerShell through environment
        variables instead of being interpolated into the script text, so
        file names containing quotes, `$` or backticks cannot alter the
        command. -LiteralPath stops `[` and `]` being treated as wildcards.
        """
        date_str = date.strftime("%Y-%m-%d %H:%M:%S")

        ps_command = (
            "$file = Get-Item -LiteralPath $env:DATE_UTILS_TARGET_PATH; "
            "$file.CreationTime = $env:DATE_UTILS_CREATION_TIME"
        )
        ps_env = dict(os.environ)
        ps_env['DATE_UTILS_TARGET_PATH'] = str(filepath)
        ps_env['DATE_UTILS_CREATION_TIME'] = date_str

        try:
            result = subprocess.run(
                ['powershell', '-Command', ps_command],
                capture_output=True,
                text=True,
                check=False,
                env=ps_env
            )
            if result.returncode == 0:
                logger.debug(f"Updated Windows creation time for {filepath}")
            else:
                logger.debug(f"PowerShell failed: {result.stderr}")
        except FileNotFoundError:
            logger.debug("PowerShell not available")

    @classmethod
    def _update_exif_data(cls, filepath: Path, date: datetime):
        """Update EXIF metadata using exiftool

        Sets all date fields comprehensively to ensure consistent timestamps
        across all metadata readers (including Immich):
        - AllDates (DateTimeOriginal, CreateDate, ModifyDate)
        - MetadataDate (used by some photo managers)
        - FileModifyDate (filesystem modification time)
        - Clears HistoryWhen to avoid conflicting timestamps
        """
        date_str = date.strftime("%Y:%m:%d %H:%M:%S")

        try:
            result = subprocess.run([
                'exiftool', '-overwrite_original',
                f'-AllDates={date_str}',
                f'-MetadataDate={date_str}',
                '-HistoryWhen=',
                f'-FileModifyDate={date_str}',
                str(filepath)
            ], capture_output=True, text=True, check=False)

            if result.returncode == 0:
                logger.debug(f"Updated EXIF data for {filepath}")
            else:
                logger.debug(f"exiftool failed: {result.stderr}")
        except FileNotFoundError:
            logger.debug("exiftool not found")

    @classmethod
    def _update_video_metadata(cls, filepath: Path, date: datetime):
        """Update video metadata using ffmpeg

        ffmpeg writes a stream-copied file (no re-encode) with the new
        creation_time tag to a temporary sibling file, which then replaces
        the original. The temporary file is removed if anything fails.
        """
        date_str = date.strftime("%Y-%m-%d %H:%M:%S")

        # e.g. "clip.mp4" -> "clip.tmp.mp4", keeping the real extension last
        temp_file = filepath.with_suffix('.tmp' + filepath.suffix)

        try:
            result = subprocess.run([
                'ffmpeg', '-i', str(filepath),
                '-c', 'copy',
                '-metadata', f'creation_time={date_str}',
                '-y', str(temp_file)
            ], capture_output=True, text=True, check=False)

            if result.returncode == 0 and temp_file.exists():
                # Replace original with updated file
                temp_file.replace(filepath)
                logger.debug(f"Updated video metadata for {filepath}")
            else:
                if temp_file.exists():
                    temp_file.unlink()
                logger.debug(f"ffmpeg failed: {result.stderr}")

        except FileNotFoundError:
            logger.debug("ffmpeg not found")
        except Exception as e:
            if temp_file.exists():
                temp_file.unlink()
            logger.debug(f"Video metadata update failed: {e}")


# Convenience functions for direct use
def extract_date(text: str, fallback: Optional[datetime] = None,
                 use_omdb: bool = True) -> Optional[datetime]:
    """Extract date from text

    Args:
        text: Text to search for dates
        fallback: Date returned when no date is found in text
        use_omdb: Whether to try OMDB lookup for TV shows

    Returns:
        Extracted datetime, or fallback if no date found
    """
    return DateHandler.extract_date_from_text(text, fallback, use_omdb)


def update_timestamps(filepath: Union[str, Path], date: datetime) -> bool:
    """Update all timestamps for a file

    Returns:
        True if the filesystem timestamps were updated, False otherwise
    """
    return DateHandler.update_file_timestamps(filepath, date)


if __name__ == "__main__":
    # Test examples
    test_texts = [
        "Eva Longoria - 15.08.2025 Event Photos",
        "Photos from 08/15/2025",
        "August 15, 2025 - Red Carpet",
        "15 Aug 2025 Photoshoot",
        "Event 2025-08-15",
    ]

    print("Date extraction tests:")
    for text in test_texts:
        extracted = extract_date(text)
        print(f"  '{text}' -> {extracted}")

    # Test file timestamp update
    test_file = Path("test_image.jpg")
    if test_file.exists():
        test_date = datetime(2025, 8, 15, 18, 30, 0)
        if update_timestamps(test_file, test_date):
            print(f"\nSuccessfully updated timestamps for {test_file}")