#!/usr/bin/env python3 """ Universal Logging Module for Media Downloader Provides consistent logging across all components with automatic rotation and 7-day retention """ import logging import logging.handlers from pathlib import Path from datetime import datetime, timedelta import os import glob import sys class UniversalLogger: """ Universal logger with automatic rotation and cleanup Features: - Consistent log format across all components - Daily log rotation at midnight - Automatic cleanup of logs older than 7 days - Separate log files per component - Console and file output """ def __init__( self, component_name: str, log_dir: str = None, retention_days: int = 7, console_level: str = 'INFO', file_level: str = 'DEBUG' ): """ Initialize universal logger for a component Args: component_name: Name of the component (e.g., 'API', 'Scheduler', 'MediaDownloader') log_dir: Directory to store logs (default: /opt/media-downloader/logs) retention_days: Number of days to keep logs (default: 7) console_level: Logging level for console output (default: INFO) file_level: Logging level for file output (default: DEBUG) """ self.component_name = component_name self.retention_days = retention_days # Set up log directory if log_dir is None: base_path = Path(__file__).parent.parent self.log_dir = base_path / 'logs' else: self.log_dir = Path(log_dir) self.log_dir.mkdir(exist_ok=True, parents=True) # Create logger self.logger = logging.getLogger(f'MediaDownloader.{component_name}') self.logger.setLevel(logging.DEBUG) # Remove existing handlers to prevent duplicates self.logger.handlers = [] # Create formatter - matches media-downloader.py format # Format: 2025-11-12 21:00:00.123456 [ComponentName] [Module] [LEVEL] message # Custom formatter to include microseconds for proper log sorting class MicrosecondFormatter(logging.Formatter): def formatTime(self, record, datefmt=None): ct = datetime.fromtimestamp(record.created) return ct.strftime('%Y-%m-%d %H:%M:%S.%f') formatter = MicrosecondFormatter( '%(asctime)s [%(name)s] %(message)s' ) # File handler with date-stamped filename (one file per day) # Format: 20251113_component.log (all logs for the day append to same file) date_stamp = datetime.now().strftime('%Y%m%d') log_file = self.log_dir / f'{date_stamp}_{component_name.lower()}.log' file_handler = logging.FileHandler( filename=str(log_file), mode='a', # Append mode - preserves logs across restarts encoding='utf-8' ) file_handler.setLevel(getattr(logging, file_level.upper())) file_handler.setFormatter(formatter) self.logger.addHandler(file_handler) # Console handler console_handler = logging.StreamHandler() console_handler.setLevel(getattr(logging, console_level.upper())) console_handler.setFormatter(formatter) self.logger.addHandler(console_handler) # Suppress noisy third-party loggers logging.getLogger('asyncio').setLevel(logging.WARNING) logging.getLogger('selenium').setLevel(logging.WARNING) logging.getLogger('urllib3').setLevel(logging.WARNING) logging.getLogger('websocket').setLevel(logging.WARNING) logging.getLogger('requests').setLevel(logging.WARNING) logging.getLogger('PIL').setLevel(logging.WARNING) logging.getLogger('instaloader').setLevel(logging.WARNING) logging.getLogger('tensorflow').setLevel(logging.ERROR) logging.getLogger('deepface').setLevel(logging.WARNING) # Clean up old logs on initialization self._cleanup_old_logs() def _cleanup_old_logs(self): """Remove log files older than retention_days""" try: cutoff_date = datetime.now() - timedelta(days=self.retention_days) # Match pattern: YYYYMMDD_HHMMSS_component.log pattern = str(self.log_dir / f'*_{self.component_name.lower()}.log') cleaned_count = 0 for log_file in glob.glob(pattern): try: file_path = Path(log_file) # Check file modification time mtime = datetime.fromtimestamp(file_path.stat().st_mtime) if mtime < cutoff_date: file_path.unlink() cleaned_count += 1 except Exception as e: # Don't fail if we can't clean up a single file pass if cleaned_count > 0: # Log cleanup message through the logger itself (after file handler is set up) self.info(f"Cleaned up {cleaned_count} old {self.component_name} log file(s)", module='LogCleanup') except Exception as e: # Don't fail initialization if cleanup fails pass def _format_message(self, module: str, level: str, message: str) -> str: """ Format message to match media-downloader.py style Args: module: Module name (e.g., 'Core', 'Forum', 'Instagram') level: Log level (e.g., 'INFO', 'ERROR', 'DEBUG') message: Log message Returns: Formatted message: [Module] [LEVEL] message """ return f"[{module}] [{level.upper()}] {message}" def _broadcast_error(self, message: str, module: str, level: str = 'ERROR'): """ Broadcast error to connected WebSocket clients for real-time notifications. Fails silently to not disrupt logging. """ try: # Try to import the WebSocket manager from the API # This will only work when the API is running from web.backend.api import manager if manager and manager.active_connections: manager.broadcast_sync({ 'type': 'error_alert', 'error': { 'module': module, 'level': level, 'message': message[:200], # Truncate for notification 'timestamp': datetime.now().isoformat(), 'component': self.component_name } }) except Exception: # Fail silently - API may not be running or manager not available pass def _record_error_to_db(self, message: str, module: str, level: str = 'ERROR'): """ Record error to error_log database table for dashboard display. Uses a separate connection to avoid circular dependencies. Fails silently to not disrupt logging. """ try: import sqlite3 import hashlib import re from pathlib import Path # Get database path db_path = Path(__file__).parent.parent / 'database' / 'media_downloader.db' if not db_path.exists(): return # Normalize message for deduplication (remove variable parts like URLs, paths, numbers) normalized = message normalized = re.sub(r'/[\w/\-\.]+\.(jpg|png|mp4|webp|gif|heic|mov)', '{file}', normalized) normalized = re.sub(r'https?://[^\s]+', '{url}', normalized) normalized = re.sub(r'\b\d+\b', '{n}', normalized) normalized = re.sub(r'[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}', '{uuid}', normalized) # Create error hash for deduplication (module + normalized message) error_key = f"{module}:{normalized[:200]}" error_hash = hashlib.sha256(error_key.encode()).hexdigest() # Use a quick connection with short timeout conn = sqlite3.connect(str(db_path), timeout=2.0) conn.execute("PRAGMA busy_timeout = 2000") cursor = conn.cursor() now = datetime.now().isoformat() # Upsert: insert new error or update occurrence count # Reset viewed_at and dismissed_at to NULL when error recurs so it shows as "new" on dashboard cursor.execute(''' INSERT INTO error_log (error_hash, module, level, message, first_seen, last_seen, occurrence_count, log_file) VALUES (?, ?, ?, ?, ?, ?, 1, ?) ON CONFLICT(error_hash) DO UPDATE SET last_seen = excluded.last_seen, occurrence_count = error_log.occurrence_count + 1, viewed_at = NULL, dismissed_at = NULL ''', (error_hash, module, level, message[:500], now, now, self.component_name)) conn.commit() conn.close() # Broadcast to WebSocket clients for real-time notification self._broadcast_error(message, module, level) except Exception: # Fail silently - don't let error logging break the main logging pass def debug(self, message: str, module: str = 'Core'): """Log debug message""" self.logger.debug(self._format_message(module, 'DEBUG', message)) def info(self, message: str, module: str = 'Core'): """Log info message""" self.logger.info(self._format_message(module, 'INFO', message)) def warning(self, message: str, module: str = 'Core'): """Log warning message""" self.logger.warning(self._format_message(module, 'WARNING', message)) def error(self, message: str, module: str = 'Core'): """Log error message and record to error_log database""" self.logger.error(self._format_message(module, 'ERROR', message)) # Record error to database for dashboard display self._record_error_to_db(message, module) def critical(self, message: str, module: str = 'Core'): """Log critical message and record to error_log database""" self.logger.critical(self._format_message(module, 'CRITICAL', message)) # Record critical errors to database for dashboard display self._record_error_to_db(message, module, level='CRITICAL') def success(self, message: str, module: str = 'Core'): """Log success message (maps to INFO level)""" self.logger.info(self._format_message(module, 'SUCCESS', message)) def log(self, message: str, level: str = 'INFO', module: str = 'Core'): """ Generic log method supporting all levels Args: message: Log message level: Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL, SUCCESS) module: Module name """ level_map = { 'DEBUG': self.debug, 'INFO': self.info, 'WARNING': self.warning, 'ERROR': self.error, 'CRITICAL': self.critical, 'SUCCESS': self.success } log_func = level_map.get(level.upper(), self.info) log_func(message, module) def get_callback(self): """ Get a callback function compatible with existing module signatures Returns: Callback function that can be passed to modules expecting log_callback """ def callback(*args): """ Flexible callback that handles multiple signature formats: - callback(message, level) - callback(message, level, module) """ if len(args) == 2: message, level = args # Extract module from message if present if message.startswith('[') and ']' in message: end_bracket = message.index(']') module = message[1:end_bracket] message = message[end_bracket+1:].strip() # Remove level tag if present if message.startswith('[') and ']' in message: message = message[message.index(']')+1:].strip() self.log(message, level, module) else: self.log(message, level) elif len(args) == 3: message, level, module = args self.log(message, level, module) else: # Default: treat as simple message self.info(str(args)) return callback # Singleton instances for common components _logger_instances = {} def get_logger( component_name: str, log_dir: str = None, retention_days: int = 7, console_level: str = 'INFO', file_level: str = 'DEBUG' ) -> UniversalLogger: """ Get or create a logger instance for a component (singleton pattern) Args: component_name: Name of the component log_dir: Directory to store logs retention_days: Number of days to keep logs console_level: Console logging level file_level: File logging level Returns: UniversalLogger instance """ if component_name not in _logger_instances: _logger_instances[component_name] = UniversalLogger( component_name=component_name, log_dir=log_dir, retention_days=retention_days, console_level=console_level, file_level=file_level ) return _logger_instances[component_name]