Files
media-downloader/modules/universal_logger.py
Todd 0d7b2b1aab Initial commit
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-29 22:42:55 -04:00

349 lines
13 KiB
Python

#!/usr/bin/env python3
"""
Universal Logging Module for Media Downloader
Provides consistent logging across all components with automatic rotation and 7-day retention
"""
import logging
import logging.handlers
from pathlib import Path
from datetime import datetime, timedelta
import os
import glob
import sys
class UniversalLogger:
    """
    Universal logger with automatic rotation and cleanup

    Features:
    - Consistent log format across all components
    - One date-stamped log file per day (new file each day, appended within the day)
    - Automatic cleanup of logs older than retention_days
    - Separate log files per component
    - Console and file output
    """

    def __init__(
        self,
        component_name: str,
        log_dir: str = None,
        retention_days: int = 7,
        console_level: str = 'INFO',
        file_level: str = 'DEBUG'
    ):
        """
        Initialize universal logger for a component

        Args:
            component_name: Name of the component (e.g., 'API', 'Scheduler', 'MediaDownloader')
            log_dir: Directory to store logs (default: 'logs' dir beside this module's package)
            retention_days: Number of days to keep logs (default: 7)
            console_level: Logging level name for console output (default: INFO)
            file_level: Logging level name for file output (default: DEBUG)
        """
        self.component_name = component_name
        self.retention_days = retention_days

        # Set up log directory; default lives two levels up from this file
        if log_dir is None:
            base_path = Path(__file__).parent.parent
            self.log_dir = base_path / 'logs'
        else:
            self.log_dir = Path(log_dir)
        self.log_dir.mkdir(exist_ok=True, parents=True)

        # Namespaced logger; logger level is DEBUG so the handlers do the filtering
        self.logger = logging.getLogger(f'MediaDownloader.{component_name}')
        self.logger.setLevel(logging.DEBUG)

        # Remove existing handlers to prevent duplicate output when re-initialized
        self.logger.handlers = []

        # Create formatter - matches media-downloader.py format
        # Format: 2025-11-12 21:00:00.123456 [MediaDownloader.Component] [Module] [LEVEL] message
        # Custom formatter includes microseconds so interleaved logs sort correctly
        class MicrosecondFormatter(logging.Formatter):
            def formatTime(self, record, datefmt=None):
                ct = datetime.fromtimestamp(record.created)
                return ct.strftime('%Y-%m-%d %H:%M:%S.%f')

        formatter = MicrosecondFormatter(
            '%(asctime)s [%(name)s] %(message)s'
        )

        # File handler with date-stamped filename (one file per day)
        # Format: 20251113_component.log (all logs for the day append to same file)
        date_stamp = datetime.now().strftime('%Y%m%d')
        log_file = self.log_dir / f'{date_stamp}_{component_name.lower()}.log'
        file_handler = logging.FileHandler(
            filename=str(log_file),
            mode='a',  # Append mode - preserves logs across restarts
            encoding='utf-8'
        )
        file_handler.setLevel(getattr(logging, file_level.upper()))
        file_handler.setFormatter(formatter)
        self.logger.addHandler(file_handler)

        # Console handler
        console_handler = logging.StreamHandler()
        console_handler.setLevel(getattr(logging, console_level.upper()))
        console_handler.setFormatter(formatter)
        self.logger.addHandler(console_handler)

        # Suppress noisy third-party loggers
        logging.getLogger('asyncio').setLevel(logging.WARNING)
        logging.getLogger('selenium').setLevel(logging.WARNING)
        logging.getLogger('urllib3').setLevel(logging.WARNING)
        logging.getLogger('websocket').setLevel(logging.WARNING)
        logging.getLogger('requests').setLevel(logging.WARNING)
        logging.getLogger('PIL').setLevel(logging.WARNING)
        logging.getLogger('instaloader').setLevel(logging.WARNING)
        logging.getLogger('tensorflow').setLevel(logging.ERROR)
        logging.getLogger('deepface').setLevel(logging.WARNING)

        # Clean up old logs on initialization
        self._cleanup_old_logs()

    def _cleanup_old_logs(self):
        """Remove this component's log files older than retention_days (best-effort)."""
        try:
            cutoff_date = datetime.now() - timedelta(days=self.retention_days)
            # Match pattern: YYYYMMDD_component.log (see filename format in __init__)
            cleaned_count = 0
            for file_path in self.log_dir.glob(f'*_{self.component_name.lower()}.log'):
                try:
                    # Age is judged by file modification time, not the filename date stamp
                    mtime = datetime.fromtimestamp(file_path.stat().st_mtime)
                    if mtime < cutoff_date:
                        file_path.unlink()
                        cleaned_count += 1
                except Exception:
                    # Don't fail if we can't clean up a single file
                    pass
            if cleaned_count > 0:
                # Log cleanup message through the logger itself (after file handler is set up)
                self.info(f"Cleaned up {cleaned_count} old {self.component_name} log file(s)", module='LogCleanup')
        except Exception:
            # Don't fail initialization if cleanup fails
            pass

    def _format_message(self, module: str, level: str, message: str) -> str:
        """
        Format message to match media-downloader.py style

        Args:
            module: Module name (e.g., 'Core', 'Forum', 'Instagram')
            level: Log level (e.g., 'INFO', 'ERROR', 'DEBUG'); upper-cased in output
            message: Log message

        Returns:
            Formatted message: [Module] [LEVEL] message
        """
        return f"[{module}] [{level.upper()}] {message}"

    def _broadcast_error(self, message: str, module: str, level: str = 'ERROR'):
        """
        Broadcast error to connected WebSocket clients for real-time notifications.
        Fails silently to not disrupt logging.
        """
        try:
            # Try to import the WebSocket manager from the API
            # This will only work when the API is running
            from web.backend.api import manager
            if manager and manager.active_connections:
                manager.broadcast_sync({
                    'type': 'error_alert',
                    'error': {
                        'module': module,
                        'level': level,
                        'message': message[:200],  # Truncate for notification
                        'timestamp': datetime.now().isoformat(),
                        'component': self.component_name
                    }
                })
        except Exception:
            # Fail silently - API may not be running or manager not available
            pass

    def _record_error_to_db(self, message: str, module: str, level: str = 'ERROR'):
        """
        Record error to error_log database table for dashboard display.
        Uses a separate connection to avoid circular dependencies.
        Fails silently to not disrupt logging.
        """
        try:
            import sqlite3
            import hashlib
            import re
            from pathlib import Path
            # Get database path
            db_path = Path(__file__).parent.parent / 'database' / 'media_downloader.db'
            if not db_path.exists():
                return
            # Normalize message for deduplication (remove variable parts like URLs, paths, numbers)
            normalized = message
            normalized = re.sub(r'/[\w/\-\.]+\.(jpg|png|mp4|webp|gif|heic|mov)', '{file}', normalized)
            normalized = re.sub(r'https?://[^\s]+', '{url}', normalized)
            # UUIDs must be collapsed BEFORE bare numbers: the digit substitution
            # would otherwise mangle pure-digit hex groups and the UUID pattern
            # would never match, defeating deduplication
            normalized = re.sub(r'[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}', '{uuid}', normalized)
            normalized = re.sub(r'\b\d+\b', '{n}', normalized)
            # Create error hash for deduplication (module + normalized message)
            error_key = f"{module}:{normalized[:200]}"
            error_hash = hashlib.sha256(error_key.encode()).hexdigest()
            # Use a quick connection with short timeout
            conn = sqlite3.connect(str(db_path), timeout=2.0)
            conn.execute("PRAGMA busy_timeout = 2000")
            cursor = conn.cursor()
            now = datetime.now().isoformat()
            # Upsert: insert new error or update occurrence count
            # Reset viewed_at and dismissed_at to NULL when error recurs so it shows as "new" on dashboard
            cursor.execute('''
                INSERT INTO error_log (error_hash, module, level, message, first_seen, last_seen, occurrence_count, log_file)
                VALUES (?, ?, ?, ?, ?, ?, 1, ?)
                ON CONFLICT(error_hash) DO UPDATE SET
                    last_seen = excluded.last_seen,
                    occurrence_count = error_log.occurrence_count + 1,
                    viewed_at = NULL,
                    dismissed_at = NULL
            ''', (error_hash, module, level, message[:500], now, now, self.component_name))
            conn.commit()
            conn.close()
            # Broadcast to WebSocket clients for real-time notification
            self._broadcast_error(message, module, level)
        except Exception:
            # Fail silently - don't let error logging break the main logging
            pass

    def debug(self, message: str, module: str = 'Core'):
        """Log debug message"""
        self.logger.debug(self._format_message(module, 'DEBUG', message))

    def info(self, message: str, module: str = 'Core'):
        """Log info message"""
        self.logger.info(self._format_message(module, 'INFO', message))

    def warning(self, message: str, module: str = 'Core'):
        """Log warning message"""
        self.logger.warning(self._format_message(module, 'WARNING', message))

    def error(self, message: str, module: str = 'Core'):
        """Log error message and record to error_log database"""
        self.logger.error(self._format_message(module, 'ERROR', message))
        # Record error to database for dashboard display
        self._record_error_to_db(message, module)

    def critical(self, message: str, module: str = 'Core'):
        """Log critical message and record to error_log database"""
        self.logger.critical(self._format_message(module, 'CRITICAL', message))
        # Record critical errors to database for dashboard display
        self._record_error_to_db(message, module, level='CRITICAL')

    def success(self, message: str, module: str = 'Core'):
        """Log success message (maps to INFO level)"""
        self.logger.info(self._format_message(module, 'SUCCESS', message))

    def log(self, message: str, level: str = 'INFO', module: str = 'Core'):
        """
        Generic log method supporting all levels

        Args:
            message: Log message
            level: Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL, SUCCESS);
                   unknown levels fall back to INFO
            module: Module name
        """
        level_map = {
            'DEBUG': self.debug,
            'INFO': self.info,
            'WARNING': self.warning,
            'ERROR': self.error,
            'CRITICAL': self.critical,
            'SUCCESS': self.success
        }
        log_func = level_map.get(level.upper(), self.info)
        log_func(message, module)

    def get_callback(self):
        """
        Get a callback function compatible with existing module signatures

        Returns:
            Callback function that can be passed to modules expecting log_callback
        """
        def callback(*args):
            """
            Flexible callback that handles multiple signature formats:
            - callback(message)
            - callback(message, level)
            - callback(message, level, module)
            """
            if len(args) == 2:
                message, level = args
                # Extract module from a leading "[Module]" tag if present
                if message.startswith('[') and ']' in message:
                    end_bracket = message.index(']')
                    module = message[1:end_bracket]
                    message = message[end_bracket+1:].strip()
                    # Remove a second "[LEVEL]" tag if present
                    if message.startswith('[') and ']' in message:
                        message = message[message.index(']')+1:].strip()
                    self.log(message, level, module)
                else:
                    self.log(message, level)
            elif len(args) == 3:
                message, level, module = args
                self.log(message, level, module)
            elif len(args) == 1:
                # Single argument: log the message itself as INFO
                # (previously this logged the repr of the args tuple)
                self.info(str(args[0]))
            else:
                # Unknown signature: fall back to logging the raw args
                self.info(str(args))
        return callback
# Singleton instances for common components
_logger_instances = {}


def get_logger(
    component_name: str,
    log_dir: str = None,
    retention_days: int = 7,
    console_level: str = 'INFO',
    file_level: str = 'DEBUG'
) -> UniversalLogger:
    """
    Return the shared logger for a component, creating it on first use.

    Each component name maps to exactly one UniversalLogger instance
    (singleton pattern); repeated calls with the same name return the
    cached instance and ignore the other arguments.

    Args:
        component_name: Name of the component
        log_dir: Directory to store logs
        retention_days: Number of days to keep logs
        console_level: Console logging level
        file_level: File logging level

    Returns:
        UniversalLogger instance
    """
    instance = _logger_instances.get(component_name)
    if instance is None:
        instance = UniversalLogger(
            component_name=component_name,
            log_dir=log_dir,
            retention_days=retention_days,
            console_level=console_level,
            file_level=file_level
        )
        _logger_instances[component_name] = instance
    return instance