#!/usr/bin/env python3
"""
Universal Logging Module for Media Downloader

Provides consistent logging across all components with automatic rotation and 7-day retention
"""

import glob
import logging
import logging.handlers
import os
import sys
from datetime import datetime, timedelta
from pathlib import Path
from typing import Optional
class UniversalLogger:
    """
    Universal logger with per-day log files and automatic cleanup.

    Features:
    - Consistent log format across all components
    - One date-stamped log file per day (the filename is chosen once at
      initialization; a long-running process keeps writing to the same
      file until it restarts)
    - Automatic cleanup of logs older than retention_days (default 7)
    - Separate log files per component
    - Console and file output
    """

    def __init__(
        self,
        component_name: str,
        log_dir: Optional[str] = None,
        retention_days: int = 7,
        console_level: str = 'INFO',
        file_level: str = 'DEBUG'
    ):
        """
        Initialize universal logger for a component

        Args:
            component_name: Name of the component (e.g., 'API', 'Scheduler', 'MediaDownloader')
            log_dir: Directory to store logs (default: <package root>/logs)
            retention_days: Number of days to keep logs (default: 7)
            console_level: Logging level for console output (default: INFO)
            file_level: Logging level for file output (default: DEBUG)
        """
        self.component_name = component_name
        self.retention_days = retention_days

        # Set up log directory (default: sibling 'logs' dir next to this package)
        if log_dir is None:
            base_path = Path(__file__).parent.parent
            self.log_dir = base_path / 'logs'
        else:
            self.log_dir = Path(log_dir)
        self.log_dir.mkdir(exist_ok=True, parents=True)

        # Logger level is DEBUG; the handlers decide what actually gets emitted.
        self.logger = logging.getLogger(f'MediaDownloader.{component_name}')
        self.logger.setLevel(logging.DEBUG)

        # Remove existing handlers to prevent duplicate output on re-init
        self.logger.handlers = []

        # Formatter matches media-downloader.py format:
        #   2025-11-12 21:00:00.123456 [ComponentName] [Module] [LEVEL] message
        # Microseconds are included so interleaved logs sort deterministically.
        class MicrosecondFormatter(logging.Formatter):
            def formatTime(self, record, datefmt=None):
                ct = datetime.fromtimestamp(record.created)
                return ct.strftime('%Y-%m-%d %H:%M:%S.%f')

        formatter = MicrosecondFormatter(
            '%(asctime)s [%(name)s] %(message)s'
        )

        # File handler with date-stamped filename (one file per day).
        # Format: YYYYMMDD_component.log — all logs for the day append to the
        # same file. NOTE: the stamp is computed once here, so the file only
        # changes when the process restarts on a new day.
        date_stamp = datetime.now().strftime('%Y%m%d')
        log_file = self.log_dir / f'{date_stamp}_{component_name.lower()}.log'
        file_handler = logging.FileHandler(
            filename=str(log_file),
            mode='a',  # Append mode - preserves logs across restarts
            encoding='utf-8'
        )
        file_handler.setLevel(getattr(logging, file_level.upper()))
        file_handler.setFormatter(formatter)
        self.logger.addHandler(file_handler)

        # Console handler
        console_handler = logging.StreamHandler()
        console_handler.setLevel(getattr(logging, console_level.upper()))
        console_handler.setFormatter(formatter)
        self.logger.addHandler(console_handler)

        # Suppress noisy third-party loggers
        for noisy in ('asyncio', 'selenium', 'urllib3', 'websocket',
                      'requests', 'PIL', 'instaloader', 'deepface'):
            logging.getLogger(noisy).setLevel(logging.WARNING)
        logging.getLogger('tensorflow').setLevel(logging.ERROR)

        # Clean up old logs on initialization
        self._cleanup_old_logs()

    def _cleanup_old_logs(self):
        """Remove this component's log files older than retention_days."""
        try:
            cutoff_date = datetime.now() - timedelta(days=self.retention_days)
            # Match pattern: YYYYMMDD_component.log (as written by __init__)
            pattern = str(self.log_dir / f'*_{self.component_name.lower()}.log')

            cleaned_count = 0
            for log_file in glob.glob(pattern):
                try:
                    file_path = Path(log_file)
                    # Age is judged by file modification time, not the
                    # date stamp embedded in the filename.
                    mtime = datetime.fromtimestamp(file_path.stat().st_mtime)
                    if mtime < cutoff_date:
                        file_path.unlink()
                        cleaned_count += 1
                except Exception:
                    # Best-effort: don't fail if a single file can't be removed
                    continue

            if cleaned_count > 0:
                # Log through the logger itself (file handler is already set up)
                self.info(f"Cleaned up {cleaned_count} old {self.component_name} log file(s)", module='LogCleanup')
        except Exception:
            # Never fail initialization because cleanup failed
            pass

    def _format_message(self, module: str, level: str, message: str) -> str:
        """
        Format message to match media-downloader.py style

        Args:
            module: Module name (e.g., 'Core', 'Forum', 'Instagram')
            level: Log level (e.g., 'INFO', 'ERROR', 'DEBUG')
            message: Log message

        Returns:
            Formatted message: [Module] [LEVEL] message
        """
        return f"[{module}] [{level.upper()}] {message}"

    def _broadcast_error(self, message: str, module: str, level: str = 'ERROR'):
        """
        Broadcast error to connected WebSocket clients for real-time notifications.
        Fails silently to not disrupt logging.
        """
        try:
            # Import lazily: the WebSocket manager only exists when the API
            # process is running; outside it, the import fails and we no-op.
            from web.backend.api import manager
            if manager and manager.active_connections:
                manager.broadcast_sync({
                    'type': 'error_alert',
                    'error': {
                        'module': module,
                        'level': level,
                        'message': message[:200],  # Truncate for notification
                        'timestamp': datetime.now().isoformat(),
                        'component': self.component_name
                    }
                })
        except Exception:
            # Fail silently - API may not be running or manager not available
            pass

    def _record_error_to_db(self, message: str, module: str, level: str = 'ERROR'):
        """
        Record error to error_log database table for dashboard display.
        Uses a separate short-lived connection to avoid circular dependencies.
        Fails silently to not disrupt logging.
        """
        try:
            # Imported lazily so ordinary (non-error) logging never pays for them
            import sqlite3
            import hashlib
            import re

            # Get database path
            db_path = Path(__file__).parent.parent / 'database' / 'media_downloader.db'
            if not db_path.exists():
                return

            # Normalize message for deduplication (strip variable parts:
            # file paths, URLs, numbers, UUIDs)
            normalized = message
            normalized = re.sub(r'/[\w/\-\.]+\.(jpg|png|mp4|webp|gif|heic|mov)', '{file}', normalized)
            normalized = re.sub(r'https?://[^\s]+', '{url}', normalized)
            normalized = re.sub(r'\b\d+\b', '{n}', normalized)
            normalized = re.sub(r'[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}', '{uuid}', normalized)

            # Create error hash for deduplication (module + normalized message)
            error_key = f"{module}:{normalized[:200]}"
            error_hash = hashlib.sha256(error_key.encode()).hexdigest()

            # Quick connection with short timeout; close in finally so a
            # failed execute/commit can't leak the connection.
            conn = sqlite3.connect(str(db_path), timeout=2.0)
            try:
                conn.execute("PRAGMA busy_timeout = 2000")
                cursor = conn.cursor()

                now = datetime.now().isoformat()

                # Upsert: insert new error or update occurrence count.
                # Reset viewed_at/dismissed_at to NULL when an error recurs
                # so it shows as "new" on the dashboard again.
                cursor.execute('''
                    INSERT INTO error_log (error_hash, module, level, message, first_seen, last_seen, occurrence_count, log_file)
                    VALUES (?, ?, ?, ?, ?, ?, 1, ?)
                    ON CONFLICT(error_hash) DO UPDATE SET
                        last_seen = excluded.last_seen,
                        occurrence_count = error_log.occurrence_count + 1,
                        viewed_at = NULL,
                        dismissed_at = NULL
                ''', (error_hash, module, level, message[:500], now, now, self.component_name))

                conn.commit()
            finally:
                conn.close()

            # Broadcast to WebSocket clients for real-time notification
            self._broadcast_error(message, module, level)

        except Exception:
            # Fail silently - don't let error logging break the main logging
            pass

    def debug(self, message: str, module: str = 'Core'):
        """Log debug message"""
        self.logger.debug(self._format_message(module, 'DEBUG', message))

    def info(self, message: str, module: str = 'Core'):
        """Log info message"""
        self.logger.info(self._format_message(module, 'INFO', message))

    def warning(self, message: str, module: str = 'Core'):
        """Log warning message"""
        self.logger.warning(self._format_message(module, 'WARNING', message))

    def error(self, message: str, module: str = 'Core'):
        """Log error message and record to error_log database"""
        self.logger.error(self._format_message(module, 'ERROR', message))
        # Record error to database for dashboard display
        self._record_error_to_db(message, module)

    def critical(self, message: str, module: str = 'Core'):
        """Log critical message and record to error_log database"""
        self.logger.critical(self._format_message(module, 'CRITICAL', message))
        # Record critical errors to database for dashboard display
        self._record_error_to_db(message, module, level='CRITICAL')

    def success(self, message: str, module: str = 'Core'):
        """Log success message (maps to INFO level)"""
        self.logger.info(self._format_message(module, 'SUCCESS', message))

    def log(self, message: str, level: str = 'INFO', module: str = 'Core'):
        """
        Generic log method supporting all levels

        Args:
            message: Log message
            level: Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL, SUCCESS)
            module: Module name
        """
        level_map = {
            'DEBUG': self.debug,
            'INFO': self.info,
            'WARNING': self.warning,
            'ERROR': self.error,
            'CRITICAL': self.critical,
            'SUCCESS': self.success
        }
        # Unknown levels fall back to INFO rather than raising
        log_func = level_map.get(level.upper(), self.info)
        log_func(message, module)

    def get_callback(self):
        """
        Get a callback function compatible with existing module signatures

        Returns:
            Callback function that can be passed to modules expecting log_callback
        """
        def callback(*args):
            """
            Flexible callback that handles multiple signature formats:
            - callback(message, level)
            - callback(message, level, module)
            """
            if len(args) == 2:
                message, level = args
                # Extract a leading "[Module]" tag from the message if present
                if message.startswith('[') and ']' in message:
                    end_bracket = message.index(']')
                    module = message[1:end_bracket]
                    message = message[end_bracket + 1:].strip()
                    # Strip a following "[LEVEL]" tag if present
                    if message.startswith('[') and ']' in message:
                        message = message[message.index(']') + 1:].strip()
                    self.log(message, level, module)
                else:
                    self.log(message, level)
            elif len(args) == 3:
                message, level, module = args
                self.log(message, level, module)
            else:
                # Unknown arity: treat the raw args as a simple message
                self.info(str(args))

        return callback
# Singleton instances for common components, keyed by component_name
_logger_instances = {}


def get_logger(
    component_name: str,
    log_dir: Optional[str] = None,
    retention_days: int = 7,
    console_level: str = 'INFO',
    file_level: str = 'DEBUG'
) -> UniversalLogger:
    """
    Get or create a logger instance for a component (singleton pattern)

    Note: the configuration arguments are applied only the first time a
    component's logger is created; subsequent calls for the same
    component return the cached instance and ignore the arguments.

    Args:
        component_name: Name of the component
        log_dir: Directory to store logs
        retention_days: Number of days to keep logs
        console_level: Console logging level
        file_level: File logging level

    Returns:
        UniversalLogger instance
    """
    if component_name not in _logger_instances:
        _logger_instances[component_name] = UniversalLogger(
            component_name=component_name,
            log_dir=log_dir,
            retention_days=retention_days,
            console_level=console_level,
            file_level=file_level
        )

    return _logger_instances[component_name]
|