#!/usr/bin/env python3
"""
Base Subprocess Wrapper

Provides common functionality for all subprocess wrappers:
- Signal handling for graceful cleanup
- Stderr logging
- Database connection management

Usage:
    from wrappers.base_subprocess_wrapper import (
        setup_signal_handlers,
        stderr_log,
        set_database_reference,
        get_database_reference
    )
"""

import signal
import sys
from pathlib import Path
from typing import Callable

# Add parent directory to path for module imports
sys.path.insert(0, str(Path(__file__).parent.parent))

# Bootstrap database backend (patches sqlite3 → pg_adapter if DATABASE_BACKEND=postgresql)
import modules.db_bootstrap  # noqa: F401, E402

# Global database reference so the signal handler can close it on termination.
_unified_db = None
# Name prefixed to signal-handler log messages; overridden via set_module_name().
_module_name = "Subprocess"


def set_module_name(name: str) -> None:
    """Set the module name used in log messages."""
    global _module_name
    _module_name = name


def set_database_reference(db) -> None:
    """Set the global database reference for signal handler cleanup.

    Args:
        db: Database object exposing a close() method (e.g. UnifiedDatabase).
    """
    global _unified_db
    _unified_db = db


def get_database_reference():
    """Get the current database reference (None if none was registered)."""
    return _unified_db


def _signal_handler(signum, frame):
    """Handle termination signals gracefully with database cleanup.

    Closes the registered database connection (if any), then exits with the
    conventional 128 + signum status code. All output goes to stderr so
    stdout stays clean for the JSON result protocol.
    """
    signal_name = signal.Signals(signum).name
    print(f"[{_module_name}] [WARNING] Received {signal_name}, cleaning up...",
          file=sys.stderr, flush=True)

    if _unified_db:
        try:
            _unified_db.close()
            print(f"[{_module_name}] [DEBUG] Database connection closed on signal",
                  file=sys.stderr, flush=True)
        except Exception as e:
            # Best-effort cleanup: never let a close() failure mask the signal exit.
            print(f"[{_module_name}] [WARNING] Error closing database on signal: {e}",
                  file=sys.stderr, flush=True)

    sys.exit(128 + signum)


def setup_signal_handlers(module_name: str = None):
    """
    Register signal handlers for graceful termination.

    Installs _signal_handler for SIGTERM and SIGINT.

    Args:
        module_name: Name to use in log messages (optional)
    """
    if module_name:
        set_module_name(module_name)
    signal.signal(signal.SIGTERM, _signal_handler)
    signal.signal(signal.SIGINT, _signal_handler)


def stderr_log(tag_or_message, level_or_tag="info", message=None):
    """
    Log to stderr to avoid polluting stdout JSON.

    Supports multiple call signatures:
    - stderr_log("message")                -> Logs message as-is
    - stderr_log("message", "error")       -> Logs message as-is
    - stderr_log("Tag", "info", "message") -> Logs as "[Tag] [INFO] message"

    Args:
        tag_or_message: Either the tag or the complete message
        level_or_tag: Either the level or the tag (when 3 args)
        message: The actual message (when 3 args)
    """
    if message is not None:
        # 3 arguments: (tag, level, message)
        log_msg = f"[{tag_or_message}] [{level_or_tag.upper()}] {message}"
    else:
        # 1 or 2 arguments: first argument is already the complete message
        log_msg = tag_or_message
    print(log_msg, file=sys.stderr, flush=True)


def create_json_result(success: bool, data: dict = None, error: str = None) -> dict:
    """
    Create a standardized JSON result for subprocess output.

    Args:
        success: Whether the operation succeeded
        data: Additional data merged into the result (falsy values are skipped)
        error: Error message if operation failed (falsy values are skipped)

    Returns:
        Dictionary suitable for JSON serialization
    """
    result = {"success": success}
    if data:
        result.update(data)
    if error:
        result["error"] = error
    return result


def create_download_wrapper(module_name: str, scraper_id: str,
                            downloader_factory: Callable):
    """
    Factory function to create standardized download wrapper functions.

    This eliminates code duplication across the individual wrapper files by
    providing a common implementation pattern.

    Args:
        module_name: Display name for logging (e.g., "FastDL", "ImgInn")
        scraper_id: Identifier for the scraper (e.g., "fastdl", "imginn")
        downloader_factory: Callable that takes (config, unified_db) and
            returns a downloader instance

    Returns:
        A run_download function that can be used as the main entry point

    Example usage in a wrapper file:
        from wrappers.base_subprocess_wrapper import create_download_wrapper, run_wrapper_main

        def create_downloader(config, unified_db):
            from modules.fastdl_module import FastDLDownloader
            return FastDLDownloader(
                headless=config.get('headless', True),
                show_progress=True,
                use_database=True,
                log_callback=None,
                unified_db=unified_db,
                high_res=config.get('high_res', False)
            )

        run_download = create_download_wrapper("FastDL", "fastdl", create_downloader)

        if __name__ == '__main__':
            run_wrapper_main(run_download)
    """
    # Setup signal handlers at wrapper creation time
    setup_signal_handlers(module_name)

    def run_download(config: dict) -> dict:
        """Run download in isolated subprocess.

        Returns a dict with 'status' ('success'/'error'), 'count', and on
        success 'pending_downloads' plus optional downloader failure flags.
        """
        from modules.unified_database import UnifiedDatabase
        from modules.monitor_wrapper import log_download_result

        # Validate required config parameters
        required_keys = ['username', 'content_type', 'temp_dir']
        missing_keys = [key for key in required_keys if key not in config]
        if missing_keys:
            return {'status': 'error',
                    'message': f'Missing required config keys: {missing_keys}',
                    'count': 0}

        # Create unified database (no pool: subprocess owns a single connection)
        db_path = config.get('db_path', '/opt/media-downloader/database/media_downloader.db')
        unified_db = UnifiedDatabase(db_path, use_pool=False)
        set_database_reference(unified_db)  # Set for signal handler cleanup

        try:
            # Create downloader instance using the factory
            downloader = downloader_factory(config, unified_db)

            username = config['username']
            content_type = config['content_type']
            temp_dir = Path(config['temp_dir'])

            # Log that we're starting this user (for scraping monitor)
            stderr_log(f"Processing {module_name} {content_type} for @{username}", "info")

            # Create directories
            temp_dir.mkdir(parents=True, exist_ok=True)

            # Download content with deferred database recording
            try:
                count = downloader.download(
                    username=username,
                    content_type=content_type,
                    days_back=config.get('days_back', 3),
                    max_downloads=config.get('max_downloads', 50),
                    output_dir=str(temp_dir),
                    phrase_config=config.get('phrase_config'),
                    defer_database=True  # Defer recording until after file move
                )

                # Get pending downloads for main script to record after move
                pending_downloads = downloader.get_pending_downloads()

                # Log to monitor
                log_download_result(scraper_id, username, count or 0, error=None)

                result = {
                    'status': 'success',
                    'count': count or 0,
                    'pending_downloads': pending_downloads
                }

                # Propagate failure flags if the downloader supports them
                if getattr(downloader, 'auth_failed', False):
                    result['auth_failed'] = True
                if getattr(downloader, 'user_id_failed', False):
                    result['user_id_failed'] = True
                if getattr(downloader, 'invalid_owner', False):
                    result['invalid_owner'] = True

                return result

            except Exception as e:
                stderr_log(f"{module_name} download error: {e}", "error")
                import traceback
                stderr_log(traceback.format_exc(), "error")
                # Log failure to monitor
                log_download_result(scraper_id, username, 0, error=str(e))
                return {
                    'status': 'error',
                    'message': str(e),
                    'count': 0
                }
        except Exception as e:
            # BUGFIX: previously the outer try had only a finally clause, so an
            # exception raised by downloader_factory() (or directory setup)
            # escaped run_download entirely and the subprocess died without
            # emitting the JSON result the parent process expects.
            stderr_log(f"{module_name} setup error: {e}", "error")
            import traceback
            stderr_log(traceback.format_exc(), "error")
            log_download_result(scraper_id, config['username'], 0, error=str(e))
            return {
                'status': 'error',
                'message': str(e),
                'count': 0
            }
        finally:
            # Explicitly close database connection before subprocess exits
            if unified_db:
                try:
                    unified_db.close()
                    stderr_log("Database connection closed", "debug")
                except Exception as e:
                    stderr_log(f"Error closing database: {e}", "warning")

    return run_download


def create_download_wrapper_multi(module_name: str, scraper_id: str,
                                  downloader_factory: Callable):
    """
    Factory function to create multi-content-type download wrapper functions.

    Similar to create_download_wrapper but handles multiple content types in
    a single browser session.

    Args:
        module_name: Display name for logging (e.g., "FastDL")
        scraper_id: Identifier for the scraper (e.g., "fastdl")
        downloader_factory: Callable that takes (config, unified_db) and
            returns a downloader instance

    Returns:
        A run_download_multi function that can be used as the main entry point
    """
    # Setup signal handlers at wrapper creation time
    setup_signal_handlers(module_name)

    def run_download_multi(config: dict) -> dict:
        """Run multi-content-type download in isolated subprocess.

        Returns a dict with 'status' ('success'/'error') and per-content-type
        'results' from the downloader's download_multi().
        """
        from modules.unified_database import UnifiedDatabase
        from modules.monitor_wrapper import log_download_result

        # Validate required config parameters
        required_keys = ['username', 'content_types', 'output_dirs']
        missing_keys = [key for key in required_keys if key not in config]
        if missing_keys:
            return {'status': 'error',
                    'message': f'Missing required config keys: {missing_keys}',
                    'results': {}}

        # Create unified database (no pool: subprocess owns a single connection)
        db_path = config.get('db_path', '/opt/media-downloader/database/media_downloader.db')
        unified_db = UnifiedDatabase(db_path, use_pool=False)
        set_database_reference(unified_db)

        try:
            # Create downloader instance using the factory
            downloader = downloader_factory(config, unified_db)

            username = config['username']
            content_types = config['content_types']
            output_dirs = config['output_dirs']

            stderr_log(f"Processing {module_name} multi ({', '.join(content_types)}) for @{username}", "info")

            # Create temp directories (one per content type)
            for dir_path in output_dirs.values():
                Path(dir_path).mkdir(parents=True, exist_ok=True)

            try:
                results = downloader.download_multi(
                    username=username,
                    content_types=content_types,
                    output_dirs=output_dirs,
                    days_back=config.get('days_back', 3),
                    max_downloads=config.get('max_downloads', 50),
                    phrase_configs=config.get('phrase_configs'),
                    defer_database=True
                )

                # Log total to monitor
                total_count = sum(r.get('count', 0) for r in results.values())
                log_download_result(scraper_id, username, total_count, error=None)

                return {
                    'status': 'success',
                    'results': results
                }

            except Exception as e:
                stderr_log(f"{module_name} multi-download error: {e}", "error")
                import traceback
                stderr_log(traceback.format_exc(), "error")
                log_download_result(scraper_id, username, 0, error=str(e))
                return {
                    'status': 'error',
                    'message': str(e),
                    'results': {}
                }
        except Exception as e:
            # BUGFIX: same as run_download — without this handler a failing
            # downloader_factory() crashed the subprocess with no JSON output.
            stderr_log(f"{module_name} setup error: {e}", "error")
            import traceback
            stderr_log(traceback.format_exc(), "error")
            log_download_result(scraper_id, config['username'], 0, error=str(e))
            return {
                'status': 'error',
                'message': str(e),
                'results': {}
            }
        finally:
            # Explicitly close database connection before subprocess exits
            if unified_db:
                try:
                    unified_db.close()
                    stderr_log("Database connection closed", "debug")
                except Exception as e:
                    stderr_log(f"Error closing database: {e}", "warning")

    return run_download_multi


def run_wrapper_main(run_download_func: Callable):
    """
    Standard main entry point for subprocess wrappers.

    Reads a JSON config from stdin, runs the download function, prints the
    JSON result to stdout, and exits 0 on success / 1 on failure.

    Args:
        run_download_func: The download function created by create_download_wrapper
    """
    import json

    # Read config from stdin
    config_json = sys.stdin.read()
    try:
        config = json.loads(config_json)
    except json.JSONDecodeError as e:
        # BUGFIX: malformed stdin previously produced a raw traceback and no
        # JSON on stdout, breaking the parent's result parsing.
        print(json.dumps({'status': 'error',
                          'message': f'Invalid JSON config: {e}',
                          'count': 0}))
        sys.exit(1)

    # Run download
    result = run_download_func(config)

    # Output result as JSON (.get guards against a result missing 'status')
    print(json.dumps(result))
    sys.exit(0 if result.get('status') == 'success' else 1)