359 lines
12 KiB
Python
359 lines
12 KiB
Python
#!/usr/bin/env python3
"""
Base Subprocess Wrapper

Provides common functionality for all subprocess wrappers:
- Signal handling for graceful cleanup
- Stderr logging
- Database connection management

Usage:
    from wrappers.base_subprocess_wrapper import (
        setup_signal_handlers,
        stderr_log,
        set_database_reference,
        get_database_reference
    )
"""
|
|
|
|
import sys
|
|
import signal
|
|
from pathlib import Path
|
|
|
|
# Add parent directory to path for module imports
|
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
|
|
# Bootstrap database backend (patches sqlite3 → pg_adapter if DATABASE_BACKEND=postgresql)
|
|
import modules.db_bootstrap # noqa: F401, E402
|
|
|
|
# Module-level state consulted by the signal handler during cleanup.
# _module_name: tag placed at the front of the handler's log lines.
# _unified_db: database object to close on a termination signal
#              (None until one is registered via set_database_reference()).
_module_name = "Subprocess"
_unified_db = None
|
|
|
|
|
|
def set_module_name(name: str):
    """
    Record the tag that prefixes this module's log messages.

    Args:
        name: Display name stored in the module-level ``_module_name``.
    """
    global _module_name
    _module_name = name
|
|
|
|
|
|
def set_database_reference(db):
    """
    Register the database object that the signal handler should close.

    Args:
        db: Object stored in the module-level ``_unified_db``; the signal
            handler calls ``close()`` on it when it is truthy.
    """
    global _unified_db
    _unified_db = db
|
|
|
|
|
|
def get_database_reference():
    """
    Return the database object currently registered for signal cleanup.

    Returns:
        Whatever is stored in the module-level ``_unified_db``
        (``None`` when nothing is registered).
    """
    return _unified_db
|
|
|
|
|
|
def _signal_handler(signum, frame):
    """
    Handle termination signals gracefully with database cleanup.

    Logs the received signal to stderr, closes the registered database
    object (if any), and exits the process with status ``128 + signum``.

    Args:
        signum: Number of the signal being handled.
        frame: Current stack frame supplied by the signal machinery (unused).

    Raises:
        SystemExit: Always, via ``sys.exit(128 + signum)``.
    """
    global _unified_db

    try:
        signal_name = signal.Signals(signum).name
    except ValueError:
        # Number is not a member of the Signals enum; still report it.
        signal_name = f"signal {signum}"
    print(f"[{_module_name}] [WARNING] Received {signal_name}, cleaning up...", file=sys.stderr, flush=True)

    # Detach the reference before closing so that a second signal (or any
    # later cleanup that consults the reference) does not close the same
    # object twice.
    db, _unified_db = _unified_db, None
    if db:
        try:
            db.close()
            print(f"[{_module_name}] [DEBUG] Database connection closed on signal", file=sys.stderr, flush=True)
        except Exception as e:
            # Best effort: a failing close must not prevent the exit below.
            print(f"[{_module_name}] [WARNING] Error closing database on signal: {e}", file=sys.stderr, flush=True)

    sys.exit(128 + signum)
|
|
|
|
|
|
def setup_signal_handlers(module_name: str = None):
    """
    Install the shared termination handler for SIGTERM and SIGINT.

    Args:
        module_name: Optional name to use in log messages; when truthy it is
            stored via set_module_name() before the handlers are installed.
    """
    if module_name:
        set_module_name(module_name)

    # Same registration order as before: SIGTERM first, then SIGINT.
    for sig in (signal.SIGTERM, signal.SIGINT):
        signal.signal(sig, _signal_handler)
|
|
|
|
|
|
def stderr_log(tag_or_message, level_or_tag="info", message=None):
    """
    Write one line to stderr so stdout stays reserved for the JSON result.

    Call forms:
    - stderr_log("message")                -> prints the message unchanged
    - stderr_log("message", "error")       -> prints the message unchanged
                                              (the level is ignored)
    - stderr_log("Tag", "info", "message") -> prints "[Tag] [INFO] message"

    Args:
        tag_or_message: The tag (3-argument form) or the complete message.
        level_or_tag: The level; only used, upper-cased, in the 3-argument form.
        message: The message text; any value other than None selects the
            3-argument form.
    """
    if message is None:
        # 1- or 2-argument form: first argument is already the full line.
        line = tag_or_message
    else:
        line = f"[{tag_or_message}] [{level_or_tag.upper()}] {message}"
    print(line, file=sys.stderr, flush=True)
|
|
|
|
|
|
def create_json_result(success: bool, data: dict = None, error: str = None) -> dict:
    """
    Build the standard result dictionary a subprocess prints as JSON.

    Args:
        success: Whether the operation succeeded; stored under "success".
        data: Extra entries merged into the result when truthy. Merged after
            "success", so a "success" key in data replaces that value.
        error: Error message; stored under "error" when truthy, replacing any
            "error" entry that came from data.

    Returns:
        Dictionary suitable for JSON serialization.
    """
    payload = {"success": success}
    payload.update(data or {})
    if error:
        payload["error"] = error
    return payload
|
|
|
|
|
|
def create_download_wrapper(module_name: str, scraper_id: str, downloader_factory: callable):
    """
    Factory function to create standardized download wrapper functions.

    This eliminates code duplication across the individual wrapper files by
    providing a common implementation pattern. Signal handlers are registered
    (via setup_signal_handlers) when this factory is called, not when the
    returned function runs.

    Args:
        module_name: Display name for logging (e.g., "FastDL", "ImgInn")
        scraper_id: Identifier for the scraper (e.g., "fastdl", "imginn");
            passed to log_download_result
        downloader_factory: Callable that takes (config, unified_db) and returns a downloader instance.
            The instance must provide download(...) and get_pending_downloads();
            the optional attributes auth_failed, user_id_failed and
            invalid_owner are copied into the result when truthy.

    Returns:
        A run_download function that can be used as the main entry point.
        It takes a config dict with required keys 'username', 'content_type'
        and 'temp_dir' (optional: 'db_path', 'days_back', 'max_downloads',
        'phrase_config') and returns a dict with 'status' ('success' or
        'error'), 'count', and either 'pending_downloads' or 'message'.

    Example usage in a wrapper file:
        from wrappers.base_subprocess_wrapper import create_download_wrapper, run_wrapper_main

        def create_downloader(config, unified_db):
            from modules.fastdl_module import FastDLDownloader
            return FastDLDownloader(
                headless=config.get('headless', True),
                show_progress=True,
                use_database=True,
                log_callback=None,
                unified_db=unified_db,
                high_res=config.get('high_res', False)
            )

        run_download = create_download_wrapper("FastDL", "fastdl", create_downloader)

        if __name__ == '__main__':
            run_wrapper_main(run_download)
    """
    # Setup signal handlers at wrapper creation time
    setup_signal_handlers(module_name)

    def run_download(config: dict) -> dict:
        """Run download in isolated subprocess."""
        from modules.unified_database import UnifiedDatabase
        from modules.monitor_wrapper import log_download_result

        # Validate required config parameters
        required_keys = ['username', 'content_type', 'temp_dir']
        missing_keys = [key for key in required_keys if key not in config]
        if missing_keys:
            return {'status': 'error', 'message': f'Missing required config keys: {missing_keys}', 'count': 0}

        # Create unified database
        db_path = config.get('db_path', '/opt/media-downloader/database/media_downloader.db')
        unified_db = UnifiedDatabase(db_path, use_pool=False)
        set_database_reference(unified_db)  # Set for signal handler cleanup

        try:
            # Create downloader instance using the factory
            downloader = downloader_factory(config, unified_db)

            username = config['username']
            content_type = config['content_type']
            temp_dir = Path(config['temp_dir'])

            # Log that we're starting this user (for scraping monitor)
            stderr_log(f"Processing {module_name} {content_type} for @{username}", "info")

            # Create directories
            temp_dir.mkdir(parents=True, exist_ok=True)

            # Download content with deferred database recording
            try:
                count = downloader.download(
                    username=username,
                    content_type=content_type,
                    days_back=config.get('days_back', 3),
                    max_downloads=config.get('max_downloads', 50),
                    output_dir=str(temp_dir),
                    phrase_config=config.get('phrase_config'),
                    defer_database=True  # Defer recording until after file move
                )

                # Get pending downloads for main script to record after move
                pending_downloads = downloader.get_pending_downloads()

                # Log to monitor
                log_download_result(scraper_id, username, count or 0, error=None)

                result = {
                    'status': 'success',
                    'count': count or 0,
                    'pending_downloads': pending_downloads
                }

                # Propagate failure flags if the downloader supports them
                for flag in ('auth_failed', 'user_id_failed', 'invalid_owner'):
                    if getattr(downloader, flag, False):
                        result[flag] = True

                return result

            except Exception as e:
                stderr_log(f"{module_name} download error: {e}", "error")
                import traceback
                stderr_log(traceback.format_exc(), "error")

                # Log failure to monitor
                log_download_result(scraper_id, username, 0, error=str(e))

                return {
                    'status': 'error',
                    'message': str(e),
                    'count': 0
                }
        finally:
            # Explicitly close database connection before subprocess exits
            if unified_db:
                try:
                    unified_db.close()
                    stderr_log("Database connection closed", "debug")
                except Exception as e:
                    stderr_log(f"Error closing database: {e}", "warning")
            # The connection has been dealt with; unregister it so a signal
            # arriving afterwards does not make the handler close it again.
            # Only clear the slot if it still holds this run's object.
            if get_database_reference() is unified_db:
                set_database_reference(None)

    return run_download
|
|
|
|
|
|
def create_download_wrapper_multi(module_name: str, scraper_id: str, downloader_factory: callable):
    """
    Factory function to create multi-content-type download wrapper functions.

    Similar to create_download_wrapper but handles multiple content types
    in a single browser session. Signal handlers are registered (via
    setup_signal_handlers) when this factory is called.

    Args:
        module_name: Display name for logging (e.g., "FastDL")
        scraper_id: Identifier for the scraper (e.g., "fastdl"); passed to
            log_download_result
        downloader_factory: Callable that takes (config, unified_db) and returns a downloader instance.
            The instance must provide download_multi(...).

    Returns:
        A run_download_multi function that can be used as the main entry point.
        It takes a config dict with required keys 'username', 'content_types'
        (strings, joined for the log line) and 'output_dirs' (a mapping whose
        values are directories to create); optional keys are 'db_path',
        'days_back', 'max_downloads' and 'phrase_configs'. It returns a dict
        with 'status' ('success' or 'error'), 'results', and on error 'message'.
    """
    # Setup signal handlers at wrapper creation time
    setup_signal_handlers(module_name)

    def run_download_multi(config: dict) -> dict:
        """Run multi-content-type download in isolated subprocess."""
        from modules.unified_database import UnifiedDatabase
        from modules.monitor_wrapper import log_download_result

        # Validate required config parameters
        required_keys = ['username', 'content_types', 'output_dirs']
        missing_keys = [key for key in required_keys if key not in config]
        if missing_keys:
            return {'status': 'error', 'message': f'Missing required config keys: {missing_keys}', 'results': {}}

        # Create unified database
        db_path = config.get('db_path', '/opt/media-downloader/database/media_downloader.db')
        unified_db = UnifiedDatabase(db_path, use_pool=False)
        set_database_reference(unified_db)  # Set for signal handler cleanup

        try:
            # Create downloader instance using the factory
            downloader = downloader_factory(config, unified_db)

            username = config['username']
            content_types = config['content_types']
            output_dirs = config['output_dirs']

            stderr_log(f"Processing {module_name} multi ({', '.join(content_types)}) for @{username}", "info")

            # Create temp directories (only the paths matter here, not the keys)
            for dir_path in output_dirs.values():
                Path(dir_path).mkdir(parents=True, exist_ok=True)

            try:
                results = downloader.download_multi(
                    username=username,
                    content_types=content_types,
                    output_dirs=output_dirs,
                    days_back=config.get('days_back', 3),
                    max_downloads=config.get('max_downloads', 50),
                    phrase_configs=config.get('phrase_configs'),
                    defer_database=True
                )

                # Log total to monitor
                total_count = sum(r.get('count', 0) for r in results.values())
                log_download_result(scraper_id, username, total_count, error=None)

                return {
                    'status': 'success',
                    'results': results
                }

            except Exception as e:
                stderr_log(f"{module_name} multi-download error: {e}", "error")
                import traceback
                stderr_log(traceback.format_exc(), "error")

                # Log failure to monitor
                log_download_result(scraper_id, username, 0, error=str(e))

                return {
                    'status': 'error',
                    'message': str(e),
                    'results': {}
                }
        finally:
            # Explicitly close database connection before subprocess exits
            if unified_db:
                try:
                    unified_db.close()
                    stderr_log("Database connection closed", "debug")
                except Exception as e:
                    stderr_log(f"Error closing database: {e}", "warning")
            # The connection has been dealt with; unregister it so a signal
            # arriving afterwards does not make the handler close it again.
            # Only clear the slot if it still holds this run's object.
            if get_database_reference() is unified_db:
                set_database_reference(None)

    return run_download_multi
|
|
|
|
|
|
def run_wrapper_main(run_download_func: callable):
    """
    Standard main entry point for subprocess wrappers.

    Reads a JSON config object from stdin, passes it to run_download_func,
    prints the returned result as JSON on stdout, and exits with status 0
    when the result's 'status' is 'success' and 1 otherwise.

    If stdin does not contain a valid JSON object, run_download_func is not
    called; an error result ({'status': 'error', 'message': ...}) is printed
    on stdout instead, the problem is logged to stderr, and the exit status
    is 1.

    Args:
        run_download_func: The download function created by create_download_wrapper

    Raises:
        SystemExit: Always, carrying the exit status described above.
    """
    import json

    # Read config from stdin
    config_json = sys.stdin.read()
    problem = None
    config = None
    try:
        config = json.loads(config_json)
    except json.JSONDecodeError as e:
        problem = f'Invalid JSON config on stdin: {e}'
    else:
        if not isinstance(config, dict):
            problem = f'Config must be a JSON object, got {type(config).__name__}'

    if problem is None:
        # Run download
        result = run_download_func(config)
    else:
        # Keep stdout machine-readable even when the input is unusable.
        print(problem, file=sys.stderr, flush=True)
        result = {'status': 'error', 'message': problem}

    # Output result as JSON
    print(json.dumps(result))

    # A result without a 'status' entry (or a non-dict result) counts as failure.
    succeeded = isinstance(result, dict) and result.get('status') == 'success'
    sys.exit(0 if succeeded else 1)
|