358
wrappers/base_subprocess_wrapper.py
Normal file
358
wrappers/base_subprocess_wrapper.py
Normal file
@@ -0,0 +1,358 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Base Subprocess Wrapper
|
||||
|
||||
Provides common functionality for all subprocess wrappers:
|
||||
- Signal handling for graceful cleanup
|
||||
- Stderr logging
|
||||
- Database connection management
|
||||
|
||||
Usage:
|
||||
from wrappers.base_subprocess_wrapper import (
|
||||
setup_signal_handlers,
|
||||
stderr_log,
|
||||
set_database_reference,
|
||||
get_database_reference
|
||||
)
|
||||
"""
|
||||
|
||||
import sys
|
||||
import signal
|
||||
from pathlib import Path
|
||||
|
||||
# Add parent directory to path for module imports
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
# Bootstrap database backend (patches sqlite3 → pg_adapter if DATABASE_BACKEND=postgresql)
|
||||
import modules.db_bootstrap # noqa: F401, E402
|
||||
|
||||
# Module-global database handle so _signal_handler can close it when the
# subprocess receives SIGTERM/SIGINT; populated via set_database_reference().
_unified_db = None
# Tag inserted into stderr log lines (e.g. "[FastDL] ..."); overridden via
# set_module_name() / setup_signal_handlers(module_name).
_module_name = "Subprocess"
|
||||
|
||||
|
||||
def set_module_name(name: str) -> None:
    """Set the module name used in log messages.

    Args:
        name: Tag printed in signal-handler log lines, e.g. "[FastDL] ...".
    """
    global _module_name
    _module_name = name
|
||||
|
||||
|
||||
def set_database_reference(db) -> None:
    """Set the global database reference for signal handler cleanup.

    Args:
        db: Open database object (must expose .close()); the signal handler
            closes it before the subprocess exits on SIGTERM/SIGINT.
    """
    global _unified_db
    _unified_db = db
|
||||
|
||||
|
||||
def get_database_reference():
    """Get the current database reference (None if never set)."""
    return _unified_db
|
||||
|
||||
|
||||
def _signal_handler(signum, frame):
    """Handle termination signals gracefully with database cleanup.

    Closes the module-global database connection (if one was registered via
    set_database_reference) and exits with the conventional 128 + signum
    status code. Logs go to stderr so stdout stays reserved for JSON output.

    Args:
        signum: Signal number delivered by the OS.
        frame: Current stack frame (unused).
    """
    global _unified_db, _module_name
    signal_name = signal.Signals(signum).name
    print(f"[{_module_name}] [WARNING] Received {signal_name}, cleaning up...", file=sys.stderr, flush=True)
    if _unified_db:
        try:
            _unified_db.close()
            print(f"[{_module_name}] [DEBUG] Database connection closed on signal", file=sys.stderr, flush=True)
        except Exception as e:
            # Best-effort: never let cleanup failure mask the termination.
            print(f"[{_module_name}] [WARNING] Error closing database on signal: {e}", file=sys.stderr, flush=True)
    sys.exit(128 + signum)
|
||||
|
||||
|
||||
def setup_signal_handlers(module_name: str = None):
    """
    Register signal handlers for graceful termination.

    Installs _signal_handler for SIGTERM and SIGINT so the subprocess can
    close its database connection before exiting.

    Args:
        module_name: Name to use in log messages (optional)
    """
    if module_name:
        set_module_name(module_name)
    for handled_signal in (signal.SIGTERM, signal.SIGINT):
        signal.signal(handled_signal, _signal_handler)
|
||||
|
||||
|
||||
def stderr_log(tag_or_message, level_or_tag="info", message=None):
    """
    Log to stderr to avoid polluting stdout JSON.

    Supports multiple call signatures:
    - stderr_log("message") -> Logs message as-is
    - stderr_log("message", "error") -> Logs message as-is (level ignored)
    - stderr_log("Tag", "info", "message") -> Logs as "[Tag] [INFO] message"

    Args:
        tag_or_message: Either the tag or the complete message
        level_or_tag: Either the level or the tag (when 3 args)
        message: The actual message (when 3 args)
    """
    if message is None:
        # One or two arguments: first argument is the full message.
        line = tag_or_message
    else:
        # Three arguments: format as "[Tag] [LEVEL] message".
        line = f"[{tag_or_message}] [{level_or_tag.upper()}] {message}"
    print(line, file=sys.stderr, flush=True)
|
||||
|
||||
|
||||
def create_json_result(success: bool, data: dict = None, error: str = None) -> dict:
    """
    Create a standardized JSON result for subprocess output.

    Args:
        success: Whether the operation succeeded
        data: Additional data to include in result (merged into the dict;
            ignored when None or empty)
        error: Error message if operation failed (ignored when None or empty)

    Returns:
        Dictionary suitable for JSON serialization
    """
    payload = {"success": success}
    if data:
        # Merge extra fields alongside the success flag.
        payload = {**payload, **data}
    if error:
        payload["error"] = error
    return payload
|
||||
|
||||
|
||||
def create_download_wrapper(module_name: str, scraper_id: str, downloader_factory: callable):
    """
    Factory function to create standardized download wrapper functions.

    This eliminates code duplication across the individual wrapper files by
    providing a common implementation pattern.

    Args:
        module_name: Display name for logging (e.g., "FastDL", "ImgInn")
        scraper_id: Identifier for the scraper (e.g., "fastdl", "imginn")
        downloader_factory: Callable that takes (config, unified_db) and returns a downloader instance

    Returns:
        A run_download function that can be used as the main entry point

    Example usage in a wrapper file:
        from wrappers.base_subprocess_wrapper import create_download_wrapper, run_wrapper_main

        def create_downloader(config, unified_db):
            from modules.fastdl_module import FastDLDownloader
            return FastDLDownloader(
                headless=config.get('headless', True),
                show_progress=True,
                use_database=True,
                log_callback=None,
                unified_db=unified_db,
                high_res=config.get('high_res', False)
            )

        run_download = create_download_wrapper("FastDL", "fastdl", create_downloader)

        if __name__ == '__main__':
            run_wrapper_main(run_download)
    """
    # Setup signal handlers at wrapper creation time
    setup_signal_handlers(module_name)

    def run_download(config: dict) -> dict:
        """Run download in isolated subprocess.

        Expects 'username', 'content_type' and 'temp_dir' in config.
        Returns a dict with 'status', 'count' and, on success,
        'pending_downloads' for the parent process to record after the
        file move completes.
        """
        # Project imports are deferred so the subprocess only pays the
        # import cost (and db_bootstrap patching applies) when invoked.
        from modules.unified_database import UnifiedDatabase
        from modules.monitor_wrapper import log_download_result

        # Validate required config parameters
        required_keys = ['username', 'content_type', 'temp_dir']
        missing_keys = [key for key in required_keys if key not in config]
        if missing_keys:
            return {'status': 'error', 'message': f'Missing required config keys: {missing_keys}', 'count': 0}

        # Create unified database (no pool: single short-lived subprocess)
        db_path = config.get('db_path', '/opt/media-downloader/database/media_downloader.db')
        unified_db = UnifiedDatabase(db_path, use_pool=False)
        set_database_reference(unified_db)  # Set for signal handler cleanup

        try:
            # Create downloader instance using the factory
            downloader = downloader_factory(config, unified_db)

            username = config['username']
            content_type = config['content_type']
            temp_dir = Path(config['temp_dir'])

            # Log that we're starting this user (for scraping monitor)
            stderr_log(f"Processing {module_name} {content_type} for @{username}", "info")

            # Create directories
            temp_dir.mkdir(parents=True, exist_ok=True)

            # Download content with deferred database recording
            try:
                count = downloader.download(
                    username=username,
                    content_type=content_type,
                    days_back=config.get('days_back', 3),
                    max_downloads=config.get('max_downloads', 50),
                    output_dir=str(temp_dir),
                    phrase_config=config.get('phrase_config'),
                    defer_database=True  # Defer recording until after file move
                )

                # Get pending downloads for main script to record after move
                pending_downloads = downloader.get_pending_downloads()

                # Log to monitor
                log_download_result(scraper_id, username, count or 0, error=None)

                result = {
                    'status': 'success',
                    'count': count or 0,
                    'pending_downloads': pending_downloads
                }

                # Propagate auth failure flag if the downloader supports it
                # (getattr defaults keep this optional per downloader class)
                if getattr(downloader, 'auth_failed', False):
                    result['auth_failed'] = True
                if getattr(downloader, 'user_id_failed', False):
                    result['user_id_failed'] = True
                if getattr(downloader, 'invalid_owner', False):
                    result['invalid_owner'] = True

                return result

            except Exception as e:
                stderr_log(f"{module_name} download error: {e}", "error")
                import traceback
                stderr_log(traceback.format_exc(), "error")

                # Log failure to monitor
                log_download_result(scraper_id, username, 0, error=str(e))

                return {
                    'status': 'error',
                    'message': str(e),
                    'count': 0
                }
        finally:
            # Explicitly close database connection before subprocess exits
            if unified_db:
                try:
                    unified_db.close()
                    stderr_log("Database connection closed", "debug")
                except Exception as e:
                    stderr_log(f"Error closing database: {e}", "warning")

    return run_download
|
||||
|
||||
|
||||
def create_download_wrapper_multi(module_name: str, scraper_id: str, downloader_factory: callable):
    """
    Factory function to create multi-content-type download wrapper functions.

    Similar to create_download_wrapper but handles multiple content types
    in a single browser session.

    Args:
        module_name: Display name for logging (e.g., "FastDL")
        scraper_id: Identifier for the scraper (e.g., "fastdl")
        downloader_factory: Callable that takes (config, unified_db) and returns a downloader instance

    Returns:
        A run_download_multi function that can be used as the main entry point
    """
    # Setup signal handlers at wrapper creation time
    setup_signal_handlers(module_name)

    def run_download_multi(config: dict) -> dict:
        """Run multi-content-type download in isolated subprocess.

        Expects 'username', 'content_types' (list) and 'output_dirs'
        (content_type -> directory mapping) in config. Returns a dict with
        'status' and per-content-type 'results'.
        """
        # Deferred project imports (see run_download for rationale).
        from modules.unified_database import UnifiedDatabase
        from modules.monitor_wrapper import log_download_result

        # Validate required config parameters
        required_keys = ['username', 'content_types', 'output_dirs']
        missing_keys = [key for key in required_keys if key not in config]
        if missing_keys:
            return {'status': 'error', 'message': f'Missing required config keys: {missing_keys}', 'results': {}}

        # Create unified database
        db_path = config.get('db_path', '/opt/media-downloader/database/media_downloader.db')
        unified_db = UnifiedDatabase(db_path, use_pool=False)
        set_database_reference(unified_db)

        try:
            # Create downloader instance using the factory
            downloader = downloader_factory(config, unified_db)

            username = config['username']
            content_types = config['content_types']
            output_dirs = config['output_dirs']

            stderr_log(f"Processing {module_name} multi ({', '.join(content_types)}) for @{username}", "info")

            # Create temp directories
            for ct, dir_path in output_dirs.items():
                Path(dir_path).mkdir(parents=True, exist_ok=True)

            try:
                # Single call drives all content types in one session
                results = downloader.download_multi(
                    username=username,
                    content_types=content_types,
                    output_dirs=output_dirs,
                    days_back=config.get('days_back', 3),
                    max_downloads=config.get('max_downloads', 50),
                    phrase_configs=config.get('phrase_configs'),
                    defer_database=True
                )

                # Log total to monitor
                total_count = sum(r.get('count', 0) for r in results.values())
                log_download_result(scraper_id, username, total_count, error=None)

                return {
                    'status': 'success',
                    'results': results
                }

            except Exception as e:
                stderr_log(f"{module_name} multi-download error: {e}", "error")
                import traceback
                stderr_log(traceback.format_exc(), "error")

                log_download_result(scraper_id, username, 0, error=str(e))

                return {
                    'status': 'error',
                    'message': str(e),
                    'results': {}
                }
        finally:
            # Always release the DB connection before the subprocess exits
            if unified_db:
                try:
                    unified_db.close()
                    stderr_log("Database connection closed", "debug")
                except Exception as e:
                    stderr_log(f"Error closing database: {e}", "warning")

    return run_download_multi
|
||||
|
||||
|
||||
def run_wrapper_main(run_download_func: callable):
    """
    Standard main entry point for subprocess wrappers.

    Reads a JSON config document from stdin, invokes the download function,
    prints the JSON result on stdout, and exits 0 on success / 1 otherwise.

    Always emits valid JSON on stdout — even when reading/parsing the config
    or running the download raises — so the parent process can reliably
    parse the result (same contract as the forum wrapper's __main__ block).

    Args:
        run_download_func: The download function created by create_download_wrapper
    """
    import json

    try:
        # Read config from stdin
        config_json = sys.stdin.read()
        config = json.loads(config_json)

        # Run download
        result = run_download_func(config)
    except Exception as e:
        # Ensure we always output valid JSON even on early errors
        import traceback
        stderr_log(f"Fatal subprocess error: {e}")
        stderr_log(traceback.format_exc())
        result = {
            'status': 'error',
            'message': f'Subprocess error: {str(e)}',
            'count': 0
        }

    # Output result as JSON; flush so the parent sees it before exit.
    # .get() avoids a KeyError if a download function omits 'status'.
    print(json.dumps(result), flush=True)
    sys.exit(0 if result.get('status') == 'success' else 1)
|
||||
52
wrappers/fastdl_subprocess_wrapper.py
Normal file
52
wrappers/fastdl_subprocess_wrapper.py
Normal file
@@ -0,0 +1,52 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Subprocess wrapper for FastDL operations to avoid asyncio event loop conflicts.
|
||||
This runs FastDL operations in a completely isolated subprocess.
|
||||
|
||||
Supports both single content_type (backward compatible) and multi content_types mode.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
# Add parent directory to path
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from wrappers.base_subprocess_wrapper import create_download_wrapper, create_download_wrapper_multi
|
||||
|
||||
|
||||
def create_fastdl_downloader(config, unified_db):
    """Factory function to create a FastDL downloader instance.

    Args:
        config: Subprocess config dict; reads 'defer_database', 'headless'
            and 'high_res' (all optional).
        unified_db: UnifiedDatabase instance handed to the downloader.

    Returns:
        A configured FastDLDownloader.
    """
    from modules.fastdl_module import FastDLDownloader

    # When defer_database is set (paid content mode), skip FastDL's own dedup
    # checks — the caller handles dedup via its own known_ids filtering.
    use_db = not config.get('defer_database', False)

    return FastDLDownloader(
        headless=config.get('headless', True),
        show_progress=True,
        use_database=use_db,
        log_callback=None,  # Module uses universal logger, no callback needed
        unified_db=unified_db,
        high_res=config.get('high_res', False)
    )
|
||||
|
||||
|
||||
# Create download functions using the factories; these also install the
# SIGTERM/SIGINT handlers as a side effect of wrapper creation.
run_fastdl_download = create_download_wrapper("FastDL", "fastdl", create_fastdl_downloader)
run_fastdl_download_multi = create_download_wrapper_multi("FastDL", "fastdl", create_fastdl_downloader)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Config arrives as a single JSON document on stdin.
    config_json = sys.stdin.read()
    config = json.loads(config_json)

    # 'content_types' (plural) selects multi-content mode; otherwise fall
    # back to the single content_type entry point (backward compatible).
    if 'content_types' in config:
        result = run_fastdl_download_multi(config)
    else:
        result = run_fastdl_download(config)

    # Result goes to stdout as JSON; exit code reflects success/failure.
    print(json.dumps(result))
    sys.exit(0 if result.get('status') == 'success' else 1)
|
||||
306
wrappers/forum_subprocess_wrapper.py
Normal file
306
wrappers/forum_subprocess_wrapper.py
Normal file
@@ -0,0 +1,306 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Subprocess wrapper for forum operations to avoid asyncio event loop conflicts
|
||||
This runs forum operations in a completely isolated subprocess
|
||||
"""
|
||||
|
||||
import sys
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
# Add parent directory to path
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from wrappers.base_subprocess_wrapper import (
|
||||
setup_signal_handlers,
|
||||
stderr_log,
|
||||
set_database_reference
|
||||
)
|
||||
|
||||
# Setup signal handlers early for graceful termination (closes the DB
# registered via set_database_reference before exiting on SIGTERM/SIGINT)
setup_signal_handlers("Forums")
|
||||
|
||||
def run_forum_download(config):
    """Run forum download in isolated subprocess.

    Searches the configured forum for matching threads, downloads each new
    thread's images into a temp dir, moves them through MoveManager (with
    face recognition filtering), and records database rows only for files
    that survived the move.

    Args:
        config: Dict with required keys 'download_dir', 'forum_name',
            'forum_config', 'temp_dir', 'dest_dir' plus optional 'db_path'.

    Returns:
        Dict with 'status', 'count', and on success 'new_threads',
        'total_results', 'final_files' and 'review_files'.
    """
    # Redirect stdout to stderr to prevent library output from polluting JSON result
    # (InsightFace/ONNX prints status messages to stdout)
    original_stdout = sys.stdout
    sys.stdout = sys.stderr

    from modules.forum_downloader import ForumDownloader
    from modules.unified_database import UnifiedDatabase
    from modules.forum_db_adapter import ForumDatabaseAdapter
    from modules.move_module import MoveManager
    from modules.monitor_wrapper import log_download_result

    # Create unified database (use db_path from config like other wrappers)
    db_path = config.get('db_path', '/opt/media-downloader/database/media_downloader.db')
    unified_db = UnifiedDatabase(db_path, use_pool=False)
    set_database_reference(unified_db)  # For graceful cleanup on SIGTERM

    # Track all moved files across threads for notification
    all_final_files = []
    all_review_files = []

    try:
        # Validate required config parameters
        required_keys = ['download_dir', 'forum_name', 'forum_config', 'temp_dir', 'dest_dir']
        missing_keys = [key for key in required_keys if key not in config]
        if missing_keys:
            return {'status': 'error', 'message': f'Missing required config keys: {missing_keys}', 'count': 0}
        forum_db_adapter = ForumDatabaseAdapter(unified_db)

        # Create ForumDownloader - uses its own universal logger now
        # NOTE(review): db_path receives the adapter object, not a path —
        # presumably ForumDownloader accepts either; confirm in its API.
        forum_module = ForumDownloader(
            headless=config.get('headless', True),
            show_progress=True,
            use_database=True,
            db_path=forum_db_adapter,
            download_dir=config['download_dir'],
            log_callback=None  # Module uses universal logger
        )

        # Create MoveManager - uses its own universal logger now
        move_manager = MoveManager(
            log_callback=None,  # Module uses universal logger
            notifier=None,  # Notifications handled by main process
            unified_db=unified_db,
            face_recognition_enabled=True  # Enable face recognition for forum files
        )

        forum_name = config['forum_name']
        forum_config = config['forum_config']
        temp_dir = Path(config['temp_dir'])
        dest_dir = Path(config['dest_dir'])

        # Create directories
        temp_dir.mkdir(parents=True, exist_ok=True)
        dest_dir.mkdir(parents=True, exist_ok=True)

        # Login if credentials provided
        if forum_config.get('username') and forum_config.get('password'):
            success = forum_module.login(
                forum_name=forum_name,
                username=forum_config['username'],
                password=forum_config['password'],
                forum_url=forum_config.get('forum_url'),
                forum_type=forum_config.get('forum_type'),
                cloudflare_enabled=forum_config.get('cloudflare_enabled', False)
            )
            if not success:
                return {'status': 'error', 'message': 'Login failed', 'count': 0}

        # Use monitor_search to find threads
        search_result = forum_module.monitor_search(
            forum_name=forum_name,
            search_query=forum_config.get('search_query', ''),
            search_url=forum_config.get('search_url'),
            forum_url=forum_config.get('forum_url'),
            auto_track_days=forum_config.get('auto_track_days', 30),
            base_download_path=str(temp_dir),
            destination_path=str(dest_dir),
            username=forum_config.get('username'),
            password=forum_config.get('password'),
            newer_than_days=forum_config.get('newer_than_days', 3),
            older_than_days=forum_config.get('older_than_days'),
            external_only=forum_config.get('external_only', True),
            check_frequency_hours=0,
            cloudflare_enabled=forum_config.get('cloudflare_enabled', False)
        )

        # Download the threads that were found
        total_images = 0
        new_threads = search_result.get('new_threads', 0)
        total_results = search_result.get('total_results', 0)

        if search_result.get('status') == 'success' and search_result.get('results'):
            stderr_log(f"Downloading {len(search_result['results'])} new threads...")

            for thread_result in search_result['results']:
                thread_url = thread_result.get('url')
                if thread_url:
                    # Download thread in a separate thread to avoid Playwright async conflict
                    # Playwright sync API doesn't work inside asyncio loops
                    import concurrent.futures

                    def _download_in_thread():
                        # Closure over thread_url/forum_config for this iteration;
                        # executed synchronously via future.result() below.
                        return forum_module.download_thread(
                            thread_url=thread_url,
                            forum_name=forum_name,
                            download_images=True,
                            base_download_path=str(temp_dir),
                            destination_path=str(dest_dir),
                            username=forum_config.get('username'),
                            password=forum_config.get('password'),
                            external_only=forum_config.get('external_only', True),
                            skip_file_move=True,  # Keep files in temp for move_manager
                            cloudflare_enabled=forum_config.get('cloudflare_enabled', False),
                            defer_database=True,  # Defer recording until after file move
                            auto_track_days=forum_config.get('auto_track_days', 30)
                        )

                    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
                        future = executor.submit(_download_in_thread)
                        download_result = future.result()

                    if download_result.get('status') == 'success':
                        images_downloaded = download_result.get('images_downloaded', 0)

                        # Use move_manager to process files (with face recognition)
                        if images_downloaded > 0:
                            thread_temp_dir = download_result.get('thread_dir')
                            final_dir = download_result.get('final_dir')

                            if thread_temp_dir and final_dir:
                                stderr_log(f"Processing {images_downloaded} images with face recognition...")

                                # Start batch context for proper database tracking
                                thread_title = thread_result.get('title', 'Unknown Thread')
                                move_manager.start_batch(
                                    platform='forums',
                                    source=forum_name,
                                    content_type='image',
                                    search_term=thread_title
                                )

                                # Move files with face recognition filtering
                                # preserve_if_no_timestamp=True preserves the timestamps already set on files
                                stats = move_manager.move_files_batch(
                                    source_dir=thread_temp_dir,
                                    dest_dir=final_dir,
                                    file_timestamps=None,
                                    extensions=['.jpg', '.jpeg', '.png', '.gif', '.webp', '.mp4', '.webm'],
                                    preserve_if_no_timestamp=True
                                )

                                # Capture file lists BEFORE end_batch() clears them
                                # These will be returned for notification with correct paths
                                for f in move_manager.moved_files:
                                    all_final_files.append({
                                        'file_path': f.get('file_path'),
                                        'filename': f.get('filename'),
                                        'content_type': f.get('content_type') or 'image'
                                    })
                                for f in move_manager.review_queue_files:
                                    all_review_files.append({
                                        'file_path': f.get('file_path'),
                                        'filename': f.get('filename'),
                                        'content_type': f.get('content_type') or 'image'
                                    })

                                # End batch to finalize database records
                                move_manager.end_batch()

                                # Record pending downloads after successful move
                                # This ensures database records only exist for files that made it to final destination
                                pending_downloads = forum_module.get_pending_downloads()
                                if pending_downloads and stats.get('moved', 0) > 0:
                                    for download in pending_downloads:
                                        try:
                                            # Update file_path to final destination
                                            filename = download.get('filename')
                                            if filename:
                                                # Sanitize filename to prevent path traversal
                                                safe_filename = Path(filename).name  # Strip any directory components
                                                actual_path = Path(final_dir) / safe_filename
                                                # Validate path is within final_dir (defense in depth)
                                                try:
                                                    actual_path.resolve().relative_to(Path(final_dir).resolve())
                                                except ValueError:
                                                    stderr_log(f"Path traversal attempt blocked: (unknown)", "warning")
                                                    continue
                                                if actual_path.exists():
                                                    forum_db_adapter.record_download(
                                                        url=download.get('url'),
                                                        thread_id=download.get('thread_id'),
                                                        post_id=download.get('post_id'),
                                                        filename=safe_filename,
                                                        metadata=download.get('metadata'),
                                                        file_path=str(actual_path),
                                                        post_date=download.get('post_date')
                                                    )
                                        except Exception as e:
                                            stderr_log(f"Failed to record pending download: {e}")
                                    forum_module.clear_pending_downloads()
                                    stderr_log(f"Recorded {len(pending_downloads)} downloads to database after move")

                                # Update total with files that went to FINAL destination only (not review queue)
                                # stats['moved'] includes ALL moves, stats['review_queue'] tracks review queue moves
                                actual_final_count = stats.get('moved', 0) - stats.get('review_queue', 0)
                                total_images += actual_final_count
                                stderr_log(f"Moved {actual_final_count} images to final destination, {stats.get('review_queue', 0)} to review queue")
                            else:
                                # Fallback if dirs not provided
                                total_images += images_downloaded
                                stderr_log(f"Downloaded {images_downloaded} images from thread: {thread_result.get('title', 'Unknown')[:60]}...")
                        else:
                            stderr_log(f"No new images in thread: {thread_result.get('title', 'Unknown')[:60]}...")
                    else:
                        stderr_log(f"Failed to download thread: {thread_result.get('title', 'Unknown')}")

        # Cleanup
        forum_module.cleanup()

        # Log to monitor (success)
        forum_name = config['forum_name']
        log_download_result('forums', forum_name, total_images, error=None)

        # Return result with file lists for notification
        return {
            'status': 'success',
            'count': total_images,
            'new_threads': new_threads,
            'total_results': total_results,
            'final_files': all_final_files,  # Files that went to final destination
            'review_files': all_review_files  # Files that went to review queue
        }

    except Exception as e:
        forum_name = config.get('forum_name', 'unknown')
        stderr_log(f"Forum download error: {e}", "error")
        import traceback
        stderr_log(traceback.format_exc(), "error")

        # Log failure to monitor
        log_download_result('forums', forum_name, 0, error=str(e))
        return {
            'status': 'error',
            'message': str(e),
            'count': 0
        }
    finally:
        # Restore stdout before returning (so JSON output goes to real stdout)
        sys.stdout = original_stdout

        # Explicitly close database connection before subprocess exits
        if unified_db:
            try:
                unified_db.close()
                stderr_log("Database connection closed")
            except Exception as e:
                stderr_log(f"Error closing database: {e}")
|
||||
|
||||
if __name__ == '__main__':
    try:
        # Read config from stdin
        config_json = sys.stdin.read()
        config = json.loads(config_json)

        # Run forum download
        result = run_forum_download(config)
    except Exception as e:
        # Ensure we always output valid JSON even on early errors
        import traceback
        stderr_log(f"Fatal subprocess error: {e}")
        stderr_log(traceback.format_exc())
        result = {
            'status': 'error',
            'message': f'Subprocess error: {str(e)}',
            'count': 0
        }

    # Output result as JSON - ensure flushing before exit
    stderr_log(f"Subprocess complete, outputting JSON result: status={result.get('status')}, count={result.get('count')}")
    print(json.dumps(result), flush=True)
    sys.stdout.flush()
    sys.exit(0 if result['status'] == 'success' else 1)
|
||||
138
wrappers/imginn_api_subprocess_wrapper.py
Normal file
138
wrappers/imginn_api_subprocess_wrapper.py
Normal file
@@ -0,0 +1,138 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Subprocess wrapper for ImgInn API-based operations.
|
||||
Uses the API-based module instead of DOM scraping.
|
||||
|
||||
Supports both single content_type (backward compatible) and multi content_types mode.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
# Add parent directory to path
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from wrappers.base_subprocess_wrapper import (
|
||||
create_download_wrapper, setup_signal_handlers, set_database_reference,
|
||||
stderr_log
|
||||
)
|
||||
|
||||
|
||||
def create_imginn_api_downloader(config, unified_db):
    """Factory function to create an ImgInn API downloader instance.

    Args:
        config: Subprocess config dict; reads optional 'headless'.
        unified_db: UnifiedDatabase instance handed to the downloader.

    Returns:
        A configured ImgInnAPIDownloader.
    """
    from modules.imginn_api_module import ImgInnAPIDownloader

    return ImgInnAPIDownloader(
        headless=config.get('headless', True),
        show_progress=True,
        use_database=True,
        log_callback=None,  # Module uses its own logger
        unified_db=unified_db
    )
|
||||
|
||||
|
||||
# Create the single-content download function using the factory
# (validation, DB lifecycle and monitor logging come from the base wrapper)
run_imginn_api_download = create_download_wrapper("ImgInnAPI", "imginn_api", create_imginn_api_downloader)
|
||||
|
||||
|
||||
def run_imginn_api_download_multi(config: dict) -> dict:
    """Run multi-content-type download by calling download() for each content type.

    Unlike create_download_wrapper_multi, this loops over content types with
    separate download() calls (the API module has no download_multi), and a
    failure for one content type does not abort the others.

    Args:
        config: Dict with required keys 'username', 'content_types',
            'output_dirs' plus optional 'db_path', 'days_back',
            'max_downloads', 'phrase_config'.

    Returns:
        Dict with 'status' and per-content-type 'results'.
    """
    from modules.unified_database import UnifiedDatabase
    from modules.monitor_wrapper import log_download_result

    setup_signal_handlers("ImgInnAPI")

    required_keys = ['username', 'content_types', 'output_dirs']
    missing_keys = [key for key in required_keys if key not in config]
    if missing_keys:
        return {'status': 'error', 'message': f'Missing required config keys: {missing_keys}', 'results': {}}

    db_path = config.get('db_path', '/opt/media-downloader/database/media_downloader.db')
    unified_db = UnifiedDatabase(db_path, use_pool=False)
    set_database_reference(unified_db)

    try:
        downloader = create_imginn_api_downloader(config, unified_db)

        username = config['username']
        content_types = config['content_types']
        output_dirs = config['output_dirs']

        stderr_log(f"Processing Instagram multi ({', '.join(content_types)}) for @{username}", "info")

        # Create temp directories
        for ct, dir_path in output_dirs.items():
            Path(dir_path).mkdir(parents=True, exist_ok=True)

        results = {}
        total_count = 0

        for ct in content_types:
            output_dir = output_dirs.get(ct)
            if not output_dir:
                # No target directory configured for this content type: skip
                results[ct] = {'count': 0, 'pending_downloads': []}
                continue

            try:
                count = downloader.download(
                    username=username,
                    content_type=ct,
                    days_back=config.get('days_back', 3),
                    max_downloads=config.get('max_downloads', 50),
                    output_dir=output_dir,
                    phrase_config=config.get('phrase_config'),
                    defer_database=True
                )

                # Drain pending downloads per content type so each result
                # only carries its own entries
                pending_downloads = downloader.get_pending_downloads()
                downloader.clear_pending_downloads()

                results[ct] = {
                    'count': count or 0,
                    'pending_downloads': pending_downloads
                }
                total_count += count or 0

            except Exception as e:
                # Per-type failure: log it and continue with the next type
                stderr_log(f"ImgInn API download error for {ct}: {e}", "error")
                import traceback
                stderr_log(traceback.format_exc(), "error")
                results[ct] = {'count': 0, 'pending_downloads': []}

        log_download_result("imginn_api", username, total_count, error=None)

        return {
            'status': 'success',
            'results': results
        }

    except Exception as e:
        stderr_log(f"ImgInn API multi-download error: {e}", "error")
        import traceback
        stderr_log(traceback.format_exc(), "error")
        log_download_result("imginn_api", config.get('username', ''), 0, error=str(e))
        return {
            'status': 'error',
            'message': str(e),
            'results': {}
        }
    finally:
        # Best-effort close before subprocess exit
        if unified_db:
            try:
                unified_db.close()
            except Exception:
                pass
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Config arrives as a single JSON document on stdin.
    config_json = sys.stdin.read()
    config = json.loads(config_json)

    # 'content_types' (plural) selects multi mode; otherwise single mode.
    if 'content_types' in config:
        result = run_imginn_api_download_multi(config)
    else:
        result = run_imginn_api_download(config)

    # Result goes to stdout as JSON; exit code reflects success/failure.
    print(json.dumps(result))
    sys.exit(0 if result.get('status') == 'success' else 1)
|
||||
37
wrappers/imginn_subprocess_wrapper.py
Normal file
37
wrappers/imginn_subprocess_wrapper.py
Normal file
@@ -0,0 +1,37 @@
|
||||
#!/usr/bin/env python3
"""
Subprocess wrapper for ImgInn operations to avoid asyncio event loop conflicts.
This runs ImgInn operations in a completely isolated subprocess.

Refactored to use the common wrapper factory from base_subprocess_wrapper.
"""

import sys
from pathlib import Path

# Make the project root importable when this file runs as a standalone script.
sys.path.insert(0, str(Path(__file__).parent.parent))

from wrappers.base_subprocess_wrapper import create_download_wrapper, run_wrapper_main


def create_imginn_downloader(config, unified_db):
    """Factory function to create an ImgInn downloader instance."""
    # Imported lazily so the subprocess only pays the cost when actually used.
    from modules.imginn_module import ImgInnDownloader

    downloader_options = {
        'cookie_file': config.get('cookie_file', '/opt/media-downloader/cookies/imginn_cookies.json'),
        'headless': config.get('headless', False),  # Headed mode with Xvfb
        'show_progress': True,
        'use_database': True,
        'log_callback': None,  # Module uses universal logger
        'unified_db': unified_db,
    }
    return ImgInnDownloader(**downloader_options)


# The shared factory turns the creator above into the standard download entry point.
run_imginn_download = create_download_wrapper("ImgInn", "imginn", create_imginn_downloader)


if __name__ == '__main__':
    run_wrapper_main(run_imginn_download)
|
||||
37
wrappers/instagram_client_subprocess_wrapper.py
Normal file
37
wrappers/instagram_client_subprocess_wrapper.py
Normal file
@@ -0,0 +1,37 @@
|
||||
#!/usr/bin/env python3
"""
Subprocess wrapper for Instagram Client (direct API) operations.
Runs in an isolated subprocess to avoid asyncio event loop conflicts.

Uses the common wrapper factory from base_subprocess_wrapper.
"""

import sys
from pathlib import Path

# Ensure the project root is on sys.path so sibling packages resolve.
sys.path.insert(0, str(Path(__file__).parent.parent))

from wrappers.base_subprocess_wrapper import create_download_wrapper, run_wrapper_main


def create_instagram_client_downloader(config, unified_db):
    """Factory function to create an Instagram Client downloader instance."""
    # Lazy import keeps subprocess startup light until the downloader is needed.
    from modules.instagram_client_module import InstagramClientDownloader

    downloader = InstagramClientDownloader(
        unified_db=unified_db,
        log_callback=None,  # Module uses universal logger
        use_database=True,
        show_progress=True,
    )
    return downloader


# Build the standard download entry point from the shared factory helper.
run_instagram_client_download = create_download_wrapper(
    "InstagramClient", "instagram_client", create_instagram_client_downloader
)


if __name__ == '__main__':
    run_wrapper_main(run_instagram_client_download)
|
||||
37
wrappers/snapchat_client_subprocess_wrapper.py
Normal file
37
wrappers/snapchat_client_subprocess_wrapper.py
Normal file
@@ -0,0 +1,37 @@
|
||||
#!/usr/bin/env python3
"""
Subprocess wrapper for Snapchat Client (direct API) operations.
Runs in an isolated subprocess to avoid asyncio event loop conflicts.

Uses the common wrapper factory from base_subprocess_wrapper.
"""

import sys
from pathlib import Path

# Put the project root on the import path for script-style execution.
sys.path.insert(0, str(Path(__file__).parent.parent))

from wrappers.base_subprocess_wrapper import create_download_wrapper, run_wrapper_main


def create_snapchat_client_downloader(config, unified_db):
    """Factory function to create a Snapchat Client downloader instance."""
    # Deferred import: only load the heavy module inside the worker subprocess.
    from modules.snapchat_client_module import SnapchatClientDownloader

    client_options = {
        'show_progress': True,
        'use_database': True,
        'log_callback': None,  # Module uses universal logger
        'unified_db': unified_db,
    }
    return SnapchatClientDownloader(**client_options)


# Wire the creator into the common subprocess download entry point.
run_snapchat_client_download = create_download_wrapper(
    "SnapchatClient", "snapchat_client", create_snapchat_client_downloader
)


if __name__ == '__main__':
    run_wrapper_main(run_snapchat_client_download)
|
||||
36
wrappers/snapchat_subprocess_wrapper.py
Executable file
36
wrappers/snapchat_subprocess_wrapper.py
Executable file
@@ -0,0 +1,36 @@
|
||||
#!/usr/bin/env python3
"""
Subprocess wrapper for Snapchat operations to avoid asyncio event loop conflicts.
This runs Snapchat operations in a completely isolated subprocess.

Refactored to use the common wrapper factory from base_subprocess_wrapper.
"""

import sys
from pathlib import Path

# Project root must be importable when invoked directly as a script.
sys.path.insert(0, str(Path(__file__).parent.parent))

from wrappers.base_subprocess_wrapper import create_download_wrapper, run_wrapper_main


def create_snapchat_downloader(config, unified_db):
    """Factory function to create a Snapchat downloader instance."""
    # Import inside the factory so only the subprocess loads the scraper stack.
    from modules.snapchat_scraper import SnapchatDirectScraper

    scraper = SnapchatDirectScraper(
        unified_db=unified_db,
        log_callback=None,  # Module uses universal logger
        use_database=True,
        show_progress=True,
        headless=config.get('headless', False),  # Headed mode with Xvfb
    )
    return scraper


# Produce the standard subprocess download entry point via the shared factory.
run_snapchat_download = create_download_wrapper("Snapchat", "snapchat", create_snapchat_downloader)


if __name__ == '__main__':
    run_wrapper_main(run_snapchat_download)
|
||||
39
wrappers/toolzu_subprocess_wrapper.py
Normal file
39
wrappers/toolzu_subprocess_wrapper.py
Normal file
@@ -0,0 +1,39 @@
|
||||
#!/usr/bin/env python3
"""
Subprocess wrapper for Toolzu operations to avoid asyncio event loop conflicts.
This runs Toolzu operations in a completely isolated subprocess.

Refactored to use the common wrapper factory from base_subprocess_wrapper.
"""

import sys
from pathlib import Path

# Resolve project-local packages by prepending the repository root.
sys.path.insert(0, str(Path(__file__).parent.parent))

from wrappers.base_subprocess_wrapper import create_download_wrapper, run_wrapper_main


def create_toolzu_downloader(config, unified_db):
    """Factory function to create a Toolzu downloader instance."""
    # Lazy import: the Toolzu module is only needed inside the worker process.
    from modules.toolzu_module import ToolzuDownloader

    options = {
        'headless': config.get('headless', False),
        'show_progress': True,
        'use_database': True,
        'log_callback': None,  # Module uses universal logger
        'unified_db': unified_db,
        'cookie_file': config.get('cookie_file', '/opt/media-downloader/cookies/toolzu_cookies.json'),
        'toolzu_email': config.get('toolzu_email'),
        'toolzu_password': config.get('toolzu_password'),
    }
    return ToolzuDownloader(**options)


# Assemble the standard download entry point from the shared wrapper factory.
run_toolzu_download = create_download_wrapper("Toolzu", "toolzu", create_toolzu_downloader)


if __name__ == '__main__':
    run_wrapper_main(run_toolzu_download)
|
||||
Reference in New Issue
Block a user