Files
media-downloader/scripts/profile_scheduler_full.py
Todd 0d7b2b1aab Initial commit
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-29 22:42:55 -04:00

179 lines
6.2 KiB
Python

#!/usr/bin/env python3
"""Full scheduler startup profiler - mimics media-downloader.py --scheduler exactly.
Adds memory logging at every stage and a background thread that monitors RSS every 2 seconds.
"""
import os
import sys
import gc
import threading
import time
# Set up environment exactly like the systemd service
# Plain assignments below overwrite whatever the calling shell exported.
# NOTE(review): PYTHONUNBUFFERED / PYTHONDONTWRITEBYTECODE are normally read at
# interpreter startup; set here they presumably only reach child processes - confirm.
os.environ['PYTHONUNBUFFERED'] = '1'
os.environ['PYTHONDONTWRITEBYTECODE'] = '1'
os.environ['DATABASE_BACKEND'] = 'postgresql'
# NOTE(review): this URL embeds the database password in plain text inside a
# committed script. Rotate the credential and load it from the service's
# environment / a secret store instead of hard-coding it here.
os.environ['DATABASE_URL'] = 'postgresql://media_downloader:PNsihOXvvuPwWiIvGlsc9Fh2YmMmB@localhost/media_downloader'
os.environ['HOME'] = '/root'
os.environ['PLAYWRIGHT_BROWSERS_PATH'] = '/root/.cache/ms-playwright'
# DISPLAY is only filled in when the caller has not already provided one.
os.environ.setdefault('DISPLAY', ':100')
# Run from the application root and make it importable, so the relative
# 'database/...' paths and the 'modules.*' imports further down resolve.
os.chdir('/opt/media-downloader')
sys.path.insert(0, '/opt/media-downloader')
def get_rss_mb():
    """Return the resident set size (RSS) of this process in MB.

    Reads the ``VmRSS:`` line of ``/proc/self/status`` (a value in kB) and
    converts it to MB.

    Returns:
        The RSS in MB as a float, or ``0`` when the value cannot be read
        (no ``/proc``, no ``VmRSS`` line, or an unparsable line).
    """
    try:
        with open('/proc/self/status') as f:
            for line in f:
                if line.startswith('VmRSS:'):
                    return int(line.split()[1]) / 1024
    except (OSError, ValueError, IndexError):
        # Best-effort measurement: an unreadable or malformed status file is
        # reported as 0. Only these errors are swallowed; a bare ``except``
        # would also eat SystemExit/KeyboardInterrupt, e.g. the sys.exit()
        # raised by a signal handler that fires while this function is running.
        pass
    return 0
def get_child_rss_mb():
    """Return the summed RSS, in MB, of this process's direct children.

    Runs ``ps --ppid <own pid>`` and adds up the ``rss`` column (kB). Only
    direct children are selected by ``--ppid``; deeper descendants are not
    included.

    The ``ps`` helper is itself a child of this process, so it shows up in its
    own listing. Its row is excluded by pid so that the measurement does not
    include the cost of measuring.

    Returns:
        Total child RSS in MB as a float, or ``0`` when ``ps`` cannot be run
        or does not answer within 5 seconds.
    """
    import subprocess

    # Separate -o options with empty headers: no header row is printed, and the
    # "pid=X,rss=Y" single-column ambiguity of a comma-joined spec is avoided.
    cmd = ['ps', '--ppid', str(os.getpid()), '-o', 'pid=', '-o', 'rss=']
    try:
        with subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            text=True,
        ) as proc:
            helper_pid = proc.pid
            try:
                output, _ = proc.communicate(timeout=5)
            except subprocess.TimeoutExpired:
                # Do not leave a stuck helper behind; reap it and give up.
                proc.kill()
                proc.communicate()
                return 0
    except OSError:
        # ps is missing or could not be started: best-effort, report nothing.
        return 0

    total_kb = 0
    for row in output.splitlines():
        fields = row.split()
        if len(fields) != 2:
            continue
        try:
            child_pid = int(fields[0])
            rss_kb = int(fields[1])
        except ValueError:
            # Skip a malformed row instead of discarding the whole sample.
            continue
        if child_pid == helper_pid:
            continue
        total_kb += rss_kb
    return total_kb / 1024  # kB to MB
# Memory monitoring thread
# Module-level state shared between the monitor thread and the main thread:
#   stop_monitor - memory_monitor() keeps looping while this is False;
#                  graceful_shutdown() sets it to True.
#   peak_rss     - highest RSS (MB) of this process seen so far by
#                  memory_monitor(); child processes are not included.
stop_monitor = False
peak_rss = 0
def memory_monitor():
    """Sample memory usage in a loop and report each sample on stderr.

    Runs until the module-level ``stop_monitor`` flag becomes true. Every
    iteration measures this process's RSS and the RSS of its children, raises
    the module-level ``peak_rss`` when this process's RSS exceeds it, writes
    one ``[MEMORY]`` line to stderr, and then sleeps for 2 seconds.

    A line is written for every sample; there is no change threshold or
    rate limiting.
    """
    global peak_rss
    while True:
        if stop_monitor:
            break
        own_mb = get_rss_mb()
        children_mb = get_child_rss_mb()
        combined_mb = own_mb + children_mb
        # The peak tracks this process only, not the combined figure.
        if own_mb > peak_rss:
            peak_rss = own_mb
        report = f"[MEMORY] RSS={own_mb:.0f}MB Children={children_mb:.0f}MB Total={combined_mb:.0f}MB Peak={peak_rss:.0f}MB\n"
        sys.stderr.write(report)
        sys.stderr.flush()
        time.sleep(2)
# Start memory monitoring
# daemon=True: the monitor thread never keeps the interpreter alive on exit.
monitor_thread = threading.Thread(target=memory_monitor, daemon=True)
monitor_thread.start()
# Baseline RSS, taken before any of the imports below have run.
sys.stderr.write(f"[STAGE] Baseline: {get_rss_mb():.0f}MB\n")
# Now do EXACTLY what media-downloader.py does
# --- Module-level code from media-downloader.py ---
# nest_asyncio is optional: apply it when it is installed, otherwise carry on
# without it.
try:
    import nest_asyncio
    nest_asyncio.apply()
except ImportError:
    pass
import warnings
# Suppress warnings whose message matches the pkg_resources deprecation notice.
warnings.filterwarnings("ignore", message=".*pkg_resources is deprecated.*")
import modules.db_bootstrap
import json, sqlite3, logging, argparse, subprocess, random
from pathlib import Path
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Any, Set, Tuple
import requests
from dataclasses import dataclass
# Teach sqlite3 to write datetime objects as ISO-8601 text, and to turn values
# of columns declared as "datetime" back into datetime objects.
# NOTE(review): converters presumably only fire on connections opened with
# detect_types enabled - confirm against the database modules.
sqlite3.register_adapter(datetime, lambda d: d.isoformat())
sqlite3.register_converter("datetime", lambda s: datetime.fromisoformat(s.decode()))
# Put the application root and its modules/ directory at the front of the
# import path (the root was already inserted once during environment setup).
sys.path.insert(0, str(Path('/opt/media-downloader')))
sys.path.insert(0, str(Path('/opt/media-downloader') / 'modules'))
# Import the project modules up front so that their import-time memory cost is
# captured by the "[STAGE] All imports done" figure logged right after this block.
# Any ImportError is reported on stdout and ends the run with exit status 1.
try:
    from modules.instaloader_module import InstaLoaderModule as InstaLoaderDownloader
    from modules.fastdl_module import FastDLDownloader
    from modules.imginn_module import ImgInnDownloader
    from modules.imginn_api_module import ImgInnAPIDownloader
    from modules.instagram_client_module import InstagramClientDownloader
    from modules.toolzu_module import ToolzuDownloader
    from modules.snapchat_scraper import SnapchatDirectScraper
    from modules.snapchat_client_module import SnapchatClientDownloader
    from modules.tiktok_module import TikTokDownloader
    from modules.forum_downloader import ForumDownloader
    from modules.coppermine_module import CoppermineDownloader
    from modules.download_manager import DownloadManager, DownloadItem
    from modules.settings_manager import SettingsManager
    from modules.date_utils import DateHandler, extract_date, update_timestamps
    from modules.move_module import MoveManager
    from modules.unified_database import UnifiedDatabase
    from modules.universal_logger import get_logger
    from modules.forum_db_adapter import ForumDatabaseAdapter
    from modules.pushover_notifier import PushoverNotifier, create_notifier_from_config
    from modules.service_health_monitor import ServiceHealthMonitor
    from modules.dependency_updater import DependencyUpdater
    from modules.downloader_monitor import get_monitor
    from modules.activity_status import get_activity_manager
except ImportError as e:
    print(f"Error importing modules: {e}")
    sys.exit(1)
# Each "[STAGE]" line records this process's RSS right after the step it names,
# so the order of the statements below is part of the measurement.
sys.stderr.write(f"[STAGE] All imports done: {get_rss_mb():.0f}MB\n")
# --- Scheduler section (what main() does with --scheduler) ---
from modules.scheduler import DownloadScheduler
# UnifiedDatabase was already imported with the other project modules.
from modules.unified_database import UnifiedDatabase
import signal
sys.stderr.write(f"[STAGE] Scheduler imported: {get_rss_mb():.0f}MB\n")
# Create unified database
# The relative path resolves against /opt/media-downloader (the cwd set during
# environment setup).
# NOTE(review): DATABASE_BACKEND is 'postgresql', so how this SQLite-style path
# is actually used cannot be told from here - confirm in UnifiedDatabase.
unified_db = UnifiedDatabase('database/media_downloader.db', use_pool=True, pool_size=5)
sys.stderr.write(f"[STAGE] UnifiedDatabase created: {get_rss_mb():.0f}MB\n")
# Create SettingsManager
sm = SettingsManager('database/media_downloader.db')
# Create scheduler - pass settings_manager like main() does
scheduler = DownloadScheduler(unified_db=unified_db, settings_manager=sm)
sys.stderr.write(f"[STAGE] DownloadScheduler created: {get_rss_mb():.0f}MB\n")
# Set up graceful shutdown
# Guard flag read and set by graceful_shutdown() so a second signal is ignored.
shutdown_requested = False
def graceful_shutdown(signum, frame):
    """Signal handler: stop the scheduler, release resources and exit.

    Only the first call does any work; while a shutdown is already in
    progress further signals return immediately.

    On the first call it stops the memory monitor loop, writes the final and
    peak RSS to stderr, stops the scheduler, asks the scheduler's
    ``downloader`` (when it has a truthy one) to clean up its temp
    directories, closes the unified database and exits with status 0.

    Args:
        signum: Signal number passed in by the signal machinery (unused).
        frame: Interrupted stack frame passed in by the signal machinery (unused).
    """
    global shutdown_requested, stop_monitor
    if not shutdown_requested:
        shutdown_requested = True
        stop_monitor = True

        err = sys.stderr
        err.write("\n[SHUTDOWN] Signal received, stopping...\n")
        err.write(f"[SHUTDOWN] Final RSS: {get_rss_mb():.0f}MB, Peak: {peak_rss:.0f}MB\n")

        scheduler.stop()
        # The downloader attribute may be missing; skip cleanup in that case.
        downloader = getattr(scheduler, 'downloader', None)
        if downloader:
            downloader.cleanup_all_temp_dirs()
        unified_db.close()
        sys.exit(0)
# SIGTERM and SIGINT (Ctrl-C) both go through the same shutdown handler.
signal.signal(signal.SIGTERM, graceful_shutdown)
signal.signal(signal.SIGINT, graceful_shutdown)
sys.stderr.write(f"[STAGE] About to call scheduler.start() - this will exec_module, create MediaDownloader, then enter main loop\n")
sys.stderr.write(f"[STAGE] Pre-start RSS: {get_rss_mb():.0f}MB\n")
# Flush so the stage lines above are visible before the blocking call below.
sys.stderr.flush()
# Start scheduler (this blocks - enters main loop)
scheduler.start()