#!/usr/bin/env python3
"""Profile memory usage at each stage of scheduler startup to find the 8GB culprit.

The script replays the service's startup sequence step by step (imports,
database, scheduler, downloader, monitors, background checks) and prints the
process RSS after every step, so the step that causes the growth stands out.

Import order and placement are deliberate: each import is a measured stage.
Do not hoist or "tidy" the imports, and do not remove the unused ones -- they
reproduce what media-downloader.py loads.
"""

import os
import sys
import gc

sys.path.insert(0, '/opt/media-downloader')
os.chdir('/opt/media-downloader')

# Set environment like the service does
os.environ['DATABASE_BACKEND'] = 'postgresql'
# NOTE(review): this URL embeds a live-looking database password in source
# control. Prefer loading it from the service's environment file / a secret
# store and rotating the credential; left unchanged here so the profile keeps
# hitting the same database as before.
os.environ['DATABASE_URL'] = 'postgresql://media_downloader:PNsihOXvvuPwWiIvGlsc9Fh2YmMmB@localhost/media_downloader'
os.environ['PYTHONUNBUFFERED'] = '1'


def get_rss_mb() -> float:
    """Return the current resident set size of this process in MB.

    Reads the ``VmRSS:`` line of ``/proc/self/status`` (value reported in kB)
    and converts it to MB. Returns ``0.0`` when no ``VmRSS:`` line is present.

    Raises:
        OSError: if ``/proc/self/status`` cannot be opened (e.g. on a system
            without procfs).
    """
    with open('/proc/self/status') as f:
        for line in f:
            if line.startswith('VmRSS:'):
                return int(line.split()[1]) / 1024  # kB to MB
    return 0.0


def log_mem(label: str) -> float:
    """Force a garbage collection, print the current RSS with *label*, return it.

    Collecting first means each reading reflects memory that is still
    reachable rather than garbage awaiting collection.
    """
    gc.collect()
    rss = get_rss_mb()
    print(f"[{rss:7.1f} MB] {label}")
    return rss


def _run_async_check(name, unit, monitor):
    """Run ``monitor.check_all_now(from_scheduler=True)`` on a fresh event loop.

    Prints a start banner, logs memory after the check (or after the error if
    anything raises), then collects garbage and logs memory once more.

    Args:
        name: Display name used in the log labels (e.g. ``"Reddit"``).
        unit: Word describing the returned count in the label (e.g. ``"media"``).
        monitor: Object exposing an awaitable ``check_all_now(from_scheduler=...)``.
    """
    print(f"Running {name} check_all_now...")
    try:
        # Imported here rather than at the top of the file so the import
        # happens no earlier than the first async check, as in the original
        # flow (it may already have been loaded by a dependency).
        import asyncio

        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            count = loop.run_until_complete(monitor.check_all_now(from_scheduler=True))
            log_mem(f"After {name} check ({unit}: {count})")
        finally:
            loop.close()
            # Don't leave a closed loop registered as this thread's current
            # loop; anything that looked it up later would get a dead loop.
            asyncio.set_event_loop(None)
    except Exception as e:
        # Best-effort profiling: report the failure and keep measuring.
        log_mem(f"After {name} check (error: {e})")
    gc.collect()
    log_mem("After gc.collect")


# Stage 0: Baseline
log_mem("BASELINE (python + script)")

# Stage 1: Basic imports (what media-downloader.py does at top level)
import warnings
warnings.filterwarnings("ignore", message=".*pkg_resources is deprecated.*")
log_mem("After warnings")

import modules.db_bootstrap
log_mem("After db_bootstrap")

import json, sqlite3, logging, argparse, time, subprocess, random
from pathlib import Path
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Any, Set, Tuple
import requests
from dataclasses import dataclass
log_mem("After stdlib + requests")

# Stage 2: Module imports (lines 52-80 of media-downloader.py)
from modules.instaloader_module import InstaLoaderModule as InstaLoaderDownloader
log_mem("After instaloader_module import")

from modules.fastdl_module import FastDLDownloader
log_mem("After fastdl_module import")

from modules.imginn_module import ImgInnDownloader
log_mem("After imginn_module import")

from modules.imginn_api_module import ImgInnAPIDownloader
log_mem("After imginn_api_module import")

from modules.instagram_client_module import InstagramClientDownloader
log_mem("After instagram_client_module import")

from modules.toolzu_module import ToolzuDownloader
log_mem("After toolzu_module import")

from modules.snapchat_scraper import SnapchatDirectScraper
log_mem("After snapchat_scraper import")

from modules.snapchat_client_module import SnapchatClientDownloader
log_mem("After snapchat_client_module import")

from modules.tiktok_module import TikTokDownloader
log_mem("After tiktok_module import")

from modules.forum_downloader import ForumDownloader
log_mem("After forum_downloader import (has Playwright)")

from modules.coppermine_module import CoppermineDownloader
log_mem("After coppermine_module import")

from modules.download_manager import DownloadManager, DownloadItem
log_mem("After download_manager import")

from modules.settings_manager import SettingsManager
from modules.date_utils import DateHandler, extract_date, update_timestamps
from modules.move_module import MoveManager
from modules.unified_database import UnifiedDatabase
from modules.universal_logger import get_logger
from modules.forum_db_adapter import ForumDatabaseAdapter
from modules.pushover_notifier import PushoverNotifier, create_notifier_from_config
from modules.service_health_monitor import ServiceHealthMonitor
from modules.dependency_updater import DependencyUpdater
from modules.downloader_monitor import get_monitor
from modules.activity_status import get_activity_manager
log_mem("After ALL module imports")

# Stage 3: Import scheduler and its dependencies
from modules.scheduler import DownloadScheduler
log_mem("After scheduler import (includes monitors)")

# Stage 4: Create UnifiedDatabase
db_path = '/opt/media-downloader/database/media_downloader.db'
unified_db = UnifiedDatabase(db_path, use_pool=True, pool_size=5)
log_mem("After UnifiedDatabase creation")

# Stage 5: Create DownloadScheduler
from modules.settings_manager import SettingsManager
sm = SettingsManager(db_path)
scheduler = DownloadScheduler(
    config_path=None,
    unified_db=unified_db,
    settings_manager=sm
)
log_mem("After DownloadScheduler creation")

# Stage 6: exec_module to load media-downloader.py (what scheduler.start() does)
import importlib.util
spec = importlib.util.spec_from_file_location(
    "media_downloader",
    Path("/opt/media-downloader/media-downloader.py")
)
media_downloader = importlib.util.module_from_spec(spec)
spec.loader.exec_module(media_downloader)
MediaDownloader = media_downloader.MediaDownloader
log_mem("After exec_module (re-loads media-downloader.py)")

# Stage 7: Create MediaDownloader instance
downloader = MediaDownloader(enable_notifications=True, unified_db=unified_db)
log_mem("After MediaDownloader creation (lazy modules)")

# Stage 8: Access one lazy module to see how much it adds
print("\n--- Testing individual module instantiation ---")
if 'fastdl' in downloader.modules:
    instance = downloader.modules['fastdl']
    log_mem("After instantiating FastDL module")
    downloader.modules.release('fastdl')
    # Drop our own reference too; otherwise this script keeps the module
    # alive and the "After releasing" reading cannot show the memory freed.
    del instance
    gc.collect()
    log_mem("After releasing FastDL module")

if 'forum' in downloader.modules or 'forums' in downloader.modules:
    key = 'forums' if 'forums' in downloader.modules else 'forum'
    instance = downloader.modules[key]
    log_mem(f"After instantiating {key} module (Playwright-based)")
    downloader.modules.release(key)
    # Same as above: release our reference before measuring.
    del instance
    gc.collect()
    log_mem(f"After releasing {key} module")

# Stage 9: Create the monitors that scheduler creates
print("\n--- Testing monitor creation ---")
from modules.youtube_channel_monitor import YouTubeChannelMonitor
from modules.easynews_monitor import EasynewsMonitor
from modules.reddit_community_monitor import RedditCommunityMonitor

yt = YouTubeChannelMonitor(db_path, get_activity_manager(unified_db))
log_mem("After YouTubeChannelMonitor creation")

en = EasynewsMonitor(db_path, get_activity_manager(unified_db))
log_mem("After EasynewsMonitor creation")

rd = RedditCommunityMonitor(db_path, get_activity_manager(unified_db))
log_mem("After RedditCommunityMonitor creation")

# Stage 10: Simulate what happens when a background task runs
print("\n--- Simulating background task execution ---")

# Test: easynews check_all_celebrities (called synchronously, no event loop)
print("Running Easynews check_all_celebrities...")
try:
    result = en.check_all_celebrities(from_scheduler=True)
    log_mem(f"After Easynews check (results: {result.get('results_found', 0)})")
except Exception as e:
    # Best-effort profiling: report the failure and keep measuring.
    log_mem(f"After Easynews check (error: {e})")
gc.collect()
log_mem("After gc.collect")

# Test: reddit check_all_now
_run_async_check("Reddit", "media", rd)

# Test: youtube check_all_now
_run_async_check("YouTube", "videos", yt)

print("\n--- DONE ---")
print(f"Final RSS: {get_rss_mb():.1f} MB")