#!/usr/bin/env python3
"""Profile memory usage at each stage of scheduler startup to find the 8GB culprit."""

import os
import sys
import gc

# Make the application's packages importable and resolve relative paths
# against the install directory.
sys.path.insert(0, '/opt/media-downloader')
os.chdir('/opt/media-downloader')

# Set environment like the service does
os.environ['DATABASE_BACKEND'] = 'postgresql'
# An already-exported DATABASE_URL takes precedence over the fallback below.
# NOTE(review): the fallback embeds a database password in source control;
# move it to an environment file / secret store and rotate the credential.
os.environ.setdefault(
    'DATABASE_URL',
    'postgresql://media_downloader:PNsihOXvvuPwWiIvGlsc9Fh2YmMmB@localhost/media_downloader',
)
# PYTHONUNBUFFERED is read by the interpreter at startup, so setting it here
# only influences child processes; it does not change this process's stdout
# buffering.
os.environ['PYTHONUNBUFFERED'] = '1'
def get_rss_mb() -> float:
    """Return this process's resident set size (RSS) in MB.

    Scans ``/proc/self/status`` for the ``VmRSS:`` line, whose value is
    reported in kB, and converts it to MB.

    Returns:
        The RSS in MB, or ``0.0`` when no ``VmRSS:`` line is present.

    Raises:
        OSError: if ``/proc/self/status`` cannot be opened.
    """
    with open('/proc/self/status') as f:
        for line in f:
            if line.startswith('VmRSS:'):
                # Second whitespace-separated field is the size in kB.
                return int(line.split()[1]) / 1024  # kB to MB
    # Always return a float so callers get one type on every path.
    return 0.0
def log_mem(label: str) -> float:
    """Print the current RSS next to ``label`` and return it.

    Args:
        label: Description of the stage that has just completed.

    Returns:
        The RSS in MB as reported by ``get_rss_mb()``.
    """
    # Run a collection first so unreachable cyclic garbage is not counted.
    gc.collect()
    rss = get_rss_mb()
    # Flush on every reading: if the process is killed mid-run (e.g. by the
    # OOM killer) while stdout is buffered to a pipe or file, the last stage
    # reached would otherwise be lost.
    print(f"[{rss:7.1f} MB] {label}", flush=True)
    return rss
# Stage 0: Baseline
log_mem("BASELINE (python + script)")

# Stage 1: Basic imports (what media-downloader.py does at top level)
# Each import is executed for its memory cost; an RSS reading follows every
# step so growth can be attributed to it.
import warnings
warnings.filterwarnings("ignore", message=".*pkg_resources is deprecated.*")
log_mem("After warnings")

import modules.db_bootstrap
log_mem("After db_bootstrap")

# Standard library plus requests, measured together as one step.
import json
import sqlite3
import logging
import argparse
import time
import subprocess
import random
from pathlib import Path
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Any, Set, Tuple
import requests
from dataclasses import dataclass
log_mem("After stdlib + requests")
# Stage 2: Module imports (lines 52-80 of media-downloader.py)
# Downloader modules are imported one at a time, each followed by an RSS
# reading, so a jump can be pinned on a single import.
from modules.instaloader_module import InstaLoaderModule as InstaLoaderDownloader
log_mem("After instaloader_module import")

from modules.fastdl_module import FastDLDownloader
log_mem("After fastdl_module import")

from modules.imginn_module import ImgInnDownloader
log_mem("After imginn_module import")

from modules.imginn_api_module import ImgInnAPIDownloader
log_mem("After imginn_api_module import")

from modules.instagram_client_module import InstagramClientDownloader
log_mem("After instagram_client_module import")

from modules.toolzu_module import ToolzuDownloader
log_mem("After toolzu_module import")

from modules.snapchat_scraper import SnapchatDirectScraper
log_mem("After snapchat_scraper import")

from modules.snapchat_client_module import SnapchatClientDownloader
log_mem("After snapchat_client_module import")

from modules.tiktok_module import TikTokDownloader
log_mem("After tiktok_module import")

from modules.forum_downloader import ForumDownloader
log_mem("After forum_downloader import (has Playwright)")

from modules.coppermine_module import CoppermineDownloader
log_mem("After coppermine_module import")

from modules.download_manager import (
    DownloadManager,
    DownloadItem,
)
log_mem("After download_manager import")

# The remaining support modules are imported as a batch and measured once.
from modules.settings_manager import SettingsManager
from modules.date_utils import (
    DateHandler,
    extract_date,
    update_timestamps,
)
from modules.move_module import MoveManager
from modules.unified_database import UnifiedDatabase
from modules.universal_logger import get_logger
from modules.forum_db_adapter import ForumDatabaseAdapter
from modules.pushover_notifier import (
    PushoverNotifier,
    create_notifier_from_config,
)
from modules.service_health_monitor import ServiceHealthMonitor
from modules.dependency_updater import DependencyUpdater
from modules.downloader_monitor import get_monitor
from modules.activity_status import get_activity_manager
log_mem("After ALL module imports")
# Stage 3: Import scheduler and its dependencies
from modules.scheduler import DownloadScheduler
log_mem("After scheduler import (includes monitors)")

# Stage 4: Create UnifiedDatabase
db_path = '/opt/media-downloader/database/media_downloader.db'
unified_db = UnifiedDatabase(db_path, use_pool=True, pool_size=5)
log_mem("After UnifiedDatabase creation")

# Stage 5: Create DownloadScheduler
# SettingsManager is imported again here; the repeat import is harmless.
from modules.settings_manager import SettingsManager
sm = SettingsManager(db_path)
scheduler = DownloadScheduler(config_path=None, unified_db=unified_db, settings_manager=sm)
log_mem("After DownloadScheduler creation")
# Stage 6: exec_module to load media-downloader.py (what scheduler.start() does)
# The file name contains a hyphen, so it cannot be loaded with a plain
# import statement; it is loaded from its path under the name
# "media_downloader" instead.
import importlib.util
script_path = Path("/opt/media-downloader/media-downloader.py")
spec = importlib.util.spec_from_file_location("media_downloader", script_path)
media_downloader = importlib.util.module_from_spec(spec)
spec.loader.exec_module(media_downloader)  # runs the script's top level
MediaDownloader = media_downloader.MediaDownloader
log_mem("After exec_module (re-loads media-downloader.py)")

# Stage 7: Create MediaDownloader instance
downloader = MediaDownloader(enable_notifications=True, unified_db=unified_db)
log_mem("After MediaDownloader creation (lazy modules)")
# Stage 8: Access one lazy module to see how much it adds
print("\n--- Testing individual module instantiation ---")
if 'fastdl' in downloader.modules:
    # Looking the key up is what triggers instantiation.
    _ = downloader.modules['fastdl']
    log_mem("After instantiating FastDL module")
    # Drop our own reference before releasing: a lingering global binding
    # would keep whatever the lookup returned alive, so the "After
    # releasing" reading could never show it being freed.
    del _
    downloader.modules.release('fastdl')
    gc.collect()
    log_mem("After releasing FastDL module")

if 'forum' in downloader.modules or 'forums' in downloader.modules:
    # Prefer the plural key when both spellings are registered.
    key = 'forums' if 'forums' in downloader.modules else 'forum'
    _ = downloader.modules[key]
    log_mem(f"After instantiating {key} module (Playwright-based)")
    # Same as above: release our reference so the next reading is honest.
    del _
    downloader.modules.release(key)
    gc.collect()
    log_mem(f"After releasing {key} module")
# Stage 9: Create the monitors that scheduler creates
print("\n--- Testing monitor creation ---")
from modules.youtube_channel_monitor import YouTubeChannelMonitor
from modules.easynews_monitor import EasynewsMonitor
from modules.reddit_community_monitor import RedditCommunityMonitor

# Each monitor is built from the database path and an activity manager
# obtained for unified_db, with an RSS reading after every construction.
yt = YouTubeChannelMonitor(db_path, get_activity_manager(unified_db))
log_mem("After YouTubeChannelMonitor creation")

en = EasynewsMonitor(db_path, get_activity_manager(unified_db))
log_mem("After EasynewsMonitor creation")

rd = RedditCommunityMonitor(db_path, get_activity_manager(unified_db))
log_mem("After RedditCommunityMonitor creation")
# Stage 10: Simulate what happens when a background task runs
print("\n--- Simulating background task execution ---")

# Imported up front so neither async section depends on the other having
# executed the import first.
import asyncio
import contextlib


@contextlib.contextmanager
def _fresh_event_loop():
    """Yield a new event loop installed as the current one; close it on exit."""
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        yield loop
    finally:
        loop.close()


# Test: easynews check_all_celebrities
print("Running Easynews check_all_celebrities...")
try:
    result = en.check_all_celebrities(from_scheduler=True)
    log_mem(f"After Easynews check (results: {result.get('results_found', 0)})")
except Exception as e:
    # Best effort: a failing check must not stop the remaining measurements.
    log_mem(f"After Easynews check (error: {e})")

gc.collect()
log_mem("After gc.collect")

# Test: reddit check_all_now
print("Running Reddit check_all_now...")
try:
    with _fresh_event_loop() as loop:
        count = loop.run_until_complete(rd.check_all_now(from_scheduler=True))
        # Measured while the loop is still open, before it is closed.
        log_mem(f"After Reddit check (media: {count})")
except Exception as e:
    log_mem(f"After Reddit check (error: {e})")

gc.collect()
log_mem("After gc.collect")

# Test: youtube check_all_now
print("Running YouTube check_all_now...")
try:
    with _fresh_event_loop() as loop:
        count = loop.run_until_complete(yt.check_all_now(from_scheduler=True))
        log_mem(f"After YouTube check (videos: {count})")
except Exception as e:
    log_mem(f"After YouTube check (error: {e})")

gc.collect()
log_mem("After gc.collect")

print("\n--- DONE ---")
print(f"Final RSS: {get_rss_mb():.1f} MB")