Initial commit

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Todd
2026-03-29 22:42:55 -04:00
commit 0d7b2b1aab
389 changed files with 280296 additions and 0 deletions

View File

@@ -0,0 +1,213 @@
#!/usr/bin/env python3
"""Profile memory usage at each stage of scheduler startup to find the 8GB culprit.

Flat diagnostic script: it mirrors the import/initialization order of
media-downloader.py, printing resident-set size (RSS) after each stage so
the stage responsible for the memory growth stands out in the output.
"""
import os
import sys
import gc
# Make the installed application importable and run from its directory,
# exactly as the service does.
sys.path.insert(0, '/opt/media-downloader')
os.chdir('/opt/media-downloader')
# Set environment like the service does
# SECURITY NOTE(review): the database credential is hardcoded below; tolerable
# in a local one-off profiler, but do not copy this pattern into shipped code.
os.environ['DATABASE_BACKEND'] = 'postgresql'
os.environ['DATABASE_URL'] = 'postgresql://media_downloader:PNsihOXvvuPwWiIvGlsc9Fh2YmMmB@localhost/media_downloader'
os.environ['PYTHONUNBUFFERED'] = '1'
def get_rss_mb():
    """Return this process's resident set size in MB, read from /proc.

    Scans /proc/self/status for the first ``VmRSS:`` line (value reported
    in kB) and converts it to MB. Returns 0 when the field is absent.
    """
    with open('/proc/self/status') as status:
        rss_line = next(
            (entry for entry in status if entry.startswith('VmRSS:')), None
        )
    if rss_line is None:
        return 0
    return int(rss_line.split()[1]) / 1024  # kB to MB
def log_mem(label):
    """Force a GC pass, print the current RSS tagged with *label*, return it.

    The gc.collect() first drops any collectable garbage so the reading
    reflects live objects rather than pending garbage.
    """
    gc.collect()
    current = get_rss_mb()
    print(f"[{current:7.1f} MB] {label}")
    return current
# Stage 0: Baseline
log_mem("BASELINE (python + script)")
# Stage 1: Basic imports (what media-downloader.py does at top level)
# NOTE: statement order is the measurement here — each log_mem() attributes
# the RSS delta to the import(s) immediately preceding it. Do not reorder.
import warnings
warnings.filterwarnings("ignore", message=".*pkg_resources is deprecated.*")
log_mem("After warnings")
import modules.db_bootstrap
log_mem("After db_bootstrap")
import json, sqlite3, logging, argparse, time, subprocess, random
from pathlib import Path
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Any, Set, Tuple
import requests
from dataclasses import dataclass
log_mem("After stdlib + requests")
# Stage 2: Module imports (lines 52-80 of media-downloader.py)
# Each downloader module is imported one at a time with an RSS reading after
# it, so a single heavyweight import shows up as a large step in the output.
from modules.instaloader_module import InstaLoaderModule as InstaLoaderDownloader
log_mem("After instaloader_module import")
from modules.fastdl_module import FastDLDownloader
log_mem("After fastdl_module import")
from modules.imginn_module import ImgInnDownloader
log_mem("After imginn_module import")
from modules.imginn_api_module import ImgInnAPIDownloader
log_mem("After imginn_api_module import")
from modules.instagram_client_module import InstagramClientDownloader
log_mem("After instagram_client_module import")
from modules.toolzu_module import ToolzuDownloader
log_mem("After toolzu_module import")
from modules.snapchat_scraper import SnapchatDirectScraper
log_mem("After snapchat_scraper import")
from modules.snapchat_client_module import SnapchatClientDownloader
log_mem("After snapchat_client_module import")
from modules.tiktok_module import TikTokDownloader
log_mem("After tiktok_module import")
from modules.forum_downloader import ForumDownloader
log_mem("After forum_downloader import (has Playwright)")
from modules.coppermine_module import CoppermineDownloader
log_mem("After coppermine_module import")
from modules.download_manager import DownloadManager, DownloadItem
log_mem("After download_manager import")
# Remaining support modules are cheap enough to measure as one group.
from modules.settings_manager import SettingsManager
from modules.date_utils import DateHandler, extract_date, update_timestamps
from modules.move_module import MoveManager
from modules.unified_database import UnifiedDatabase
from modules.universal_logger import get_logger
from modules.forum_db_adapter import ForumDatabaseAdapter
from modules.pushover_notifier import PushoverNotifier, create_notifier_from_config
from modules.service_health_monitor import ServiceHealthMonitor
from modules.dependency_updater import DependencyUpdater
from modules.downloader_monitor import get_monitor
from modules.activity_status import get_activity_manager
log_mem("After ALL module imports")
# Stage 3: Import scheduler and its dependencies
from modules.scheduler import DownloadScheduler
log_mem("After scheduler import (includes monitors)")
# Stage 4: Create UnifiedDatabase
db_path = '/opt/media-downloader/database/media_downloader.db'
unified_db = UnifiedDatabase(db_path, use_pool=True, pool_size=5)
log_mem("After UnifiedDatabase creation")
# Stage 5: Create DownloadScheduler
# (SettingsManager is already imported in Stage 2; a redundant re-import that
# used to sit here was removed — it contributed nothing to the measurement.)
sm = SettingsManager(db_path)
scheduler = DownloadScheduler(
    config_path=None,
    unified_db=unified_db,
    settings_manager=sm
)
log_mem("After DownloadScheduler creation")
# Stage 6: exec_module to load media-downloader.py (what scheduler.start() does)
import importlib.util
spec = importlib.util.spec_from_file_location(
    "media_downloader",
    Path("/opt/media-downloader/media-downloader.py")
)
media_downloader = importlib.util.module_from_spec(spec)
# NOTE: this re-executes media-downloader.py's top-level code into a second
# module object, duplicating the imports above. That duplication is deliberate
# here — the scheduler does the same, so its memory cost must be measured.
spec.loader.exec_module(media_downloader)
MediaDownloader = media_downloader.MediaDownloader
log_mem("After exec_module (re-loads media-downloader.py)")
# Stage 7: Create MediaDownloader instance
downloader = MediaDownloader(enable_notifications=True, unified_db=unified_db)
log_mem("After MediaDownloader creation (lazy modules)")
# Stage 8: Access one lazy module to see how much it adds
# downloader.modules appears to be a lazy container: indexing instantiates the
# module and release() drops it — presumably freeing its memory; the paired
# before/after RSS readings exist to verify exactly that. TODO confirm
# release() semantics against the container's implementation.
print("\n--- Testing individual module instantiation ---")
if 'fastdl' in downloader.modules:
    _ = downloader.modules['fastdl']
    log_mem("After instantiating FastDL module")
    downloader.modules.release('fastdl')
    gc.collect()
    log_mem("After releasing FastDL module")
if 'forum' in downloader.modules or 'forums' in downloader.modules:
    # The key name differs between deployments; probe both spellings.
    key = 'forums' if 'forums' in downloader.modules else 'forum'
    _ = downloader.modules[key]
    log_mem(f"After instantiating {key} module (Playwright-based)")
    downloader.modules.release(key)
    gc.collect()
    log_mem(f"After releasing {key} module")
# Stage 9: Create the monitors that scheduler creates
print("\n--- Testing monitor creation ---")
from modules.youtube_channel_monitor import YouTubeChannelMonitor
from modules.easynews_monitor import EasynewsMonitor
from modules.reddit_community_monitor import RedditCommunityMonitor
# Each monitor takes the DB path plus the activity manager bound to unified_db,
# with an RSS reading after each construction.
yt = YouTubeChannelMonitor(db_path, get_activity_manager(unified_db))
log_mem("After YouTubeChannelMonitor creation")
en = EasynewsMonitor(db_path, get_activity_manager(unified_db))
log_mem("After EasynewsMonitor creation")
rd = RedditCommunityMonitor(db_path, get_activity_manager(unified_db))
log_mem("After RedditCommunityMonitor creation")
# Stage 10: Simulate what happens when a background task runs
print("\n--- Simulating background task execution ---")
# Hoisted out of the Reddit try block below: the YouTube stage also uses
# asyncio, and binding the import inside an earlier `try` risked a NameError
# in the later stage if that try failed before the import executed.
import asyncio

def _run_async_check(name, noun, make_coro):
    """Run one async monitor check on a fresh event loop and log RSS after.

    name: monitor label used in printed messages (e.g. "Reddit").
    noun: what the returned count represents in the log line (e.g. "media").
    make_coro: zero-arg callable returning the coroutine to run.
    """
    print(f"Running {name} check_all_now...")
    try:
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            count = loop.run_until_complete(make_coro())
            log_mem(f"After {name} check ({noun}: {count})")
        finally:
            loop.close()
    except Exception as e:
        # Best-effort: a failing check still yields a labelled RSS reading.
        log_mem(f"After {name} check (error: {e})")
    gc.collect()
    log_mem("After gc.collect")

# Test: easynews check_all_celebrities (synchronous API)
print("Running Easynews check_all_celebrities...")
try:
    result = en.check_all_celebrities(from_scheduler=True)
    log_mem(f"After Easynews check (results: {result.get('results_found', 0)})")
except Exception as e:
    log_mem(f"After Easynews check (error: {e})")
gc.collect()
log_mem("After gc.collect")
# Test: reddit check_all_now
_run_async_check("Reddit", "media", lambda: rd.check_all_now(from_scheduler=True))
# Test: youtube check_all_now
_run_async_check("YouTube", "videos", lambda: yt.check_all_now(from_scheduler=True))

print("\n--- DONE ---")
print(f"Final RSS: {get_rss_mb():.1f} MB")