100
scripts/add-backup-profile.sh
Executable file
100
scripts/add-backup-profile.sh
Executable file
@@ -0,0 +1,100 @@
|
||||
#!/bin/bash
# Add Media Downloader backup profile to Backup Central
# Run this script to create or recreate the backup profile

set -e

# Backup Central's profile cache database
DB_PATH=/opt/backup-central/data/backup_cache.db

echo "Adding Media Downloader backup profile to Backup Central..."
echo ""

# Delete existing profile if it exists (ignore a missing DB/table)
sqlite3 "$DB_PATH" "DELETE FROM backup_profiles WHERE id = 'profile-media-downloader';" 2>/dev/null || true

# Insert new profile.
# BUG FIX: the original ran sqlite3 and then tested `$? -eq 0` — but under
# `set -e` a failing sqlite3 aborts the script before the check ever runs,
# so the failure branch was unreachable. Use the command itself as the
# `if` condition instead.
if sqlite3 "$DB_PATH" <<'SQL'
INSERT INTO backup_profiles (
    id,
    name,
    description,
    enabled,
    created_at,
    updated_at,
    destination_type,
    destination_path,
    sources,
    schedule_enabled,
    schedule_frequency,
    schedule_time,
    retention_daily,
    retention_weekly,
    retention_monthly,
    retention_yearly,
    notify_on_success,
    notify_on_warning,
    notify_on_failure,
    notify_channels,
    advanced_settings,
    total_runs,
    success_count,
    failure_count
) VALUES (
    'profile-media-downloader',
    'Media Downloader System',
    'Daily backup of media-downloader configuration, database, and code',
    1,
    datetime('now'),
    datetime('now'),
    'local',
    '/media/backups/Ubuntu/restic-repo',
    '{"include":["/opt/media-downloader/data","/opt/media-downloader/database","/opt/media-downloader/cookies","/opt/media-downloader/sessions","/opt/media-downloader/modules","/opt/media-downloader/wrappers","/opt/media-downloader/scripts","/opt/media-downloader/web/backend","/opt/media-downloader/web/frontend/src","/opt/media-downloader/*.py","/opt/media-downloader/VERSION","/opt/media-downloader/README.md","/opt/media-downloader/requirements.txt","/opt/media-downloader/docs"],"exclude":["*.log","*.log.*","*.pyc","__pycache__","/opt/media-downloader/temp/*","/opt/media-downloader/logs/*","/opt/media-downloader/venv/*","/opt/media-downloader/.playwright/*","/opt/media-downloader/debug/*","/opt/media-downloader/database/*.db-shm","/opt/media-downloader/database/*.db-wal","*.swp","*.swo","*~",".*.swp"]}',
    1,
    'daily',
    '00:00',
    7,
    4,
    12,
    2,
    0,
    1,
    1,
    '["pushover"]',
    '{"custom_name_template":"{{version}}-{{datetime}}","auto_lock_all_backups":true}',
    0,
    0,
    0
);
SQL
then
    echo "✓ Profile added successfully"
    echo ""

    # Restart backup-central to load the profile
    echo "Restarting Backup Central service..."
    sudo systemctl restart backup-central
    sleep 2

    echo "✓ Backup Central restarted"
    echo ""

    # Verify profile was created.
    # FIX: under `set -e` a grep with no match (exit 1) would kill the
    # script silently; fall back to a warning instead.
    echo "Verifying profile..."
    backup-central profiles list | grep "Media Downloader" \
        || echo "⚠ Profile not visible in list output yet"

    echo ""
    echo "╔════════════════════════════════════════════════╗"
    echo "║ Profile Added Successfully ║"
    echo "╠════════════════════════════════════════════════╣"
    echo "║ ID: profile-media-downloader ║"
    echo "║ Name: Media Downloader System ║"
    echo "║ Schedule: Daily at 00:00 (midnight) ║"
    echo "║ Status: Enabled ║"
    echo "╚════════════════════════════════════════════════╝"
    echo ""
    echo "To view full details:"
    echo " backup-central profiles --info profile-media-downloader"
    echo ""
else
    echo "✗ Failed to add profile" >&2
    exit 1
fi
|
||||
112
scripts/add-database-indexes.sql
Normal file
112
scripts/add-database-indexes.sql
Normal file
@@ -0,0 +1,112 @@
|
||||
-- Database Performance Indexes
-- Adds indexes to frequently queried columns for improved performance
--
-- Run with: sqlite3 /opt/media-downloader/database/downloads.db < scripts/add-database-indexes.sql
--
-- All statements are idempotent (IF NOT EXISTS), so the script is safe to
-- re-run after schema changes or on a fresh database.

-- ============================================================================
-- Downloads Table Indexes
-- ============================================================================

-- Index on platform for filtering downloads by platform
CREATE INDEX IF NOT EXISTS idx_downloads_platform
ON downloads(platform);

-- Index on source for filtering downloads by source/username
CREATE INDEX IF NOT EXISTS idx_downloads_source
ON downloads(source);

-- Index on download_date for time-based queries (DESC for most recent first)
CREATE INDEX IF NOT EXISTS idx_downloads_download_date
ON downloads(download_date DESC);

-- Index on status for filtering by download status
CREATE INDEX IF NOT EXISTS idx_downloads_status
ON downloads(status);

-- Compound index for platform + source queries (common filter combination)
CREATE INDEX IF NOT EXISTS idx_downloads_platform_source
ON downloads(platform, source);

-- Compound index for platform + download_date (common for analytics)
CREATE INDEX IF NOT EXISTS idx_downloads_platform_date
ON downloads(platform, download_date DESC);

-- Index on filename for search queries
-- NOTE(review): only accelerates prefix/equality lookups, not LIKE '%term%'
CREATE INDEX IF NOT EXISTS idx_downloads_filename
ON downloads(filename);

-- Partial index on media_id for duplicate detection (skips NULL rows)
CREATE INDEX IF NOT EXISTS idx_downloads_media_id
ON downloads(media_id)
WHERE media_id IS NOT NULL;

-- Partial index on file_hash for duplicate detection (skips NULL rows)
CREATE INDEX IF NOT EXISTS idx_downloads_file_hash
ON downloads(file_hash)
WHERE file_hash IS NOT NULL;

-- ============================================================================
-- Notifications Table Indexes
-- ============================================================================

-- Index on sent_at for time-based queries
CREATE INDEX IF NOT EXISTS idx_notifications_sent_at
ON notifications(sent_at DESC);

-- Index on platform for filtering notifications
CREATE INDEX IF NOT EXISTS idx_notifications_platform
ON notifications(platform);

-- Index on status for filtering by notification status
CREATE INDEX IF NOT EXISTS idx_notifications_status
ON notifications(status);

-- Compound index for platform + sent_at (common query)
CREATE INDEX IF NOT EXISTS idx_notifications_platform_sent_at
ON notifications(platform, sent_at DESC);

-- ============================================================================
-- Scheduler State Table Indexes
-- ============================================================================

-- Index on status for active task queries
CREATE INDEX IF NOT EXISTS idx_scheduler_state_status
ON scheduler_state(status);

-- Index on next_run for finding next scheduled tasks
CREATE INDEX IF NOT EXISTS idx_scheduler_state_next_run
ON scheduler_state(next_run ASC);

-- Index on platform for platform-specific queries
CREATE INDEX IF NOT EXISTS idx_scheduler_state_platform
ON scheduler_state(platform);

-- ============================================================================
-- Users Table Indexes
-- ============================================================================

-- Index on username for login queries
-- NOTE(review): if username already carries a UNIQUE constraint this index
-- is redundant — confirm against the schema before relying on it
CREATE INDEX IF NOT EXISTS idx_users_username
ON users(username);

-- Partial index on email for lookup queries (if email is used)
CREATE INDEX IF NOT EXISTS idx_users_email
ON users(email)
WHERE email IS NOT NULL;

-- ============================================================================
-- Performance Analysis
-- ============================================================================

-- Run ANALYZE to update query planner statistics
ANALYZE;

-- Display index information (all indexes created by this script)
SELECT
'Index Information' as info,
name as index_name,
tbl_name as table_name
FROM sqlite_master
WHERE type = 'index'
AND name LIKE 'idx_%'
ORDER BY tbl_name, name;
|
||||
71
scripts/add_reference_face.py
Executable file
71
scripts/add_reference_face.py
Executable file
@@ -0,0 +1,71 @@
|
||||
#!/usr/bin/env python3
"""
Add Reference Face Script
Adds a reference face encoding to the database
"""

import sys
import os
import resource
import time

# Add parent directory to path so the project's modules/ package resolves
# when this script is run directly from scripts/
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from modules.unified_database import UnifiedDatabase
from modules.face_recognition_module import FaceRecognitionModule

# Limit CPU usage at Python level to prevent system freeze
try:
    os.nice(19)  # Lowest CPU priority
except OSError:
    pass

try:
    # Limit CPU time to 120 seconds max (face recognition can be slow)
    # NOTE(review): exceeding RLIMIT_CPU kills the process with SIGXCPU —
    # confirm 120s is enough for large reference images
    resource.setrlimit(resource.RLIMIT_CPU, (120, 120))
except (OSError, ValueError):
    pass

# Small delay to reduce CPU spike
time.sleep(0.2)
|
||||
|
||||
|
||||
def main():
    """CLI entry point: store one reference face encoding for a person.

    Usage: add_reference_face.py <person_name> <image_path>
    Exits non-zero on bad arguments, missing image, or encoding failure.
    """
    # Guard: both positional arguments are required.
    if len(sys.argv) < 3:
        print("Usage: python3 add_reference_face.py <person_name> <image_path>")
        print("Example: python3 add_reference_face.py 'Eva Longoria' /path/to/reference_image.jpg")
        sys.exit(1)

    person_name, image_path = sys.argv[1], sys.argv[2]

    if not os.path.exists(image_path):
        print(f"Error: Image not found: {image_path}")
        sys.exit(1)

    print(f"Adding reference face for '{person_name}' from {image_path}")

    # Wire the face-recognition module up to the shared database.
    database = UnifiedDatabase()
    recognizer = FaceRecognitionModule(unified_db=database)

    if not recognizer.add_reference_face(person_name, image_path):
        print(f"✗ Failed to add reference face")
        sys.exit(1)

    print(f"✓ Successfully added reference face for '{person_name}'")
    print(f"\nCurrent reference faces:")
    for entry in recognizer.get_reference_faces():
        print(f" - {entry['person_name']} (ID: {entry['id']}, added: {entry['created_at']})")


if __name__ == "__main__":
    main()
|
||||
17
scripts/api-call.sh
Executable file
17
scripts/api-call.sh
Executable file
@@ -0,0 +1,17 @@
|
||||
#!/bin/bash
|
||||
# Make authenticated API calls using saved token
|
||||
# Usage: /opt/media-downloader/scripts/api-call.sh "/api/endpoint?params"
|
||||
#
|
||||
# First run get-api-token.sh to get a token, then use this script.
|
||||
# Example: /opt/media-downloader/scripts/api-call.sh "/api/video-queue?limit=2"
|
||||
|
||||
ENDPOINT="$1"
|
||||
shift
|
||||
|
||||
if [ ! -f /tmp/api_token.txt ]; then
|
||||
echo "No token found. Run get-api-token.sh first."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
TOKEN=$(cat /tmp/api_token.txt)
|
||||
curl -s "http://localhost:8000${ENDPOINT}" -b "auth_token=$TOKEN" "$@"
|
||||
145
scripts/backfill_dimensions.py
Executable file
145
scripts/backfill_dimensions.py
Executable file
@@ -0,0 +1,145 @@
|
||||
#!/usr/bin/env python3
"""
Backfill missing dimensions (width/height) for files in file_inventory.

Uses PIL for images and ffprobe for videos.
"""

import os
import sys
from pathlib import Path

# Bootstrap PostgreSQL adapter before any sqlite3 imports — the adapter
# monkey-patches sqlite3, so ordering matters here
sys.path.insert(0, '/opt/media-downloader')
from modules.db_bootstrap import bootstrap_database
bootstrap_database()
import sqlite3

# Database path (routed to PostgreSQL via pgadapter)
DB_PATH = "/opt/media-downloader/database/media_downloader.db"
|
||||
|
||||
|
||||
def get_image_dimensions(file_path: str) -> tuple:
    """Return (width, height) of an image via PIL, or (None, None) on failure.

    Any problem — unreadable file, missing PIL, corrupt image — is caught,
    logged to stdout, and reported as (None, None).
    """
    try:
        from PIL import Image  # imported lazily so the script runs without PIL
        with Image.open(file_path) as image:
            return image.size  # PIL's .size is already (width, height)
    except Exception as exc:
        print(f" PIL error for {file_path}: {exc}")
        return None, None
|
||||
|
||||
|
||||
def get_video_dimensions(file_path: str) -> tuple:
    """Return (width, height) of a video via ffprobe, or (None, None) on failure.

    Shells out to ffprobe with a 30s timeout; any error (missing binary,
    unreadable file, unparsable output) is logged and yields (None, None).
    """
    try:
        import subprocess
        probe_cmd = [
            'ffprobe', '-v', 'error', '-select_streams', 'v:0',
            '-show_entries', 'stream=width,height', '-of', 'csv=p=0',
            file_path,
        ]
        proc = subprocess.run(probe_cmd, capture_output=True, text=True, timeout=30)
        output = proc.stdout.strip()
        if proc.returncode == 0 and output:
            fields = output.split(',')
            if len(fields) >= 2:
                return int(fields[0]), int(fields[1])
    except Exception as exc:
        print(f" ffprobe error for {file_path}: {exc}")
    return None, None
|
||||
|
||||
|
||||
def main():
    """Backfill width/height for rows in file_inventory missing dimensions.

    Only rows whose location is 'final' or 'review' are considered. Each
    file is probed on disk (PIL for images, ffprobe for videos) and results
    are written back in batches of 100.
    """
    if not os.path.exists(DB_PATH):
        print(f"Database not found: {DB_PATH}")
        sys.exit(1)

    conn = sqlite3.connect(DB_PATH)
    conn.row_factory = sqlite3.Row  # enable column access by name below
    cursor = conn.cursor()

    # Get count of files missing dimensions (used only for progress output)
    cursor.execute('''
        SELECT COUNT(*) FROM file_inventory
        WHERE (width IS NULL OR height IS NULL)
        AND location IN ('final', 'review')
    ''')
    total_missing = cursor.fetchone()[0]
    print(f"Found {total_missing} files with missing dimensions")

    if total_missing == 0:
        print("No files need dimension backfill!")
        conn.close()
        return

    # Process in batches
    batch_size = 100
    processed = 0
    updated = 0
    errors = 0

    cursor.execute('''
        SELECT id, file_path, content_type FROM file_inventory
        WHERE (width IS NULL OR height IS NULL)
        AND location IN ('final', 'review')
    ''')

    # Separate cursor for UPDATEs so the SELECT cursor can keep iterating
    update_cursor = conn.cursor()
    batch_updates = []

    for row in cursor:
        file_id = row['id']
        file_path = row['file_path']
        content_type = row['content_type']

        # Files that vanished from disk count as processed but are skipped
        if not os.path.exists(file_path):
            processed += 1
            continue

        width, height = None, None

        if content_type == 'image':
            width, height = get_image_dimensions(file_path)
        elif content_type == 'video':
            width, height = get_video_dimensions(file_path)
        else:
            # Unknown content_type: try to determine from extension,
            # defaulting to the ffprobe path for anything non-image
            ext = Path(file_path).suffix.lower()
            image_exts = {'.jpg', '.jpeg', '.png', '.gif', '.heic', '.heif', '.webp', '.bmp', '.tiff'}
            if ext in image_exts:
                width, height = get_image_dimensions(file_path)
            else:
                width, height = get_video_dimensions(file_path)

        # Truthiness check: a 0 dimension is treated as a probe failure
        if width and height:
            batch_updates.append((width, height, file_id))
            updated += 1
        else:
            errors += 1

        processed += 1

        # Commit in batches to bound memory and give progress feedback
        if len(batch_updates) >= batch_size:
            update_cursor.executemany(
                'UPDATE file_inventory SET width = ?, height = ? WHERE id = ?',
                batch_updates
            )
            conn.commit()
            print(f" Processed {processed}/{total_missing} files, updated {updated}, errors {errors}")
            batch_updates = []

    # Final batch (anything left under batch_size)
    if batch_updates:
        update_cursor.executemany(
            'UPDATE file_inventory SET width = ?, height = ? WHERE id = ?',
            batch_updates
        )
        conn.commit()

    print(f"\nComplete! Processed {processed} files, updated {updated} with dimensions, {errors} errors")
    conn.close()


if __name__ == '__main__':
    main()
|
||||
101
scripts/backfill_face_recognition.py
Executable file
101
scripts/backfill_face_recognition.py
Executable file
@@ -0,0 +1,101 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Backfill face recognition scan results for existing files
|
||||
|
||||
This script looks at existing downloads and infers face recognition results:
|
||||
- status='completed' and file in final destination = matched
|
||||
- status='review' and file in review queue = no match
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from modules.unified_database import UnifiedDatabase
|
||||
from pathlib import Path
|
||||
|
||||
def backfill_face_recognition():
    """Backfill face recognition results from existing downloads.

    No actual scanning happens here: outcomes are inferred from download
    status —
      * status='review'                           -> "no match", confidence 0.0
      * status='completed', path not under /review -> match (hard-coded to
        'Eva Longoria'), confidence 1.0
    Files already present in face_recognition_scans are skipped, and rows
    are marked scan_type='backfill' so inferred results stay distinguishable
    from real scans.
    """

    db = UnifiedDatabase()

    print("🔄 Backfilling face recognition scan results...")
    print("=" * 70)

    with db.get_connection() as conn:
        cursor = conn.cursor()

        # Get all downloads with file paths
        cursor.execute('''
            SELECT id, filename, file_path, status
            FROM downloads
            WHERE file_path IS NOT NULL AND file_path != ''
        ''')

        downloads = cursor.fetchall()

        matched_count = 0
        no_match_count = 0
        skipped_count = 0

        for download in downloads:
            download_id = download['id']
            filename = download['filename']  # fetched but unused below
            file_path = download['file_path']
            status = download['status']

            # Check if already scanned (one query per row — fine at this scale)
            cursor.execute('''
                SELECT id FROM face_recognition_scans
                WHERE file_path = ?
            ''', (file_path,))

            if cursor.fetchone():
                skipped_count += 1
                continue

            # Determine if file is image/video purely by extension
            path = Path(file_path)
            ext = path.suffix.lower()
            image_exts = {'.jpg', '.jpeg', '.png', '.gif', '.heic', '.heif'}
            video_exts = {'.mp4', '.mov', '.webm', '.avi', '.mkv', '.flv', '.m4v'}

            if ext not in (image_exts | video_exts):
                continue  # Skip non-media files

            # Infer face recognition result from status
            if status == 'review':
                # File in review = no match
                has_match = False
                matched_person = None
                confidence = 0.0  # No match = 0% confidence
                no_match_count += 1
            elif status == 'completed' and '/review' not in file_path:
                # File in final destination = matched (assume Eva Longoria for now)
                has_match = True
                matched_person = 'Eva Longoria'
                confidence = 1.0  # Backfill assumes 100% for approved files
                matched_count += 1
            else:
                continue  # Skip uncertain cases

            # Insert retroactive scan result; face_count=0 because no faces
            # were actually detected here
            cursor.execute('''
                INSERT INTO face_recognition_scans
                (download_id, file_path, has_match, matched_person, confidence, face_count, scan_type)
                VALUES (?, ?, ?, ?, ?, ?, ?)
            ''', (download_id, file_path, has_match, matched_person, confidence, 0, 'backfill'))

        # Single commit for the whole backfill
        conn.commit()

    print()
    print("✅ Backfill complete!")
    print(f" Matched: {matched_count}")
    print(f" No match: {no_match_count}")
    print(f" Skipped (already scanned): {skipped_count}")
    print("=" * 70)

    db.close()


if __name__ == "__main__":
    backfill_face_recognition()
|
||||
351
scripts/backfill_ig_posts.py
Normal file
351
scripts/backfill_ig_posts.py
Normal file
@@ -0,0 +1,351 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Backfill missing Instagram posts using authenticated browser cookies.
|
||||
|
||||
Paginates through the full timeline via /api/v1/feed/user/ and inserts
|
||||
any posts missing from paid_content_posts. Uses Edge browser fingerprint
|
||||
and the cookies stored in the instagram_browser scraper entry.
|
||||
|
||||
Usage:
|
||||
cd /opt/media-downloader
|
||||
./venv/bin/python3 -u scripts/backfill_ig_posts.py --creator-id 101
|
||||
./venv/bin/python3 -u scripts/backfill_ig_posts.py --creator-id 110
|
||||
"""
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
import os
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
# Bootstrap DB (pgadapter) — module-level import activates monkey-patching
|
||||
import modules.db_bootstrap # noqa: F401
|
||||
|
||||
import sqlite3 # routed to PostgreSQL via pgadapter
|
||||
from curl_cffi.requests import Session as CurlSession
|
||||
from datetime import datetime
|
||||
|
||||
PER_PAGE = 33
|
||||
SLEEP_BETWEEN = 2.0 # seconds between API calls
|
||||
|
||||
|
||||
def load_cookies(conn):
    """Load the stored Instagram cookie list for the instagram_browser scraper.

    Exits the process if no cookie blob is stored or it lacks a non-empty
    sessionid, since authenticated requests are impossible without one.
    """
    cur = conn.cursor()
    cur.execute("SELECT cookies_json FROM scrapers WHERE id = 'instagram_browser'")
    record = cur.fetchone()
    if not record or not record[0]:
        print("ERROR: No cookies found in instagram_browser scraper")
        sys.exit(1)

    cookie_list = json.loads(record[0])
    session_present = any(
        c.get('name') == 'sessionid' and c.get('value') for c in cookie_list
    )
    if not session_present:
        print("ERROR: No sessionid in cookies")
        sys.exit(1)
    return cookie_list
|
||||
|
||||
|
||||
def save_cookies(conn, session):
    """Persist the session's (possibly rotated) cookies back to the scrapers row.

    Cookies with no domain fall back to '.instagram.com'. Nothing is written
    when the session jar is empty.
    """
    snapshot = [
        {
            'name': cookie.name,
            'value': cookie.value,
            'domain': cookie.domain or '.instagram.com',
        }
        for cookie in session.cookies.jar
    ]
    if not snapshot:
        return

    cur = conn.cursor()
    cur.execute(
        "UPDATE scrapers SET cookies_json = ?, cookies_updated_at = ? WHERE id = 'instagram_browser'",
        (json.dumps(snapshot), datetime.now().isoformat())
    )
    conn.commit()
|
||||
|
||||
|
||||
def load_known_post_ids(conn, creator_id):
    """Return the set of post shortcodes already stored for this creator."""
    cur = conn.cursor()
    cur.execute("SELECT post_id FROM paid_content_posts WHERE creator_id = ?", (creator_id,))
    return {record[0] for record in cur.fetchall()}
|
||||
|
||||
|
||||
def lookup_ig_user_id(session, username):
    """Look up Instagram user ID from username using authenticated session.

    Hits the web_profile_info endpoint and exits the whole script on any
    non-200 response (typically an expired session or rate limiting).
    The profile's post count is printed for information only.
    """
    resp = session.get(
        f'https://www.instagram.com/api/v1/users/web_profile_info/?username={username}',
        timeout=10
    )
    if resp.status_code != 200:
        print(f"ERROR: Failed to look up user ID for @{username}: HTTP {resp.status_code}")
        sys.exit(1)
    data = resp.json()
    # KeyError here would mean the endpoint changed shape — allowed to crash
    user = data['data']['user']
    ig_user_id = user['id']
    ig_post_count = user['edge_owner_to_timeline_media']['count']
    print(f"Instagram user ID for @{username}: {ig_user_id} ({ig_post_count} posts)")
    return ig_user_id
|
||||
|
||||
|
||||
def best_media_url(node):
    """Pick the highest-resolution media URL from an IG API node.

    Video nodes (media_type == 2) prefer video_versions; anything else uses
    image_versions2 candidates. "Best" is the variant with the largest
    width*height. Returns None when no candidates exist at all, and '' when
    the best candidate carries no url field.
    """
    def _pixel_area(variant):
        return variant.get('width', 0) * variant.get('height', 0)

    if node.get('media_type', 1) == 2 and node.get('video_versions'):
        return max(node['video_versions'], key=_pixel_area).get('url', '')

    image_variants = node.get('image_versions2', {}).get('candidates', [])
    if image_variants:
        return max(image_variants, key=_pixel_area).get('url', '')
    return None
|
||||
|
||||
|
||||
def node_to_post_row(node):
    """Convert an IG API node to DB row data.

    Returns None when the node has no shortcode or no resolvable media URL.
    Carousel nodes (media_type == 8) contribute one URL per child; tagged
    users are collected from both the post and its carousel children,
    de-duplicated while preserving first-seen order.
    """
    shortcode = node.get('code', '')
    if not shortcode:
        return None

    taken_ts = node.get('taken_at', 0)
    published = datetime.fromtimestamp(taken_ts).isoformat() if taken_ts else None

    caption_field = node.get('caption')
    caption_text = caption_field.get('text', '') if isinstance(caption_field, dict) else ''

    # Resolve media URLs (one per carousel child, or a single entry).
    if node.get('media_type', 1) == 8 and node.get('carousel_media'):
        raw_urls = [best_media_url(child) for child in node['carousel_media']]
    else:
        raw_urls = [best_media_url(node)]
    media_urls = [u for u in raw_urls if u]

    if not media_urls:
        return None

    # Tagged users, order-preserving and de-duplicated.
    tagged = []

    def _collect_tags(container):
        for tag in (container.get('usertags') or {}).get('in', []):
            handle = (tag.get('user') or {}).get('username')
            if handle and handle not in tagged:
                tagged.append(handle)

    _collect_tags(node)
    for child in node.get('carousel_media') or []:
        _collect_tags(child)

    return {
        'post_id': shortcode,
        'published_at': published,
        'content': caption_text,
        'srcs': media_urls,
        'attachment_count': len(media_urls),
        'is_pinned': 1 if node.get('timeline_pinned_user_ids') else 0,
        'tagged_users': tagged,
    }
|
||||
|
||||
|
||||
def insert_post(conn, creator_id, post_data):
    """Insert a post + attachments into the DB.

    post_data is the dict produced by node_to_post_row(). Attachments are
    created with status='pending' so the regular download pipeline picks
    them up; tagged users are inserted idempotently. Commits once at the end.
    """
    cursor = conn.cursor()
    now = datetime.now().isoformat()

    cursor.execute(
        """INSERT INTO paid_content_posts
        (creator_id, post_id, title, content, published_at, added_at,
        has_attachments, attachment_count, downloaded, is_pinned)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
        (creator_id, post_data['post_id'], None, post_data['content'],
        post_data['published_at'], now,
        1 if post_data['attachment_count'] > 0 else 0,
        post_data['attachment_count'], False, post_data['is_pinned'])
    )

    # Get the inserted post's ID — re-queried rather than using lastrowid,
    # presumably because the pgadapter routing doesn't expose it (confirm)
    cursor.execute(
        "SELECT id FROM paid_content_posts WHERE creator_id = ? AND post_id = ?",
        (creator_id, post_data['post_id'])
    )
    row = cursor.fetchone()
    if not row:
        # Insert did not land; nothing further to record (silently skipped)
        return
    db_post_id = row[0]

    # Insert attachments: type/extension inferred from the CDN URL path;
    # server_path stores the public post page URL (with img_index)
    for idx, src_url in enumerate(post_data['srcs']):
        ext = '.mp4' if '.mp4' in src_url.split('?')[0] else '.jpg'
        file_type = 'video' if ext == '.mp4' else 'image'
        name = f"{post_data['post_id']}_{idx}{ext}"

        cursor.execute(
            """INSERT INTO paid_content_attachments
            (post_id, attachment_index, name, file_type, extension, server_path, download_url, status)
            VALUES (?, ?, ?, ?, ?, ?, ?, 'pending')""",
            (db_post_id, idx, name, file_type, ext,
            f"https://www.instagram.com/p/{post_data['post_id']}/?img_index={idx + 1}",
            src_url)
        )

    # Insert tagged users (idempotent via ON CONFLICT DO NOTHING)
    for uname in post_data.get('tagged_users', []):
        cursor.execute(
            """INSERT INTO paid_content_post_tagged_users (post_id, username, created_at)
            VALUES (?, ?, ?)
            ON CONFLICT (post_id, username) DO NOTHING""",
            (db_post_id, uname, now)
        )

    conn.commit()
|
||||
|
||||
|
||||
def create_session(cookie_list):
    """Build a curl_cffi session with an Edge fingerprint and stored cookies.

    cookie_list is the list of {'name','value','domain'} dicts returned by
    load_cookies(). The X-IG-App-ID header value is presumably Instagram's
    public web app id required by the private API — confirm if requests
    start failing.
    """
    session = CurlSession(impersonate='edge101')
    session.headers.update({
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0',
        'X-IG-App-ID': '936619743392459',
        'X-Requested-With': 'XMLHttpRequest',
        'Referer': 'https://www.instagram.com/',
        'Origin': 'https://www.instagram.com',
        'Sec-CH-UA': '"Microsoft Edge";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
        'Sec-CH-UA-Mobile': '?0',
        'Sec-CH-UA-Platform': '"Windows"',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
    })
    # Load stored cookies into the session jar; entries missing a name or
    # value are silently skipped
    for c in cookie_list:
        name = c.get('name', '')
        value = c.get('value', '')
        domain = c.get('domain', '.instagram.com')
        if name and value:
            session.cookies.set(name, value, domain=domain)
    return session
|
||||
|
||||
|
||||
def main():
    """Paginate a creator's full IG timeline and insert any missing posts.

    Flow: resolve creator -> build authenticated session -> page through
    /api/v1/feed/user/ until exhausted, inserting unseen posts -> persist
    refreshed cookies. Stops on session expiry (401) or 3 consecutive errors;
    backs off 60s on rate limiting (429).
    """
    parser = argparse.ArgumentParser(description='Backfill missing Instagram posts')
    parser.add_argument('--creator-id', type=int, required=True, help='Paid content creator ID')
    args = parser.parse_args()

    # NOTE(review): 'media_downloader' looks like a database name, not a
    # file path — sqlite3 is monkey-patched to PostgreSQL by db_bootstrap;
    # confirm before running against plain sqlite
    conn = sqlite3.connect('media_downloader')

    # Look up creator
    cursor = conn.cursor()
    cursor.execute(
        "SELECT username FROM paid_content_creators WHERE id = ? AND platform = 'instagram'",
        (args.creator_id,)
    )
    row = cursor.fetchone()
    if not row:
        print(f"ERROR: Creator ID {args.creator_id} not found")
        sys.exit(1)
    username = row[0]

    print(f"Backfilling @{username} (creator_id={args.creator_id})")

    cookie_list = load_cookies(conn)
    session = create_session(cookie_list)

    # Look up Instagram user ID (numeric, needed for the feed endpoint)
    ig_user_id = lookup_ig_user_id(session, username)
    time.sleep(1)

    known = load_known_post_ids(conn, args.creator_id)
    print(f"Known posts in DB: {len(known)}")

    max_id = None             # pagination cursor returned by the previous page
    total_fetched = 0
    total_new = 0
    page = 0
    consecutive_errors = 0    # hard stop after 3 back-to-back failures

    while True:
        page += 1
        params = {'count': PER_PAGE}
        if max_id:
            params['max_id'] = max_id

        try:
            resp = session.get(
                f'https://www.instagram.com/api/v1/feed/user/{ig_user_id}/',
                params=params,
                timeout=15
            )
        except Exception as e:
            # Transport-level failure (timeout, DNS, TLS)
            print(f" Page {page}: request error: {e}")
            consecutive_errors += 1
            if consecutive_errors >= 3:
                print("Too many consecutive errors, stopping.")
                break
            time.sleep(5)
            continue

        if resp.status_code != 200:
            print(f" Page {page}: HTTP {resp.status_code}")
            if resp.status_code == 401:
                print("Session expired! Stopping.")
                break
            if resp.status_code == 429:
                # Rate limit: back off, does not count toward the error limit
                print("Rate limited. Waiting 60s...")
                time.sleep(60)
                continue
            consecutive_errors += 1
            if consecutive_errors >= 3:
                print("Too many consecutive errors, stopping.")
                break
            time.sleep(5)
            continue

        consecutive_errors = 0
        data = resp.json()
        items = data.get('items', [])
        more = data.get('more_available', False)
        next_max_id = data.get('next_max_id')

        if not items:
            print(f" Page {page}: no items returned, done.")
            break

        total_fetched += len(items)
        page_new = 0

        for node in items:
            code = node.get('code', '')
            if not code:
                continue
            if code in known:
                continue  # already in DB

            post_data = node_to_post_row(node)
            if not post_data:
                continue  # no usable media in this node

            insert_post(conn, args.creator_id, post_data)
            known.add(code)
            page_new += 1
            total_new += 1

        print(f" Page {page}: {len(items)} items, {page_new} new (total: {total_fetched} fetched, {total_new} new)")

        if not more or not next_max_id:
            print("No more pages available.")
            break

        max_id = next_max_id
        time.sleep(SLEEP_BETWEEN)

    # Save updated cookies (Instagram may rotate them during the run)
    try:
        save_cookies(conn, session)
    except Exception as e:
        print(f"Warning: failed to save cookies: {e}")

    conn.close()
    print(f"\nDone! Fetched {total_fetched} posts total, inserted {total_new} new posts for @{username}.")


if __name__ == '__main__':
    main()
|
||||
296
scripts/backfill_kylie_posts.py
Normal file
296
scripts/backfill_kylie_posts.py
Normal file
@@ -0,0 +1,296 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Backfill missing kyliejenner posts using authenticated browser cookies.
|
||||
|
||||
Paginates through the full timeline via /api/v1/feed/user/ and inserts
|
||||
any posts missing from paid_content_posts. Uses Edge browser fingerprint
|
||||
and the cookies stored in the instagram_browser scraper entry.
|
||||
|
||||
Usage:
|
||||
cd /opt/media-downloader
|
||||
./venv/bin/python3 -u scripts/backfill_kylie_posts.py
|
||||
"""
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
import os
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
# Bootstrap DB (pgadapter) — module-level import activates monkey-patching
|
||||
import modules.db_bootstrap # noqa: F401
|
||||
|
||||
import sqlite3 # routed to PostgreSQL via pgadapter
|
||||
from curl_cffi.requests import Session as CurlSession
|
||||
from datetime import datetime
|
||||
|
||||
# Internal DB id of the creator row this backfill targets.
CREATOR_ID = 101
# NOTE(review): USERNAME is not referenced anywhere in this script's visible
# code — confirm it is still needed or remove it.
USERNAME = 'kyliejenner'
# Instagram's numeric user id for the account (used in the feed API URL).
IG_USER_ID = '12281817'
# Items requested per feed page.
PER_PAGE = 33
SLEEP_BETWEEN = 2.0  # seconds between API calls
|
||||
|
||||
|
||||
def load_cookies(conn):
    """Load the saved browser cookies for the 'instagram_browser' scraper.

    Exits the process (status 1) when no cookies are stored or when the
    cookie list lacks a non-empty 'sessionid' entry, since the feed API
    cannot be called without an authenticated session.

    Returns the parsed cookie list (list of dicts with name/value/domain).
    """
    cur = conn.cursor()
    cur.execute("SELECT cookies_json FROM scrapers WHERE id = 'instagram_browser'")
    record = cur.fetchone()
    if record is None or not record[0]:
        print("ERROR: No cookies found in instagram_browser scraper")
        sys.exit(1)

    cookie_list = json.loads(record[0])
    session_present = any(
        entry.get('name') == 'sessionid' and entry.get('value')
        for entry in cookie_list
    )
    if not session_present:
        print("ERROR: No sessionid in cookies")
        sys.exit(1)
    return cookie_list
|
||||
|
||||
|
||||
def save_cookies(conn, session):
    """Persist the HTTP session's current cookie jar back to the
    'instagram_browser' scraper row (the server may have rotated cookies
    during the run).

    Does nothing when the jar is empty. Commits on success.
    """
    snapshot = [
        {
            'name': cookie.name,
            'value': cookie.value,
            'domain': cookie.domain or '.instagram.com',
        }
        for cookie in session.cookies
    ]
    if not snapshot:
        return

    cur = conn.cursor()
    cur.execute(
        "UPDATE scrapers SET cookies_json = ?, cookies_updated_at = ? WHERE id = 'instagram_browser'",
        (json.dumps(snapshot), datetime.now().isoformat())
    )
    conn.commit()
|
||||
|
||||
|
||||
def load_known_post_ids(conn):
    """Return the set of post_id shortcodes already stored for CREATOR_ID,
    used to skip posts the backfill has already inserted."""
    cur = conn.cursor()
    cur.execute("SELECT post_id FROM paid_content_posts WHERE creator_id = ?", (CREATOR_ID,))
    return {record[0] for record in cur.fetchall()}
|
||||
|
||||
|
||||
def best_media_url(node):
    """Pick the highest-resolution media URL from an IG API media node.

    Videos (media_type == 2) use video_versions; everything else falls
    back to image_versions2 candidates. "Best" is the variant with the
    largest width*height area. Returns None when no variants exist.
    """
    def area(variant):
        return variant.get('width', 0) * variant.get('height', 0)

    kind = node.get('media_type', 1)
    if kind == 2 and node.get('video_versions'):
        top = max(node['video_versions'], key=area)
        return top.get('url', '')

    variants = node.get('image_versions2', {}).get('candidates', [])
    if not variants:
        return None
    top = max(variants, key=area)
    return top.get('url', '')
|
||||
|
||||
|
||||
def node_to_post_row(node):
    """Convert an IG API node to DB row data.

    Returns a dict (post_id, published_at, content, srcs, attachment_count,
    is_pinned, tagged_users) or None when the node has no shortcode or no
    downloadable media URLs.
    """
    code = node.get('code', '')
    if not code:
        return None

    taken_at = node.get('taken_at', 0)
    # NOTE(review): fromtimestamp() converts using the server's local
    # timezone — confirm whether published_at should be UTC instead.
    published_at = datetime.fromtimestamp(taken_at).isoformat() if taken_at else None

    caption_obj = node.get('caption')
    # caption can be None or a dict; only dicts carry 'text'.
    caption = caption_obj.get('text', '') if isinstance(caption_obj, dict) else ''

    # Collect the best media URL per item: carousels (media_type 8) expand
    # each child; single photo/video nodes yield at most one URL.
    srcs = []
    media_type = node.get('media_type', 1)
    if media_type == 8 and node.get('carousel_media'):
        for child in node['carousel_media']:
            url = best_media_url(child)
            if url:
                srcs.append(url)
    else:
        url = best_media_url(node)
        if url:
            srcs.append(url)

    if not srcs:
        return None

    # Tagged users — de-duplicated, order of first appearance preserved,
    # gathered from the top-level node and each carousel child.
    tagged_users = []
    for tag in (node.get('usertags') or {}).get('in', []):
        uname = (tag.get('user') or {}).get('username')
        if uname and uname not in tagged_users:
            tagged_users.append(uname)
    for cm in node.get('carousel_media') or []:
        for tag in (cm.get('usertags') or {}).get('in', []):
            uname = (tag.get('user') or {}).get('username')
            if uname and uname not in tagged_users:
                tagged_users.append(uname)

    # Non-empty timeline_pinned_user_ids marks a pinned post.
    is_pinned = 1 if node.get('timeline_pinned_user_ids') else 0

    return {
        'post_id': code,
        'published_at': published_at,
        'content': caption,
        'srcs': srcs,
        'attachment_count': len(srcs),
        'is_pinned': is_pinned,
        'tagged_users': tagged_users,
    }
|
||||
|
||||
|
||||
def insert_post(conn, post_data, creator_id=None):
    """Insert a post + attachments into the DB.

    Args:
        conn: DB connection (sqlite3-style, '?' placeholders).
        post_data: dict from node_to_post_row() — post_id, content,
            published_at, srcs, attachment_count, is_pinned.
        creator_id: target creator row id; defaults to the module-level
            CREATOR_ID (backward compatible with existing callers; the
            sibling backfill script passes it explicitly).

    Attachments are created in 'pending' state with the original media URL
    as download_url so the normal downloader can fetch them later.
    """
    if creator_id is None:
        creator_id = CREATOR_ID

    cursor = conn.cursor()
    now = datetime.now().isoformat()

    cursor.execute(
        """INSERT INTO paid_content_posts
           (creator_id, post_id, title, content, published_at, added_at,
            has_attachments, attachment_count, downloaded, is_pinned)
           VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
        (creator_id, post_data['post_id'], None, post_data['content'],
         post_data['published_at'], now,
         1 if post_data['attachment_count'] > 0 else 0,
         post_data['attachment_count'], False, post_data['is_pinned'])
    )

    # Re-select the inserted row's id rather than relying on lastrowid —
    # the connection may be routed through a PostgreSQL adapter.
    cursor.execute(
        "SELECT id FROM paid_content_posts WHERE creator_id = ? AND post_id = ?",
        (creator_id, post_data['post_id'])
    )
    row = cursor.fetchone()
    if not row:
        return
    db_post_id = row[0]

    # One attachment row per media URL; extension inferred from the URL
    # path (query string stripped), defaulting to image/.jpg.
    for idx, src_url in enumerate(post_data['srcs']):
        ext = '.mp4' if '.mp4' in src_url.split('?')[0] else '.jpg'
        file_type = 'video' if ext == '.mp4' else 'image'
        name = f"{post_data['post_id']}_{idx}{ext}"

        cursor.execute(
            """INSERT INTO paid_content_attachments
               (post_id, attachment_index, name, file_type, extension, server_path, download_url, status)
               VALUES (?, ?, ?, ?, ?, ?, ?, 'pending')""",
            (db_post_id, idx, name, file_type, ext,
             f"https://www.instagram.com/p/{post_data['post_id']}/?img_index={idx + 1}",
             src_url)
        )

    conn.commit()
|
||||
|
||||
|
||||
def main():
    """Paginate the full IG timeline for IG_USER_ID and insert any posts
    missing from paid_content_posts.

    Loop behavior: fetches PER_PAGE items per call to the private
    /api/v1/feed/user/ endpoint; stops on 401 (session expired), three
    consecutive errors, empty page, or exhausted pagination. 429 triggers
    a 60s backoff; other errors retry after 5s.
    """
    conn = sqlite3.connect('media_downloader')
    cookie_list = load_cookies(conn)
    known = load_known_post_ids(conn)

    print(f"Known posts in DB: {len(known)}")

    # Edge browser fingerprint: TLS impersonation plus matching headers.
    session = CurlSession(impersonate='edge101')
    session.headers.update({
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0',
        'X-IG-App-ID': '936619743392459',
        'X-Requested-With': 'XMLHttpRequest',
        'Referer': 'https://www.instagram.com/',
        'Origin': 'https://www.instagram.com',
        'Sec-CH-UA': '"Microsoft Edge";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
        'Sec-CH-UA-Mobile': '?0',
        'Sec-CH-UA-Platform': '"Windows"',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
    })
    for c in cookie_list:
        name = c.get('name', '')
        value = c.get('value', '')
        domain = c.get('domain', '.instagram.com')
        if name and value:
            session.cookies.set(name, value, domain=domain)

    max_id = None
    total_fetched = 0
    total_new = 0
    page = 0
    consecutive_errors = 0

    while True:
        page += 1
        params = {'count': PER_PAGE}
        if max_id:
            params['max_id'] = max_id

        try:
            resp = session.get(
                f'https://www.instagram.com/api/v1/feed/user/{IG_USER_ID}/',
                params=params,
                timeout=15
            )
        except Exception as e:
            print(f" Page {page}: request error: {e}")
            consecutive_errors += 1
            if consecutive_errors >= 3:
                print("Too many consecutive errors, stopping.")
                break
            time.sleep(5)
            continue

        if resp.status_code != 200:
            print(f" Page {page}: HTTP {resp.status_code}")
            if resp.status_code == 401:
                print("Session expired! Stopping.")
                break
            if resp.status_code == 429:
                print("Rate limited. Waiting 60s...")
                time.sleep(60)
                continue
            consecutive_errors += 1
            if consecutive_errors >= 3:
                print("Too many consecutive errors, stopping.")
                break
            time.sleep(5)
            continue

        consecutive_errors = 0
        data = resp.json()
        items = data.get('items', [])
        more = data.get('more_available', False)
        next_max_id = data.get('next_max_id')

        if not items:
            print(f" Page {page}: no items returned, done.")
            break

        total_fetched += len(items)
        page_new = 0

        for node in items:
            code = node.get('code', '')
            if not code:
                continue
            if code in known:
                continue

            post_data = node_to_post_row(node)
            if not post_data:
                continue

            insert_post(conn, post_data)
            known.add(code)
            page_new += 1
            total_new += 1

        print(f" Page {page}: {len(items)} items, {page_new} new (total: {total_fetched} fetched, {total_new} new)")

        if not more or not next_max_id:
            print("No more pages available.")
            break

        max_id = next_max_id
        time.sleep(SLEEP_BETWEEN)

    # Save updated cookies. A failure here must not abort the run before
    # the summary/close (same handling as the sibling backfill script).
    try:
        save_cookies(conn, session)
    except Exception as e:
        print(f"Warning: failed to save cookies: {e}")

    conn.close()

    print(f"\nDone! Fetched {total_fetched} posts total, inserted {total_new} new posts.")


if __name__ == '__main__':
    main()
|
||||
358
scripts/backfill_paid_content.py
Executable file
358
scripts/backfill_paid_content.py
Executable file
@@ -0,0 +1,358 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Backfill Paid Content from existing downloaded files.
|
||||
|
||||
This script:
|
||||
1. Scans a source directory containing previously downloaded content
|
||||
2. Matches files to posts/attachments in the database by ID
|
||||
3. Copies files to the proper download location
|
||||
4. Generates thumbnails
|
||||
5. Updates database records as if they were freshly downloaded
|
||||
|
||||
Usage:
|
||||
python3 backfill_paid_content.py /path/to/source/files --creator puffinasmr --platform fansly
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import hashlib
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import sqlite3
|
||||
import subprocess
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from io import BytesIO
|
||||
|
||||
# Add project root to path
|
||||
sys.path.insert(0, '/opt/media-downloader')
|
||||
|
||||
from modules.unified_database import UnifiedDatabase
|
||||
|
||||
|
||||
def get_file_hash(file_path: Path) -> str:
    """Compute the SHA-256 hex digest of a file, streaming it in 8 KiB
    chunks so large media files never load fully into memory."""
    digest = hashlib.sha256()
    with open(file_path, 'rb') as handle:
        while True:
            block = handle.read(8192)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()
|
||||
|
||||
|
||||
def generate_thumbnail(file_path: Path, file_type: str) -> bytes:
    """Generate a JPEG thumbnail (max 400px wide) for an image or video.

    Images go through Pillow; videos have a frame extracted at the
    1-second mark via ffmpeg. Returns raw JPEG bytes, or None when the
    type is unsupported or generation fails (failures are logged, never
    raised).
    """
    try:
        if file_type == 'image':
            from PIL import Image
            picture = Image.open(file_path)
            picture.thumbnail((400, 400), Image.Resampling.LANCZOS)
            # JPEG cannot carry alpha/palette — normalize to RGB first.
            if picture.mode in ('RGBA', 'P'):
                picture = picture.convert('RGB')
            out = BytesIO()
            picture.save(out, format='JPEG', quality=85)
            return out.getvalue()

        if file_type == 'video':
            # Grab one frame at t=1s, scaled to 400px wide, as MJPEG on stdout.
            proc = subprocess.run([
                'ffmpeg', '-i', str(file_path),
                '-ss', '00:00:01',
                '-vframes', '1',
                '-vf', 'scale=400:-1',
                '-f', 'image2pipe',
                '-vcodec', 'mjpeg',
                '-'
            ], capture_output=True, timeout=30)
            if proc.returncode == 0 and proc.stdout:
                return proc.stdout
    except Exception as e:
        print(f" Warning: Failed to generate thumbnail: {e}")
    return None
|
||||
|
||||
|
||||
def get_file_type(filename: str) -> str:
    """Classify a filename as 'image', 'video', or 'other' by its
    extension (case-insensitive)."""
    suffix = Path(filename).suffix.lower()
    image_exts = {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp'}
    video_exts = {'.mp4', '.webm', '.mov', '.avi', '.mkv', '.m4v'}
    if suffix in image_exts:
        return 'image'
    if suffix in video_exts:
        return 'video'
    return 'other'
|
||||
|
||||
|
||||
def sanitize_filename(name: str) -> str:
    """Make a string safe for filenames/directories: strip characters
    Windows forbids, collapse whitespace runs into single hyphens, and
    fall back to 'unnamed' when nothing survives."""
    cleaned = re.sub(r'[<>:"/\\|?*]', '', name)
    cleaned = re.sub(r'\s+', '-', cleaned.strip())
    return cleaned if cleaned else 'unnamed'
|
||||
|
||||
|
||||
def main():
    """Backfill paid-content downloads from a directory of existing files.

    Pipeline: parse CLI args -> locate creator in DB -> for each numeric
    post folder in source_dir, match it to a DB post, match each DB
    attachment to a source file, copy it into the canonical download tree,
    hash it, thumbnail it, and mark the attachment (and eventually the
    post) completed. --dry-run counts work without touching disk or DB.
    """
    parser = argparse.ArgumentParser(description='Backfill paid content from existing files')
    parser.add_argument('source_dir', help='Source directory containing downloaded files')
    parser.add_argument('--creator', required=True, help='Creator username')
    parser.add_argument('--platform', required=True, help='Platform (fansly, onlyfans, etc.)')
    parser.add_argument('--dry-run', action='store_true', help='Show what would be done without making changes')
    parser.add_argument('--limit', type=int, help='Limit number of posts to process')
    args = parser.parse_args()

    source_dir = Path(args.source_dir)
    if not source_dir.exists():
        print(f"Error: Source directory does not exist: {source_dir}")
        sys.exit(1)

    # Initialize database
    db = UnifiedDatabase()

    # Get config for base download path
    with db.get_connection() as conn:
        cursor = conn.cursor()
        cursor.execute("SELECT base_download_path FROM paid_content_config WHERE id = 1")
        row = cursor.fetchone()
        base_path = Path(row[0] if row else '/opt/immich/paid')

    # Find the creator in database
    with db.get_connection() as conn:
        cursor = conn.cursor()
        cursor.execute("""
            SELECT id, username, platform, service_id
            FROM paid_content_creators
            WHERE LOWER(username) = LOWER(?) AND LOWER(platform) = LOWER(?)
        """, (args.creator, args.platform))
        creator = cursor.fetchone()

    if not creator:
        print(f"Error: Creator '{args.creator}' on platform '{args.platform}' not found in database")
        sys.exit(1)

    creator_id, username, platform, service_id = creator
    print(f"Found creator: {username} ({platform}) - ID: {creator_id}")

    # Scan source directory for post folders (folders named by numeric post id).
    post_folders = [d for d in source_dir.iterdir() if d.is_dir() and d.name.isdigit()]
    print(f"Found {len(post_folders)} post folders in source directory")

    if args.limit:
        post_folders = post_folders[:args.limit]
        print(f"Limited to {args.limit} posts")

    # Stats
    stats = {
        'posts_found': 0,
        'posts_matched': 0,
        'files_copied': 0,
        'files_skipped': 0,
        'thumbnails_generated': 0,
        'errors': 0
    }

    for post_folder in post_folders:
        post_id = post_folder.name

        # Find post in database
        with db.get_connection() as conn:
            cursor = conn.cursor()
            cursor.execute("""
                SELECT id, title, published_at
                FROM paid_content_posts
                WHERE creator_id = ? AND post_id = ?
            """, (creator_id, post_id))
            post = cursor.fetchone()

        if not post:
            # Try partial match (post_id might be truncated in DB)
            with db.get_connection() as conn:
                cursor = conn.cursor()
                cursor.execute("""
                    SELECT id, title, published_at, post_id
                    FROM paid_content_posts
                    WHERE creator_id = ? AND post_id LIKE ?
                """, (creator_id, f"{post_id[:12]}%"))
                post = cursor.fetchone()
                if post:
                    post_id = post[3]  # Use the full post_id from DB

        if not post:
            print(f" Post {post_id}: Not found in database, skipping")
            continue

        post_db_id, post_title, published_at = post[0], post[1], post[2]
        stats['posts_matched'] += 1

        # Build destination directory - matches scraper's _build_file_path structure
        # Format: /base/platform/username/date/post_id/
        post_date = published_at[:10] if published_at else 'unknown-date'
        post_dir_name = post_id  # Just post_id, no prefix
        dest_dir = base_path / platform / sanitize_filename(username) / post_date / post_dir_name

        print(f" Post {post_id}: {post_title or '(no title)'}")
        print(f" -> {dest_dir}")

        # Get attachments for this post
        with db.get_connection() as conn:
            cursor = conn.cursor()
            cursor.execute("""
                SELECT id, name, server_path, status, local_path, attachment_index
                FROM paid_content_attachments
                WHERE post_id = ?
                ORDER BY attachment_index
            """, (post_db_id,))
            attachments = cursor.fetchall()

        # Scan files in source folder
        source_files = list(post_folder.iterdir())
        source_files = [f for f in source_files if f.is_file()]

        print(f" Found {len(source_files)} files, {len(attachments)} attachments in DB")

        for att in attachments:
            att_id, att_name, server_path, status, local_path, att_index = att

            # Skip if already completed with valid local_path
            if status == 'completed' and local_path and Path(local_path).exists():
                print(f" [{att_index}] Already downloaded: {att_name}")
                stats['files_skipped'] += 1
                continue

            # Try to find matching file in source
            # Files might be named with attachment ID or just the filename
            matching_file = None

            # Extract potential file ID from server_path or name
            if server_path:
                # Server path like /27/37/2737100bd05f040ae0a0b10c452be9efdf54816577e53775b96b035eac200cde.jpg
                # NOTE(review): server_filename is computed but never used in
                # the matching loop below — confirm whether hash-based
                # matching was intended and dropped.
                server_filename = Path(server_path).stem  # Get hash without extension

            for src_file in source_files:
                src_stem = src_file.stem
                src_name = src_file.name

                # Match by various patterns
                if att_name and src_name == att_name:
                    matching_file = src_file
                    break
                if att_name and src_stem == Path(att_name).stem:
                    matching_file = src_file
                    break
                # Match by attachment ID in filename (Fansly style: 286246551964098560.png)
                if src_stem.isdigit():
                    # Could be attachment ID
                    if att_name and src_stem in att_name:
                        matching_file = src_file
                        break

            if not matching_file:
                # Try to match by index
                if att_index < len(source_files):
                    # Sort source files and pick by index
                    sorted_files = sorted(source_files, key=lambda f: f.name)
                    matching_file = sorted_files[att_index]
                    print(f" [{att_index}] Matched by index: {matching_file.name}")

            if not matching_file:
                print(f" [{att_index}] No matching file found for: {att_name}")
                stats['errors'] += 1
                continue

            # Determine file type and extension
            file_type = get_file_type(matching_file.name)
            # NOTE(review): precedence here parses as
            #   matching_file.suffix or (Path(att_name).suffix if att_name else '.bin')
            # i.e. att_name's suffix is only a fallback when the source file
            # has no suffix — confirm this is the intended behavior.
            ext = matching_file.suffix or Path(att_name).suffix if att_name else '.bin'

            # Build destination filename - matches scraper's _build_file_path
            # Fansly uses just media ID (unique), other platforms use index prefix
            if att_name:
                sanitized_name = sanitize_filename(att_name)
                # Ensure extension is preserved
                if not sanitized_name.lower().endswith(ext.lower()):
                    sanitized_name = Path(att_name).stem + ext
                dest_filename = sanitized_name  # Fansly: no index prefix needed
            else:
                # Fallback to source filename
                dest_filename = matching_file.name

            dest_path = dest_dir / dest_filename

            print(f" [{att_index}] {matching_file.name} -> {dest_filename}")

            if args.dry_run:
                stats['files_copied'] += 1
                continue

            # Create destination directory
            dest_dir.mkdir(parents=True, exist_ok=True)

            # Copy file
            try:
                shutil.copy2(matching_file, dest_path)
                stats['files_copied'] += 1
            except Exception as e:
                print(f" Error copying file: {e}")
                stats['errors'] += 1
                continue

            # Compute file hash
            file_hash = get_file_hash(dest_path)
            file_size = dest_path.stat().st_size

            # Generate thumbnail
            thumbnail_data = generate_thumbnail(dest_path, file_type)
            if thumbnail_data:
                stats['thumbnails_generated'] += 1

            # Update database
            now = datetime.now().isoformat()
            with db.get_connection(for_write=True) as conn:
                cursor = conn.cursor()
                cursor.execute("""
                    UPDATE paid_content_attachments
                    SET status = 'completed',
                        local_path = ?,
                        local_filename = ?,
                        file_hash = ?,
                        file_size = ?,
                        file_type = ?,
                        downloaded_at = ?,
                        thumbnail_data = ?
                    WHERE id = ?
                """, (str(dest_path), dest_filename, file_hash, file_size, file_type, now, thumbnail_data, att_id))
                conn.commit()

        # Update post downloaded status
        if not args.dry_run:
            with db.get_connection(for_write=True) as conn:
                cursor = conn.cursor()
                # Check if all attachments are now completed
                cursor.execute("""
                    SELECT COUNT(*) FROM paid_content_attachments
                    WHERE post_id = ? AND status != 'completed'
                """, (post_db_id,))
                pending = cursor.fetchone()[0]

                if pending == 0:
                    cursor.execute("""
                        UPDATE paid_content_posts
                        SET downloaded = 1, download_date = ?
                        WHERE id = ?
                    """, (datetime.now().isoformat(), post_db_id))
                    conn.commit()

        stats['posts_found'] += 1

    # Print summary
    print("\n" + "=" * 50)
    print("BACKFILL SUMMARY")
    print("=" * 50)
    print(f"Posts found in source: {len(post_folders)}")
    print(f"Posts matched in DB: {stats['posts_matched']}")
    print(f"Files copied: {stats['files_copied']}")
    print(f"Files skipped (existing): {stats['files_skipped']}")
    print(f"Thumbnails generated: {stats['thumbnails_generated']}")
    print(f"Errors: {stats['errors']}")

    if args.dry_run:
        print("\n(Dry run - no changes made)")


if __name__ == '__main__':
    main()
|
||||
594
scripts/backfill_press.py
Normal file
594
scripts/backfill_press.py
Normal file
@@ -0,0 +1,594 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Backfill press articles from Google News RSS for the last year.
|
||||
|
||||
Google News RSS:
|
||||
- 100 articles per query (cap)
|
||||
- No rate limiting, no API key needed
|
||||
- ~12 months of history
|
||||
- Strategy: 1-week windows to stay under the 100 cap
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
import xml.etree.ElementTree as ET
|
||||
from datetime import datetime, timedelta
|
||||
from urllib.parse import urlparse
|
||||
|
||||
# Bootstrap database
|
||||
sys.path.insert(0, '/opt/media-downloader')
|
||||
import modules.db_bootstrap # noqa: E402,F401
|
||||
|
||||
from modules.universal_logger import get_logger
|
||||
|
||||
logger = get_logger('PressBackfill')
|
||||
|
||||
# SECURITY NOTE(review): hardcoded credential committed to source — move this
# to an environment variable or secrets store and rotate the password. It is
# also not referenced anywhere in this chunk of the file; confirm it is used.
DB_PASSWORD = "PNsihOXvvuPwWiIvGlsc9Fh2YmMmB"
# How far back to walk the Google News archive, in one-week windows.
WEEKS_BACK = 52

# Domains that return no content even with FlareSolverr
SKIP_DOMAINS = {
    'msn.com',
    'news.google.com',
    'imdb.com',
    'st-aug.edu',
}
|
||||
|
||||
|
||||
def fetch_google_news_window(name: str, start_date: str, end_date: str) -> list:
    """Fetch Google News RSS articles for a specific time window.
    Returns list of dicts with: title, url, published_date, source."""
    # %22 is an encoded double quote: exact-phrase search for `name`,
    # restricted to the window via Google's after:/before: operators.
    query = f'%22{name.replace(" ", "+")}%22+after:{start_date}+before:{end_date}'
    url = f'https://news.google.com/rss/search?q={query}&hl=en&gl=US&ceid=US:en'

    # Up to three attempts; 5s pause between retries, then give up with [].
    for attempt in range(3):
        try:
            req = urllib.request.Request(url, headers={
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
            })
            with urllib.request.urlopen(req, timeout=30) as response:
                data = response.read().decode('utf-8')

            root = ET.fromstring(data)
            articles = []
            for item in root.findall('.//item'):
                title_el = item.find('title')
                link_el = item.find('link')
                pub_el = item.find('pubDate')
                source_el = item.find('source')

                # Title and link are mandatory; skip malformed entries.
                if title_el is None or link_el is None:
                    continue

                title = title_el.text or ''
                # Google News titles often end with " - Source Name", strip it
                source_name = source_el.text if source_el is not None else ''
                if source_name and title.endswith(f' - {source_name}'):
                    title = title[:-len(f' - {source_name}')].strip()

                # Parse pubDate (RFC 2822 format)
                published_date = ''
                if pub_el is not None and pub_el.text:
                    try:
                        from email.utils import parsedate_to_datetime
                        dt = parsedate_to_datetime(pub_el.text)
                        published_date = dt.isoformat()
                    except Exception:
                        # Fall back to the raw RFC 2822 string on parse failure.
                        published_date = pub_el.text

                articles.append({
                    'title': title,
                    'url': link_el.text or '',
                    'published_date': published_date,
                    'source': source_name,
                })
            return articles
        except Exception as e:
            if attempt < 2:
                time.sleep(5)
                continue
            print(f" Error fetching Google News: {e}")
            return []
    return []
|
||||
|
||||
|
||||
# On-disk cache for article images, served back via /api/press/images/.
PRESS_IMAGE_CACHE = '/opt/media-downloader/data/press_images'
os.makedirs(PRESS_IMAGE_CACHE, exist_ok=True)
|
||||
|
||||
|
||||
def cache_press_image(image_url: str) -> str | None:
    """Download and cache an image locally. Returns API path.

    Cache key is the first 16 hex chars of the URL's SHA-256; returns the
    existing /api/press/images/... path when a non-empty cached file is
    found. Returns None for empty URLs, download failures, or tiny
    (<1000 byte) responses that are likely error pages/trackers.
    """
    if not image_url:
        return None

    url_hash = hashlib.sha256(image_url.encode('utf-8')).hexdigest()[:16]

    # Check if already cached (any of the extensions we may have saved under)
    for ext in ('.jpg', '.jpeg', '.png', '.webp', '.gif'):
        cached = os.path.join(PRESS_IMAGE_CACHE, f"{url_hash}{ext}")
        if os.path.exists(cached) and os.path.getsize(cached) > 0:
            return f"/api/press/images/{url_hash}{ext}"

    # Download
    try:
        req = urllib.request.Request(image_url, headers={
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
            'Accept': 'image/*,*/*',
        })
        with urllib.request.urlopen(req, timeout=15) as resp:
            image_data = resp.read()
        if len(image_data) < 1000:
            return None
    except Exception:
        # Try via FlareSolverr — but it can't fetch binary, so try fetching
        # the page and extracting the image URL that works
        return None

    # Infer extension by substring match on the URL; default to .jpg.
    ext = '.jpg'
    url_lower = image_url.lower()
    if '.png' in url_lower:
        ext = '.png'
    elif '.webp' in url_lower:
        ext = '.webp'
    elif '.gif' in url_lower:
        ext = '.gif'

    cached_path = os.path.join(PRESS_IMAGE_CACHE, f"{url_hash}{ext}")
    with open(cached_path, 'wb') as f:
        f.write(image_data)
    return f"/api/press/images/{url_hash}{ext}"
|
||||
|
||||
|
||||
def cache_content_images(html_content: str) -> str:
    """Find all <img ...> tags in HTML, cache each image locally via
    cache_press_image(), and rewrite src to the /api/press/images/ proxy
    path. Tags whose image cannot be cached are dropped entirely
    (a missing image beats a broken one)."""
    if not html_content:
        return html_content
    import re as _re

    def _rewrite(match):
        tag = match.group(0)
        src = match.group(1)
        # Leave empty srcs and already-proxied images untouched.
        if not src or src.startswith('/api/press/images/'):
            return tag
        local = cache_press_image(src)
        return tag.replace(src, local) if local else ''

    return _re.sub(r'<img\s+src="([^"]+)"[^>]*>', _rewrite, html_content)
|
||||
|
||||
|
||||
def decode_google_news_url(google_url: str) -> str | None:
|
||||
"""Decode a Google News redirect URL to the real article URL."""
|
||||
if 'news.google.com' not in google_url:
|
||||
return google_url
|
||||
try:
|
||||
from googlenewsdecoder import gnewsdecoder
|
||||
result = gnewsdecoder(google_url, interval=1)
|
||||
if result.get('status'):
|
||||
return result['decoded_url']
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
|
||||
|
||||
def extract_content(article_url: str) -> tuple[str | None, str | None]:
    """Extract article content and og:image from the real article URL.

    Tries a direct fetch first; when that yields no content, retries via
    FlareSolverr for bot-protected sites. Returns (content_html,
    image_url) — the direct fetch's image is kept as a fallback even when
    only FlareSolverr produced content.
    """
    direct_content, direct_image = _extract_content_direct(article_url)
    if direct_content:
        return (direct_content, direct_image)
    fs_content, fs_image = _extract_content_flaresolverr(article_url)
    return (fs_content, fs_image or direct_image)
|
||||
|
||||
|
||||
def _fetch_html_flaresolverr(url: str) -> str | None:
    """Fetch a page's HTML through the local FlareSolverr headless-browser
    proxy. Returns the HTML when the solve succeeds and the body looks
    substantial (>500 chars); otherwise None. All errors are swallowed."""
    try:
        import requests
        reply = requests.post(
            'http://localhost:8191/v1',
            json={'cmd': 'request.get', 'url': url, 'maxTimeout': 30000},
            timeout=45,
        )
        payload = reply.json()
        if payload.get('status') == 'ok':
            body = payload.get('solution', {}).get('response', '')
            if len(body) > 500:
                return body
    except Exception:
        pass
    return None
|
||||
|
||||
|
||||
def _extract_content_flaresolverr(url: str) -> tuple[str | None, str | None]:
    """Extract article content using FlareSolverr as the HTML fetcher.
    Returns (None, None) when the fetch yields nothing."""
    page_html = _fetch_html_flaresolverr(url)
    if page_html is None:
        return (None, None)
    return _parse_article_html(page_html, url)
|
||||
|
||||
|
||||
def _extract_content_direct(url: str) -> tuple[str | None, str | None]:
    """Self-contained article extraction over a plain HTTP fetch.
    Returns (content_html, image_url), or (None, None) on any failure."""
    import urllib.request

    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
    }
    try:
        request = urllib.request.Request(url, headers=browser_headers)
        with urllib.request.urlopen(request, timeout=20) as reply:
            page_html = reply.read().decode('utf-8', errors='replace')
            return _parse_article_html(page_html, url)
    except Exception:
        return (None, None)
|
||||
|
||||
|
||||
def _parse_article_html(raw_html: str, url: str) -> tuple[str | None, str | None]:
    """Parse raw HTML into article content and og:image. Returns (content_html, image_url).

    Pipeline: (1) pull og:image / twitter:image for a thumbnail, (2) run
    readability to isolate the article body, (3) rebuild sanitized HTML from
    the allowed element types, (4) interleave article images, (5) apply
    quality/garbage heuristics. Any exception yields (None, None).
    """
    import re
    from urllib.parse import urljoin

    try:

        from readability import Document
        from bs4 import BeautifulSoup

        # Extract og:image for thumbnail
        og_soup = BeautifulSoup(raw_html, 'html.parser')
        og_image = None
        og_tag = og_soup.find('meta', property='og:image')
        if og_tag and og_tag.get('content'):
            og_image = og_tag['content']
        if not og_image:
            # Fall back to the Twitter card image when og:image is absent.
            tw_tag = og_soup.find('meta', attrs={'name': 'twitter:image'})
            if tw_tag and tw_tag.get('content'):
                og_image = tw_tag['content']
        import bleach

        doc = Document(raw_html, url=url)
        content_html = doc.summary()

        # Readability produced nothing usable -> keep only the thumbnail.
        if not content_html or len(content_html.strip()) < 50:
            return (None, og_image)

        reader_soup = BeautifulSoup(content_html, 'html.parser')

        # Boilerplate lines (bylines, share prompts, newsletter CTAs) to drop.
        junk_text_re = re.compile(
            r'(^published|^updated|^modified|^posted|^by\s|^written by|^photo:|^image:|^credit:|'
            r'share or comment|share this article|comment on this|follow us on|'
            r'sign up for|subscribe to|have you got a story|tips@|email us)',
            re.I
        )

        # Whitelist of inline markup kept inside rebuilt elements.
        inline_tags = ['b', 'i', 'em', 'strong', 'a', 'br']
        inline_attrs = {'a': ['href']}

        html_parts = []
        for el in reader_soup.find_all(['p', 'h2', 'h3', 'h4', 'blockquote', 'ul', 'ol']):
            text = el.get_text(strip=True)
            # Drop very short fragments (captions, stray labels).
            if len(text) < 30:
                continue
            if junk_text_re.search(text):
                continue
            tag = el.name
            # Sanitize the element's inner HTML down to the inline whitelist.
            inner = bleach.clean(
                el.decode_contents(), tags=inline_tags,
                attributes=inline_attrs, strip=True, protocols=['http', 'https']
            ).strip()
            if not inner:
                continue
            if tag == 'p':
                html_parts.append(f'<p>{inner}</p>')
            elif tag in ('h2', 'h3', 'h4'):
                html_parts.append(f'<{tag}>{inner}</{tag}>')
            elif tag == 'blockquote':
                html_parts.append(f'<blockquote><p>{inner}</p></blockquote>')
            elif tag in ('ul', 'ol'):
                # Rebuild lists item by item, skipping near-empty entries.
                items = []
                for li in el.find_all('li', recursive=False):
                    li_inner = bleach.clean(
                        li.decode_contents(), tags=inline_tags,
                        attributes=inline_attrs, strip=True, protocols=['http', 'https']
                    ).strip()
                    if li_inner and len(li.get_text(strip=True)) > 10:
                        items.append(f'<li>{li_inner}</li>')
                if items:
                    html_parts.append(f'<{tag}>{"".join(items)}</{tag}>')

        # Images from readability
        junk_img_re = re.compile(r'(logo|icon|pixel|spacer|blank|1x1|svg|avatar|spinner|/ct/|cts\.businesswire)', re.I)
        seen_srcs = set()
        article_images = []
        for img in reader_soup.find_all('img'):
            src = img.get('src', '')
            if src and src.startswith(('http://', 'https://')) and src not in seen_srcs:
                if junk_img_re.search(src):
                    continue
                seen_srcs.add(src)
                alt = (img.get('alt', '') or '').strip()
                article_images.append(f'<img src="{bleach.clean(src)}" alt="{bleach.clean(alt)}">')

        # If readability found no images, grab first real image from original HTML
        if not article_images:
            orig_soup = BeautifulSoup(raw_html, 'html.parser')
            for noise in orig_soup.find_all(['script', 'style', 'nav', 'footer', 'header',
                                             'aside', 'form', 'noscript', 'svg']):
                noise.decompose()
            for img in orig_soup.find_all('img'):
                # Lazy-load attributes take priority over the plain src.
                src = (img.get('data-src') or img.get('data-lazy-src') or
                       img.get('data-original') or img.get('src') or '')
                if not src or not src.startswith(('http://', 'https://')):
                    continue
                src_lower = src.lower()
                if any(x in src_lower for x in ('logo', 'icon', 'pixel', 'spacer', 'blank',
                                                '1x1', 'svg', 'avatar', 'spinner', '/ct/')):
                    continue
                alt = (img.get('alt', '') or '').strip()
                article_images.append(f'<img src="{bleach.clean(src)}" alt="{bleach.clean(alt)}">')
                break  # Only first real image

        # Merge text + images: spread images evenly between text parts.
        if article_images and html_parts:
            text_count = len(html_parts)
            img_count = len(article_images)
            interval = max(1, text_count // (img_count + 1))
            merged = []
            img_idx = 0
            for i, part in enumerate(html_parts):
                merged.append(part)
                if img_idx < img_count and (i + 1) % interval == 0:
                    merged.append(article_images[img_idx])
                    img_idx += 1
            # Any images that didn't fit the cadence go at the end.
            while img_idx < img_count:
                merged.append(article_images[img_idx])
                img_idx += 1
            html_parts = merged
        elif article_images and not html_parts:
            html_parts = article_images

        # Last resort: plain-text paragraphs from readability's output.
        if not html_parts:
            text = reader_soup.get_text(separator='\n\n', strip=True)
            if text:
                for para in text.split('\n\n'):
                    para = para.strip()
                    if len(para) > 30:
                        html_parts.append(f'<p>{bleach.clean(para)}</p>')

        if not html_parts:
            return (None, og_image)

        # Quality check: drop parts whose average "word" length is implausibly
        # high (usually minified JS or concatenated navigation text).
        from bs4 import BeautifulSoup as BS
        clean_parts = []
        for part in html_parts:
            part_soup = BS(part, 'html.parser')
            part_text = part_soup.get_text(strip=True)
            if len(part_text) > 100:
                words = part_text.split()
                avg_word_len = len(part_text) / max(len(words), 1)
                if avg_word_len > 12:
                    continue
            clean_parts.append(part)

        if not clean_parts:
            return (None, og_image)

        result = '\n'.join(clean_parts)
        plain_text = BS(result, 'html.parser').get_text(separator=' ', strip=True)

        # Whole-article garbage signals: UI chrome, consent banners, US-state
        # pickers that indicate we scraped a template instead of an article.
        garbage_re = re.compile(
            r'(use (left|right|escape)|arrow keys|navigate between|'
            r'sign (in|up) with|we won.t post|social account|'
            r'accept cookies|cookie policy|privacy policy|terms of (use|service)|'
            r'AlabamaAlaska|CaliforniaColorado|United States of America)',
            re.I
        )
        if len(plain_text) < 200 or garbage_re.search(plain_text):
            return (None, og_image)

        return (result, og_image)
    except Exception:
        return (None, None)
|
||||
|
||||
|
||||
def main():
    """Backfill press articles for every configured celebrity.

    Reads config/dedup state from Postgres via the psql CLI, queries Google
    News in one-week windows going back WEEKS_BACK weeks, extracts article
    content, caches images locally, and inserts new rows into press_articles.
    """
    # Get configured celebrities
    env = os.environ.copy()
    env['PGPASSWORD'] = DB_PASSWORD

    result = subprocess.run(
        ['psql', '-h', 'localhost', '-U', 'media_downloader', '-d', 'media_downloader',
         '-tAc', "SELECT celebrity_ids FROM press_config WHERE id = 1"],
        capture_output=True, text=True, env=env
    )
    celebrity_ids = json.loads(result.stdout.strip()) if result.stdout.strip() else []

    if not celebrity_ids:
        print("No celebrities configured in press_config")
        return

    # Get celebrity names
    # NOTE(review): IDs come from our own press_config JSON and are ints, so
    # the f-string interpolation below is not user-controlled — but a
    # parameterized query would still be safer.
    placeholders = ','.join(str(i) for i in celebrity_ids)
    result = subprocess.run(
        ['psql', '-h', 'localhost', '-U', 'media_downloader', '-d', 'media_downloader',
         '-tAF', '|', '-c', f"SELECT id, name FROM celebrity_profiles WHERE id IN ({placeholders})"],
        capture_output=True, text=True, env=env
    )
    celebrities = []
    for line in result.stdout.strip().splitlines():
        if '|' in line:
            parts = line.split('|')
            celebrities.append({'id': int(parts[0]), 'name': parts[1].strip()})

    if not celebrities:
        print("No celebrities found")
        return

    # Get existing URL hashes for dedup
    result = subprocess.run(
        ['psql', '-h', 'localhost', '-U', 'media_downloader', '-d', 'media_downloader',
         '-tAc', "SELECT url_hash FROM press_articles"],
        capture_output=True, text=True, env=env
    )
    existing_hashes = set(line.strip() for line in result.stdout.strip().splitlines() if line.strip())
    print(f"Existing articles: {len(existing_hashes)}")

    # Also get existing titles per celebrity for dedup
    result = subprocess.run(
        ['psql', '-h', 'localhost', '-U', 'media_downloader', '-d', 'media_downloader',
         '-tAF', '|', '-c', "SELECT celebrity_id, title FROM press_articles"],
        capture_output=True, text=True, env=env
    )
    existing_titles = set()
    for line in result.stdout.strip().splitlines():
        if '|' in line:
            # Split on the first '|' only — titles may contain '|' themselves.
            parts = line.split('|', 1)
            existing_titles.add((parts[0].strip(), parts[1].strip()))

    now = datetime.now()
    total_new = 0
    total_fetched = 0

    for celeb in celebrities:
        celeb_id = celeb['id']
        celeb_name = celeb['name']
        print(f"\n{'='*60}")
        print(f"Backfilling: {celeb_name} (id={celeb_id})")
        print(f"{'='*60}")

        celeb_new = 0

        # Query in 1-week windows going back
        for week in range(WEEKS_BACK):
            end_dt = now - timedelta(weeks=week)
            start_dt = now - timedelta(weeks=week + 1)

            start_str = start_dt.strftime('%Y-%m-%d')
            end_str = end_dt.strftime('%Y-%m-%d')
            week_label = f"Week -{week+1} ({start_dt.strftime('%b %d')} - {end_dt.strftime('%b %d')})"
            print(f"\n {week_label}...", end='', flush=True)

            articles = fetch_google_news_window(celeb_name, start_str, end_str)
            total_fetched += len(articles)

            if not articles:
                print(f" no articles")
                continue

            # Warn if we hit the 100 cap (may be missing articles)
            cap_warning = " [HIT 100 CAP]" if len(articles) >= 100 else ""
            print(f" {len(articles)} found{cap_warning}", flush=True)
            week_new = 0

            for article in articles:
                google_url = article.get('url', '')
                if not google_url:
                    continue

                title = article.get('title', '').strip()
                # Title-level dedup (keys are strings because psql output is text).
                if title and (str(celeb_id), title) in existing_titles:
                    continue

                # Only keep articles where celeb name appears in the title
                if not title or celeb_name.lower() not in title.lower():
                    continue

                # Decode Google News URL to real article URL
                article_url = decode_google_news_url(google_url)
                if not article_url:
                    continue

                # Skip domains that are JS-rendered or block scrapers
                parsed_check = urlparse(article_url)
                host = parsed_check.netloc.lower()
                # Check if host or any parent domain is in SKIP_DOMAINS
                if any(host == d or host.endswith('.' + d) for d in SKIP_DOMAINS):
                    continue

                url_hash = hashlib.sha256(article_url.encode('utf-8')).hexdigest()
                if url_hash in existing_hashes:
                    continue

                # Parse domain from real URL
                parsed = urlparse(article_url)
                domain = parsed.netloc.replace('www.', '')

                published_date = article.get('published_date', '')
                # NOTE(review): 'source' is collected here but never used below.
                source = article.get('source', '')

                # Extract content and og:image (with rate limiting to be polite)
                content, og_image = extract_content(article_url)

                # Cache all inline images in the content to local proxy
                if content:
                    content = cache_content_images(content)

                if content:
                    # Build a plain-text snippet by stripping tags from content.
                    import re as _re3
                    snippet = _re3.sub(r'<[^>]+>', ' ', content)
                    snippet = ' '.join(snippet.split())[:300]
                else:
                    snippet = title[:300] if title else ''

                # Cache the og:image locally, fall back to first inline image
                image_url = cache_press_image(og_image) if og_image else None
                if not image_url and content:
                    import re as _re2
                    m = _re2.search(r'<img\s+src="(/api/press/images/[^"]+)"', content)
                    if m:
                        image_url = m.group(1)
                time.sleep(0.5)

                # Insert using parameterized query via psycopg2
                # NOTE(review): a fresh connection is opened and closed for every
                # article; hoisting one connection per run would be cheaper.
                import psycopg2
                try:
                    pg_conn = psycopg2.connect(
                        host='localhost', user='media_downloader',
                        password=env.get('PGPASSWORD', ''), dbname='media_downloader'
                    )
                    pg_cur = pg_conn.cursor()
                    pg_cur.execute("""INSERT INTO press_articles
                        (celebrity_id, title, url, url_hash, domain, published_date,
                         image_url, language, country, article_content, snippet, notified, read)
                        VALUES (%s, %s, %s, %s, %s, %s, %s, 'en', '', %s, %s, 1, 0)
                        ON CONFLICT DO NOTHING""",
                        (celeb_id, title, article_url, url_hash, domain,
                         published_date, image_url or '', content, snippet))
                    # rowcount is 0 when ON CONFLICT suppressed the insert.
                    inserted = pg_cur.rowcount > 0
                    pg_conn.commit()
                    pg_cur.close()
                    pg_conn.close()
                except Exception as db_err:
                    print(f" DB error: {db_err}")
                    inserted = False
                if inserted:
                    week_new += 1
                    existing_hashes.add(url_hash)
                    existing_titles.add((str(celeb_id), title))

            if week_new > 0:
                print(f" Added {week_new} new articles")
                celeb_new += week_new

            # Small delay between queries to be polite
            time.sleep(1)

        total_new += celeb_new
        print(f"\n {celeb_name}: {celeb_new} new articles added")

    print(f"\n{'='*60}")
    print(f"DONE: Fetched {total_fetched} total, added {total_new} new articles")
    print(f"{'='*60}")
|
||||
|
||||
|
||||
# Run only when executed as a script, not on import.
if __name__ == '__main__':
    main()
|
||||
404
scripts/backfill_tagged_users.py
Executable file
404
scripts/backfill_tagged_users.py
Executable file
@@ -0,0 +1,404 @@
|
||||
#!/opt/media-downloader/venv/bin/python3 -u
|
||||
"""
|
||||
One-off script to backfill tagged users for existing Instagram posts.
|
||||
|
||||
Tries imginn.com first (no rate limits), then falls back to instagram.com
|
||||
for posts where imginn didn't have tag data.
|
||||
|
||||
Tracks checked posts in a local file so re-runs skip already-checked posts
|
||||
without polluting the database.
|
||||
|
||||
Usage:
|
||||
python3 /opt/media-downloader/scripts/backfill_tagged_users.py [--creator-id N] [--limit N] [--dry-run]
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
|
||||
sys.path.insert(0, '/opt/media-downloader')
|
||||
import modules.db_bootstrap # noqa: F401 — triggers pgadapter monkey-patch
|
||||
|
||||
import sqlite3
|
||||
|
||||
# Local file tracking post IDs already checked (so re-runs can skip them).
CHECKED_FILE = '/tmp/backfill_checked_posts.txt'
# NOTE(review): SOCKS credentials are hardcoded in source — they leak via
# version control and anyone reading this file. Load from an env var or a
# secrets file instead.
NORDVPN_CREDS = 'Dc9mgrpJnFnkTtc5iQkGNwLM:fKd2ZEjBUJ3YDQ5hhcoTKsnW'
# Pool of NordVPN SOCKS5 endpoints rotated through on errors/rate limits.
NORDVPN_SERVERS = [
    'us.socks.nordhold.net',
    'nl.socks.nordhold.net',
    'se.socks.nordhold.net',
    'amsterdam.nl.socks.nordhold.net',
    'atlanta.us.socks.nordhold.net',
    'chicago.us.socks.nordhold.net',
    'dallas.us.socks.nordhold.net',
    'los-angeles.us.socks.nordhold.net',
    'new-york.us.socks.nordhold.net',
    'stockholm.se.socks.nordhold.net',
]
# Index of the currently selected server (mutated by rotate_proxy()).
_proxy_index = 0
|
||||
|
||||
|
||||
def get_current_proxy():
    """Return the proxy mapping for the currently selected server (no rotation)."""
    host = NORDVPN_SERVERS[_proxy_index % len(NORDVPN_SERVERS)]
    return {'https': 'socks5://%s@%s:1080' % (NORDVPN_CREDS, host)}
|
||||
|
||||
|
||||
def rotate_proxy():
    """Advance to the next SOCKS server and return its proxy mapping.

    Intended for rate-limit or persistent-error situations.
    """
    global _proxy_index
    _proxy_index = _proxy_index + 1
    host = NORDVPN_SERVERS[_proxy_index % len(NORDVPN_SERVERS)]
    print(' Rotating proxy -> ' + host, flush=True)
    return {'https': 'socks5://%s@%s:1080' % (NORDVPN_CREDS, host)}
|
||||
|
||||
|
||||
def get_next_proxy():
    """Legacy alias: rotate to the next server and return it (retry path only)."""
    return rotate_proxy()
|
||||
|
||||
|
||||
def load_checked_posts():
    """Return the set of post DB IDs recorded as checked in previous runs."""
    checked = set()
    if os.path.exists(CHECKED_FILE):
        with open(CHECKED_FILE, 'r') as fh:
            for raw in fh:
                token = raw.strip()
                # Ignore blank or malformed lines.
                if token.isdigit():
                    checked.add(int(token))
    return checked
|
||||
|
||||
|
||||
def save_checked_post(db_id):
    """Record a checked post ID in the append-only local tracking file."""
    with open(CHECKED_FILE, 'a') as fh:
        fh.write(str(db_id) + '\n')
|
||||
|
||||
|
||||
def get_session():
    """Build a curl_cffi session impersonating a desktop Chrome browser."""
    from curl_cffi.requests import Session

    # Different curl_cffi builds support different impersonation targets;
    # probe a few and fall back to a vanilla session if none works.
    session = None
    for candidate in ("chrome131", "chrome136", "chrome"):
        try:
            session = Session(impersonate=candidate)
            break
        except Exception:
            session = None
    if session is None:
        session = Session()

    session.headers.update({
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                      '(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9',
        'Referer': 'https://www.instagram.com/',
    })
    return session
|
||||
|
||||
|
||||
def get_posts_without_tags(conn, creator_id=None, limit=0):
    """Return (db_id, post_id, creator_id, username) tuples for Instagram
    posts that have no tagged-user rows yet, ordered by oldest DB id first.

    Stories and highlights are excluded; an optional creator filter and row
    limit can be applied.
    """
    query = """
    SELECT p.id, p.post_id, p.creator_id, c.username
    FROM paid_content_posts p
    JOIN paid_content_creators c ON p.creator_id = c.id
    LEFT JOIN paid_content_post_tagged_users tu ON tu.post_id = p.id
    WHERE c.service_id = 'instagram'
    AND p.deleted_at IS NULL
    AND p.post_id NOT LIKE 'story_%%'
    AND p.post_id NOT LIKE 'highlight_%%'
    AND tu.post_id IS NULL
    """
    params = []
    if creator_id:
        query += " AND p.creator_id = ?"
        params.append(creator_id)
    query += " ORDER BY p.id ASC"
    if limit:
        # limit comes from argparse as an int, so interpolation is safe here.
        query += f" LIMIT {limit}"

    cur = conn.cursor()
    cur.execute(query, params)
    return [tuple(row[:4]) for row in cur.fetchall()]
|
||||
|
||||
|
||||
def save_tagged_users(conn, post_db_id, usernames):
    """Insert one row per tagged username for a post; duplicates are skipped
    via ON CONFLICT DO NOTHING. Commits once at the end."""
    stamp = time.strftime('%Y-%m-%dT%H:%M:%S')
    cur = conn.cursor()
    for name in usernames:
        cur.execute("""
        INSERT INTO paid_content_post_tagged_users (post_id, username, created_at)
        VALUES (?, ?, ?)
        ON CONFLICT DO NOTHING
        """, (post_db_id, name, stamp))
    conn.commit()
|
||||
|
||||
|
||||
def extract_usertags_from_item(item):
    """Collect unique tagged usernames from an Instagram media item dict.

    Looks at the item's own usertags plus every carousel child's usertags;
    preserves first-encounter order.
    """
    found = []

    def _collect(media):
        # usertags is {'in': [{'user': {'username': ...}}, ...]} when present.
        for tag in (media.get('usertags') or {}).get('in', []):
            name = (tag.get('user') or {}).get('username')
            if name and name not in found:
                found.append(name)

    _collect(item)
    for child in item.get('carousel_media') or []:
        _collect(child)
    return found
|
||||
|
||||
|
||||
def create_flaresolverr_session():
    """Ask the local FlareSolverr instance to create a persistent session.

    Returns the session ID string, or None when FlareSolverr is unreachable
    or returns no session.
    """
    import requests as std_requests

    try:
        reply = std_requests.post('http://localhost:8191/v1', json={
            'cmd': 'sessions.create',
        }, timeout=30)
        sid = reply.json().get('session')
        if sid:
            print(f" FlareSolverr session: {sid}")
            return sid
    except Exception as exc:
        print(f" FlareSolverr unavailable: {exc}")
    return None
|
||||
|
||||
|
||||
def destroy_flaresolverr_session(session_id):
    """Best-effort teardown of a FlareSolverr session; errors are swallowed."""
    if not session_id:
        return
    import requests as std_requests
    payload = {
        'cmd': 'sessions.destroy',
        'session': session_id,
    }
    try:
        std_requests.post('http://localhost:8191/v1', json=payload, timeout=10)
    except Exception:
        # Session cleanup is non-critical; ignore failures.
        pass
|
||||
|
||||
|
||||
def fetch_usertags_imginn(shortcode, flaresolverr_session=None):
    """Try fetching usertags from imginn.com via FlareSolverr.

    Returns (usernames, status): a list (possibly empty) on success, or
    (None, reason) on any failure. Reuses an existing FlareSolverr session
    when one is supplied.
    """
    import requests as std_requests

    url = f'https://imginn.com/p/{shortcode}/'

    try:
        payload = {
            'cmd': 'request.get',
            'url': url,
            'maxTimeout': 60000,
        }
        if flaresolverr_session:
            payload['session'] = flaresolverr_session

        resp = std_requests.post('http://localhost:8191/v1', json=payload, timeout=70)

        data = resp.json()
        if data.get('status') != 'ok':
            return None, 'imginn_flaresolverr_fail'

        html = data.get('solution', {}).get('response', '')
        if not html:
            return None, 'imginn_empty'

        # Parse tagged users from imginn HTML (same logic as instagram_adapter)
        tagged = []
        idx = html.find('class="tagged-user-list"')
        if idx < 0:
            # Page loaded but no tag widget present -> genuinely untagged.
            return [], 'imginn_no_tags'

        # Only scan a bounded window after the widget to avoid false matches.
        chunk = html[idx:idx + 5000]
        for m in re.finditer(r'class="name">\s*(\S+)\s*</div>', chunk):
            username = m.group(1).strip()
            # Accept only plausible Instagram handles.
            if re.match(r'^[a-zA-Z0-9_.]{1,30}$', username):
                tagged.append(username)

        return tagged, 'imginn_ok'

    except Exception as e:
        return None, f'imginn_error: {e}'
|
||||
|
||||
|
||||
def fetch_usertags_instagram(session, shortcode, max_retries=3):
    """Fetch usertags from Instagram post page HTML via proxy.
    Sticks with current proxy server; only rotates on connection errors or rate limits.

    Returns (usernames, status): a list on success ('ok'), or (None, reason)
    on failure ('rate_limited', 'not_found', 'login_required', etc.).
    """
    url = f'https://www.instagram.com/p/{shortcode}/'
    last_error = None

    for attempt in range(max_retries):
        try:
            # First attempt uses the sticky proxy; retries rotate servers.
            proxy = get_current_proxy() if attempt == 0 else rotate_proxy()
            server = list(proxy.values())[0].split('@')[1].split(':')[0]
            resp = session.get(url, timeout=20, proxies=proxy)

            if resp.status_code == 429:
                # Rotate so the next call starts on a fresh server.
                rotate_proxy()
                return None, 'rate_limited'
            if resp.status_code == 404:
                return None, 'not_found'
            if resp.status_code != 200:
                last_error = f'http_{resp.status_code}'
                print(f' retry {attempt+1}/{max_retries}: http {resp.status_code} via {server}', flush=True)
                continue

            html = resp.text

            # Find embedded post JSON in the page
            idx = html.find('xdt_api__v1__media__shortcode__web_info')
            if idx < 0:
                if 'LoginAndSignupPage' in html or '"require_login":true' in html:
                    return None, 'login_required'
                return None, 'no_embedded_data'

            # Find the items JSON object
            items_start = html.find('{"items":[{"code"', idx)
            if items_start < 0:
                return None, 'no_items_data'

            # Find balanced braces to extract the full JSON
            # (bounded scan of at most 500 KB past the opening brace).
            depth = 0
            end = items_start
            for i in range(items_start, min(items_start + 500000, len(html))):
                if html[i] == '{':
                    depth += 1
                elif html[i] == '}':
                    depth -= 1
                    if depth == 0:
                        end = i + 1
                        break

            data = json.loads(html[items_start:end])
            items = data.get('items', [])
            if not items:
                return [], 'ok'

            tagged = extract_usertags_from_item(items[0])
            return tagged, 'ok'

        except json.JSONDecodeError as e:
            # Malformed embedded JSON is not recoverable by retrying.
            return None, f'json_error: {e}'
        except Exception as e:
            last_error = str(e)
            short_err = str(e).split('.')[0][:80]
            # NOTE(review): if get_current_proxy()/rotate_proxy() itself raises,
            # 'server' is unbound here and this print raises NameError — confirm.
            print(f' retry {attempt+1}/{max_retries}: {short_err} via {server}', flush=True)
            # Connection/proxy errors → retry with next server
            continue

    return None, last_error or 'max_retries'
|
||||
|
||||
|
||||
def fetch_post_usertags(session, shortcode, flaresolverr_session=None):
    """Fetch usertags for a post shortcode via the Instagram proxy path.

    NOTE(review): flaresolverr_session is accepted for interface compatibility
    but is not used — the imginn path is currently bypassed.
    """
    return fetch_usertags_instagram(session, shortcode)
|
||||
|
||||
|
||||
def main():
    """Backfill tagged users for existing Instagram posts.

    Loads candidate posts from the DB, skips ones already checked in prior
    runs (local tracking file), then fetches each post's tags with polite
    delays, exponential backoff on rate limits, and a hard stop when
    Instagram starts demanding login.
    """
    parser = argparse.ArgumentParser(description='Backfill tagged users for Instagram posts')
    parser.add_argument('--dry-run', action='store_true', help='Show what would be done without saving')
    parser.add_argument('--creator-id', type=int, help='Only process a specific creator')
    parser.add_argument('--limit', type=int, default=0, help='Max posts to process (0 = all)')
    args = parser.parse_args()

    # NOTE(review): 'media_downloader' looks like a DSN handled by the
    # db_bootstrap/pgadapter monkey-patch imported at module top, not a real
    # sqlite file path — confirm before running outside the app environment.
    conn = sqlite3.connect('media_downloader')
    posts = get_posts_without_tags(conn, args.creator_id, args.limit)

    if not posts:
        print("No posts need tagged user backfill.")
        return

    # Filter out already-checked posts (from previous runs)
    checked = load_checked_posts()
    if checked:
        before = len(posts)
        posts = [(db_id, sc, cid, u) for db_id, sc, cid, u in posts if db_id not in checked]
        if before != len(posts):
            print(f"Skipping {before - len(posts)} already-checked posts from previous runs")

    if not posts:
        print("All posts already checked.")
        return

    # Group by creator for display
    creators = {}
    for db_id, shortcode, creator_id, username in posts:
        if creator_id not in creators:
            creators[creator_id] = {'username': username, 'count': 0}
        creators[creator_id]['count'] += 1

    print(f"Found {len(posts)} Instagram posts to check across {len(creators)} creators")
    for cid, info in creators.items():
        print(f" @{info['username']}: {info['count']} posts")
    if args.dry_run:
        print("DRY RUN - no changes will be saved\n")

    session = get_session()

    total_tagged = 0
    total_no_tags = 0
    total_errors = 0
    # Backoff starts at 60s and doubles up to 10 minutes on repeated 429s.
    rate_limit_wait = 60

    try:
        for i, (db_id, shortcode, creator_id, username) in enumerate(posts):
            if i > 0:
                time.sleep(2)
            print(f" [{i+1}/{len(posts)}] @{username} {shortcode}: ", end='', flush=True)
            tagged, status = fetch_post_usertags(session, shortcode)

            if status == 'rate_limited':
                # Wait out the rate limit, then retry this post once.
                print(f"rate limited, waiting {rate_limit_wait}s...", flush=True)
                time.sleep(rate_limit_wait)
                rate_limit_wait = min(rate_limit_wait * 2, 600)
                print(f" [{i+1}/{len(posts)}] @{username} {shortcode}: ", end='', flush=True)
                tagged, status = fetch_post_usertags(session, shortcode)

            if tagged is None:
                print(f"ERROR {status}", flush=True)
                total_errors += 1
                if status == 'login_required':
                    # Further requests will all fail the same way — bail out.
                    print(" Instagram is requiring login. Stopping.")
                    break
                continue

            # Reset rate limit backoff on success
            rate_limit_wait = 60

            if tagged:
                if args.dry_run:
                    print(f"would tag {tagged}", flush=True)
                else:
                    save_tagged_users(conn, db_id, tagged)
                    print(f"tagged {tagged}", flush=True)
                total_tagged += 1
            else:
                # No tags found: remember it locally so re-runs skip it
                # (tagged posts are skipped via their DB rows instead).
                if not args.dry_run:
                    save_checked_post(db_id)
                print(f"no tags", flush=True)
                total_no_tags += 1

            # Polite delay
            time.sleep(random.uniform(0.5, 1.5))
    except KeyboardInterrupt:
        print("\nInterrupted!")

    conn.close()
    print(f"\nDone! {total_tagged} posts had tagged users, {total_no_tags} had none, {total_errors} errors (out of {len(posts)} total)")
|
||||
|
||||
|
||||
# Run only when executed as a script, not on import.
if __name__ == '__main__':
    main()
|
||||
225
scripts/bellazon_scraper.py
Normal file
225
scripts/bellazon_scraper.py
Normal file
@@ -0,0 +1,225 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Bellazon Forum Thread Image Scraper
|
||||
|
||||
Downloads all full-size images from a Bellazon forum thread.
|
||||
Bellazon uses <a href="full.jpg"><img src="full_thumb.jpg"></a> pattern.
|
||||
"""
|
||||
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import hashlib
|
||||
import requests
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse, urljoin
|
||||
from html import unescape
|
||||
|
||||
# Positional CLI args: [1] thread URL, [2] output directory (both optional).
THREAD_URL = sys.argv[1] if len(sys.argv) > 1 else "https://www.bellazon.com/main/topic/39089-india-reynolds/"
OUTPUT_DIR = sys.argv[2] if len(sys.argv) > 2 else "/opt/media-downloader/data/bellazon/india-reynolds"

# Browser-like headers so the forum serves normal pages.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'Referer': 'https://www.bellazon.com/',
}

# File extensions treated as downloadable media.
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.webm', '.mov', '.avi', '.mkv'}
MEDIA_EXTENSIONS = IMAGE_EXTENSIONS | VIDEO_EXTENSIONS

# URL substrings identifying forum chrome/UI assets rather than content.
SKIP_PATTERNS = [
    'avatar', 'emoji', 'icon', '/public/', 'rep_', 'style_',
    'star_', '/js/', '/css/', 'button', 'logo', 'loading',
    'spinner', 'pixel', 'spacer', '/default_photo',
    'profile_photo', '/skin_', '/set_resources/', 'screenshot',
]
|
||||
|
||||
|
||||
def get_page_count(html: str) -> int:
    """Read the thread's total page count from the 'Page X of Y' pager text.

    Defaults to 1 when no pager is present (single-page thread).
    """
    found = re.search(r'Page\s+\d+\s+of\s+(\d+)', html)
    if found:
        return int(found.group(1))
    return 1
|
||||
|
||||
|
||||
def is_media_url(url: str) -> bool:
    """True when the URL's path ends in a known image/video extension."""
    suffix = Path(urlparse(url).path).suffix.lower()
    return suffix in MEDIA_EXTENSIONS
|
||||
|
||||
|
||||
def should_skip(url: str) -> bool:
    """True when the URL matches any known junk/UI-asset pattern (case-insensitive)."""
    candidate = url.lower()
    for pattern in SKIP_PATTERNS:
        if pattern in candidate:
            return True
    return False
|
||||
|
||||
|
||||
def extract_images_from_html(html: str, base_url: str) -> list:
    """Extract full-size image URLs from page HTML.

    Priority: <a href="full.jpg"> wrapping <img src="thumb.jpg">
    Fallback: standalone <img src="image.jpg"> (non-thumb)

    Returns a de-duplicated, order-preserving list whose items are either
    URL strings or (url, filename) tuples (the latter for attachment.php
    links where the filename comes from the link text).
    """
    images = []
    thumb_urls = set()  # track thumbnails so we don't add them as standalone

    # Pattern 1: <a href="full-size"><img src="thumb"></a>
    # This catches the bellazon pattern where thumbnails link to full images
    for match in re.finditer(
        r'<a[^>]+href=["\']([^"\']+)["\'][^>]*>\s*<img[^>]+src=["\']([^"\']+)["\']',
        html, re.IGNORECASE | re.DOTALL
    ):
        href = unescape(match.group(1))
        img_src = unescape(match.group(2))

        if is_media_url(href) and not should_skip(href):
            full_url = urljoin(base_url, href)
            images.append(full_url)
            # Track the thumbnail so we skip it later
            thumb_urls.add(urljoin(base_url, img_src))

    # Pattern 2: Standalone <img> tags not wrapped in links to full-size
    for match in re.finditer(r'<img[^>]+src=["\']([^"\']+)["\']', html, re.IGNORECASE):
        url = unescape(match.group(1))
        if should_skip(url):
            continue
        full_url = urljoin(base_url, url)
        # Skip if this is a thumbnail we already have the full version of
        if full_url in thumb_urls:
            continue
        # Skip anything with _thumb or .thumb in the name
        if '_thumb' in url or '.thumb.' in url:
            continue
        if is_media_url(url):
            images.append(full_url)

    # Pattern 3: Links to external image files (not bellazon)
    for match in re.finditer(r'href=["\']([^"\']+)["\']', html, re.IGNORECASE):
        url = unescape(match.group(1))
        parsed = urlparse(url)
        if parsed.netloc and 'bellazon' not in parsed.netloc and is_media_url(url):
            images.append(url)

    # Pattern 4: Forum attachments (attachment.php?id=XXX) with video/image filenames
    # e.g. <a href="...attachment.php?id=6887160">B7A65853...MP4</a>
    for match in re.finditer(
        r'<a[^>]+href=["\']([^"\']*attachment\.php\?id=\d+)["\'][^>]*>([^<]+)</a>',
        html, re.IGNORECASE
    ):
        href = unescape(match.group(1))
        link_text = match.group(2).strip()
        # The link text carries the original filename; use its extension.
        ext = Path(link_text).suffix.lower()
        if ext in MEDIA_EXTENSIONS:
            full_url = urljoin(base_url, href)
            images.append((full_url, link_text))  # tuple: (url, filename)

    # Deduplicate preserving order
    seen = set()
    unique = []
    for item in images:
        # Tuples dedupe by URL only, ignoring the filename component.
        key = item[0] if isinstance(item, tuple) else item
        if key not in seen:
            seen.add(key)
            unique.append(item)
    return unique
|
||||
|
||||
|
||||
def download_media(item, output_dir: Path, session: requests.Session, seen_hashes: set) -> bool:
    """Download one media item and save it into output_dir.

    Args:
        item: Either a URL string, or a (url, original_filename) tuple as
            produced for forum attachments.
        output_dir: Directory the file is written into (must already exist).
        session: Shared HTTP session carrying the scraper's headers/cookies.
        seen_hashes: MD5 hex digests of content already saved; used to skip
            duplicates across pages. Mutated on successful save.

    Returns:
        True if a new file was written; False on error, duplicate content,
        non-media response, or a tiny placeholder file.
    """
    # item is either a URL string or a (url, filename) tuple
    if isinstance(item, tuple):
        url, orig_filename = item
    else:
        url, orig_filename = item, None

    try:
        resp = session.get(url, timeout=60)
        if resp.status_code != 200:
            return False

        # Reject obvious non-media responses (e.g. HTML error pages).
        content_type = resp.headers.get('content-type', '')
        if not any(t in content_type for t in ['image', 'video', 'octet-stream']):
            return False

        data = resp.content
        if len(data) < 5000:  # Skip tiny files (icons/placeholders)
            return False

        file_hash = hashlib.md5(data).hexdigest()
        if file_hash in seen_hashes:
            return False

        if orig_filename:
            filename = re.sub(r'[^\w\-_.]', '_', orig_filename)
        else:
            parsed = urlparse(url)
            filename = Path(parsed.path).name
            filename = re.sub(r'[^\w\-_.]', '_', filename)
            if not filename or filename == '_':
                filename = f"{file_hash}.jpg"

        filepath = output_dir / filename
        if filepath.exists():
            # Name collision with different content: disambiguate via hash.
            filepath = output_dir / f"{filepath.stem}_{file_hash[:8]}{filepath.suffix}"

        filepath.write_bytes(data)
        # FIX: record the hash only after a successful write; previously it
        # was added before write_bytes, so a write failure would permanently
        # suppress any retry of the same content.
        seen_hashes.add(file_hash)
        return True

    except Exception as e:
        display = url[:80] if not orig_filename else orig_filename
        print(f" Error: {display}: {e}", flush=True)
        return False
|
||||
|
||||
|
||||
def main():
    """Crawl every page of the thread and download all media found."""
    dest = Path(OUTPUT_DIR)
    dest.mkdir(parents=True, exist_ok=True)

    http = requests.Session()
    http.headers.update(HEADERS)

    print(f"Fetching: {THREAD_URL}", flush=True)
    resp = http.get(THREAD_URL, timeout=30)
    resp.raise_for_status()

    total_pages = get_page_count(resp.text)
    print(f"Total pages: {total_pages}", flush=True)

    hashes_seen = set()
    downloaded = 0
    skipped = 0

    for page_num in range(1, total_pages + 1):
        # Page 1 was already fetched above; later pages use the /page/N/ URL.
        if page_num == 1:
            page_url, html = THREAD_URL, resp.text
        else:
            page_url = f"{THREAD_URL.rstrip('/')}/page/{page_num}/"
            try:
                page_resp = http.get(page_url, timeout=30)
                page_resp.raise_for_status()
            except Exception as e:
                print(f" Error fetching page {page_num}: {e}", flush=True)
                continue
            html = page_resp.text

        found = extract_images_from_html(html, page_url)
        page_dl = 0
        for entry in found:
            if download_media(entry, dest, http, hashes_seen):
                page_dl += 1
                downloaded += 1
            else:
                skipped += 1

        print(f"Page {page_num}/{total_pages}: {page_dl} downloaded ({len(found)} found, {downloaded} total)", flush=True)

        # Be polite between page fetches.
        if page_num < total_pages:
            time.sleep(1)

    print(f"\nDone! {downloaded} images saved to {dest}", flush=True)
    print(f"Skipped: {skipped}", flush=True)


if __name__ == "__main__":
    main()
|
||||
106
scripts/cleanup-old-logs.py
Executable file
106
scripts/cleanup-old-logs.py
Executable file
@@ -0,0 +1,106 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Log Cleanup Script for Media Downloader
|
||||
Removes log files older than 7 days
|
||||
|
||||
Usage: python3 scripts/cleanup-old-logs.py
|
||||
Cron: 0 0 * * * /opt/media-downloader/venv/bin/python3 /opt/media-downloader/scripts/cleanup-old-logs.py
|
||||
"""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Add parent directory to path for imports
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
import glob
|
||||
from modules.universal_logger import get_logger
|
||||
|
||||
# Configuration
|
||||
RETENTION_DAYS = 7
|
||||
LOG_DIR = Path("/opt/media-downloader/logs")
|
||||
|
||||
# Initialize logger
|
||||
logger = get_logger('LogCleanup')
|
||||
|
||||
def cleanup_old_logs():
    """Remove log files older than RETENTION_DAYS from LOG_DIR.

    Scans both date-stamped logs (YYYYMMDD_component.log) and rotated logs
    (component.log.N) and deletes any whose mtime is older than the cutoff.

    Returns:
        True when no errors occurred, False otherwise (including a missing
        log directory).
    """
    logger.info("LogCleanup", f"Starting log cleanup (retention: {RETENTION_DAYS} days)")

    # Check if log directory exists
    if not LOG_DIR.exists():
        logger.error("LogCleanup", f"Log directory not found: {LOG_DIR}")
        return False

    # Calculate cutoff date
    cutoff_date = datetime.now() - timedelta(days=RETENTION_DAYS)
    logger.debug("LogCleanup", f"Cutoff date: {cutoff_date.strftime('%Y-%m-%d %H:%M:%S')}")

    # Collect both naming schemes used by the logger.
    datetime_logs = list(LOG_DIR.glob("[0-9]*_*.log"))  # YYYYMMDD_component.log
    rotated_logs = list(LOG_DIR.glob("*.log.*"))        # component.log.1, component.log.2, etc.
    all_logs = datetime_logs + rotated_logs

    logger.info("LogCleanup", f"Found {len(all_logs)} total log files to check")

    # Track cleanup stats
    removed_count = 0
    removed_size = 0
    skipped_count = 0
    error_count = 0

    # Process each log file
    for log_file in all_logs:
        try:
            # FIX: single stat() call instead of two; the original called
            # stat() twice, racing against rotation/deletion between calls.
            st = log_file.stat()
            mtime = datetime.fromtimestamp(st.st_mtime)
            file_age_days = (datetime.now() - mtime).days
            file_size = st.st_size

            if mtime < cutoff_date:
                # File is old enough to delete
                try:
                    log_file.unlink()
                    removed_count += 1
                    removed_size += file_size
                    logger.info("LogCleanup", f"Removed old log: {log_file.name} (age: {file_age_days} days, size: {file_size:,} bytes)")
                except Exception as e:
                    error_count += 1
                    logger.error("LogCleanup", f"Failed to remove {log_file.name}: {e}")
            else:
                # File is still within retention period
                skipped_count += 1
                logger.debug("LogCleanup", f"Kept: {log_file.name} (age: {file_age_days} days)")

        except Exception as e:
            error_count += 1
            logger.error("LogCleanup", f"Error processing {log_file.name}: {e}")

    # Log summary
    if removed_count > 0:
        size_mb = removed_size / (1024 * 1024)
        logger.success("LogCleanup", f"Cleanup complete: Removed {removed_count} log file(s), freed {size_mb:.2f} MB")
    else:
        logger.info("LogCleanup", f"No old logs to clean up (all {skipped_count} logs are within {RETENTION_DAYS} days)")

    if error_count > 0:
        logger.warning("LogCleanup", f"Encountered {error_count} error(s) during cleanup")

    # Log final stats
    logger.info("LogCleanup", f"Summary: {removed_count} removed, {skipped_count} kept, {error_count} errors")

    return error_count == 0
|
||||
|
||||
def main():
    """Entry point: run the cleanup and exit 0 on success, 1 on any error."""
    try:
        ok = cleanup_old_logs()
    except Exception as e:
        logger.error("LogCleanup", f"Fatal error during log cleanup: {e}")
        sys.exit(1)
    sys.exit(0 if ok else 1)


if __name__ == '__main__':
    main()
|
||||
506
scripts/cloud_backup_restore.sh
Executable file
506
scripts/cloud_backup_restore.sh
Executable file
@@ -0,0 +1,506 @@
|
||||
#!/usr/bin/env bash
# ============================================================================
# Cloud Backup Restore Script
#
# Restores the full media-downloader + Immich stack from a B2 cloud backup.
# Run on a fresh Ubuntu 24.04 server (or the same machine after failure).
#
# Usage:
#   sudo bash cloud_backup_restore.sh [--rclone-conf /path/to/rclone.conf]
#
# Prerequisites on a fresh machine:
#   apt update && apt install -y rclone
#   Then place your rclone.conf at /root/.config/rclone/rclone.conf
#   (contains cloud-backup-remote + cloud-backup-crypt sections)
#
# The script is interactive — it will ask before each destructive step.
# ============================================================================

set -euo pipefail

# ── Configuration ──────────────────────────────────────────────────────────

IMMICH_BASE="/opt/immich"
APP_DIR="/opt/media-downloader"
RESTORE_TMP="/tmp/cloud-backup-restore"
LOG_FILE="/tmp/cloud_backup_restore.log"

# FIX: the old `RCLONE_CONF="${1:---rclone-conf}"` assignment was dead code
# (never read anywhere) and misleading — it stored either $1 or the literal
# option name. Only RCLONE_CONF_PATH below is actually used.
# If --rclone-conf was passed, grab the value; otherwise use the default.
if [[ "${1:-}" == "--rclone-conf" ]]; then
    RCLONE_CONF_PATH="${2:-/root/.config/rclone/rclone.conf}"
else
    RCLONE_CONF_PATH="/root/.config/rclone/rclone.conf"
fi

RCLONE_CRYPT="cloud-backup-crypt"

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'
|
||||
|
||||
# ── Helpers ────────────────────────────────────────────────────────────────
|
||||
|
||||
# Timestamped, colorized logging helpers; all output is mirrored to $LOG_FILE.
log() {
    printf '%b\n' "${GREEN}[$(date '+%H:%M:%S')]${NC} $*" | tee -a "$LOG_FILE"
}
warn() {
    printf '%b\n' "${YELLOW}[$(date '+%H:%M:%S')] WARNING:${NC} $*" | tee -a "$LOG_FILE"
}
err() {
    printf '%b\n' "${RED}[$(date '+%H:%M:%S')] ERROR:${NC} $*" | tee -a "$LOG_FILE"
}
# Section banner for each major restore step.
step() {
    printf '%b\n' "\n${BLUE}━━━ $* ━━━${NC}" | tee -a "$LOG_FILE"
}
|
||||
|
||||
# Ask a yes/no question; succeed only on an explicit y/Y answer.
confirm() {
    local prompt="$1"
    local reply
    printf '%b' "${YELLOW}${prompt} [y/N]: ${NC}"
    read -r reply
    [[ "$reply" == [Yy] ]]
}
|
||||
|
||||
# Abort unless running as root (EUID 0) — the restore writes under /opt,
# /etc/systemd and /root.
check_root() {
    (( EUID == 0 )) || { err "This script must be run as root"; exit 1; }
}
|
||||
|
||||
# ── Pre-flight checks ─────────────────────────────────────────────────────
|
||||
|
||||
# Verify rclone is installed, its config exists, and the encrypted remote is
# reachable, before any restore work begins. Exits on any failure.
preflight() {
    step "Pre-flight checks"

    check_root

    # rclone binary
    if ! command -v rclone &>/dev/null; then
        err "rclone not installed. Install with: apt install -y rclone"
        exit 1
    fi
    log "rclone: $(rclone --version | head -1)"

    # rclone config file
    if [[ ! -f "$RCLONE_CONF_PATH" ]]; then
        err "rclone config not found at $RCLONE_CONF_PATH"
        echo "You need the rclone.conf with [cloud-backup-remote] and [cloud-backup-crypt] sections."
        echo "If restoring to a new machine, copy rclone.conf from your backup records."
        exit 1
    fi
    log "rclone config: $RCLONE_CONF_PATH"

    # Remote reachability (also validates the crypt passwords)
    log "Testing remote connection..."
    if ! rclone lsd "${RCLONE_CRYPT}:" --config "$RCLONE_CONF_PATH" --max-depth 1 &>/dev/null; then
        err "Cannot connect to remote. Check your rclone config and encryption passwords."
        exit 1
    fi
    log "Remote connection: OK"

    # Show what's available on remote
    log "Remote directories:"
    rclone lsd "${RCLONE_CRYPT}:" --config "$RCLONE_CONF_PATH" --max-depth 1 2>/dev/null | tee -a "$LOG_FILE"

    mkdir -p "$RESTORE_TMP"
}
|
||||
|
||||
# ── Step 1: Download app_backup and db_dumps first ────────────────────────
|
||||
|
||||
# Pull the small, critical artifacts (app archive, DB dumps) down first and
# fail fast if the app archive is missing from the backup.
download_configs() {
    step "Step 1: Downloading app_backup and db_dumps from remote"

    mkdir -p "$RESTORE_TMP/app_backup" "$RESTORE_TMP/db_dumps"

    local subdir
    for subdir in app_backup db_dumps; do
        log "Downloading ${subdir}..."
        rclone copy "${RCLONE_CRYPT}:${subdir}" "$RESTORE_TMP/${subdir}" \
            --config "$RCLONE_CONF_PATH" --progress 2>&1 | tee -a "$LOG_FILE"
    done

    # Verify we got the essentials
    local archive="$RESTORE_TMP/app_backup/media-downloader-app.tar.gz"
    if [[ ! -f "$archive" ]]; then
        err "media-downloader-app.tar.gz not found in backup!"
        exit 1
    fi
    log "App archive size: $(du -sh "$archive" | cut -f1)"

    ls -la "$RESTORE_TMP/db_dumps/" | tee -a "$LOG_FILE"
    ls -la "$RESTORE_TMP/app_backup/" | tee -a "$LOG_FILE"
    ls -la "$RESTORE_TMP/app_backup/systemd/" 2>/dev/null | tee -a "$LOG_FILE"
}
|
||||
|
||||
# ── Step 2: Install system dependencies ───────────────────────────────────
|
||||
|
||||
# Install everything the stack needs (Python, PostgreSQL, Docker, Node, …)
# and enable the essential daemons. Uses apt-get, whose CLI is stable for
# scripting (plain `apt` warns about unstable output in scripts).
install_dependencies() {
    step "Step 2: Install system dependencies"

    if ! confirm "Install system packages (python3, postgresql, docker, node, etc.)?"; then
        warn "Skipping dependency installation"
        return
    fi

    log "Updating package lists..."
    apt-get update

    log "Installing core packages..."
    # FIX: libgl1 replaces libgl1-mesa-glx, which no longer exists on
    # Ubuntu 24.04 (the script's stated target); the old package name would
    # make the install — and therefore the whole restore — fail here.
    apt-get install -y \
        python3 python3-venv python3-pip python3-dev \
        postgresql postgresql-client \
        docker.io docker-compose-v2 \
        nodejs npm \
        rclone \
        xvfb \
        python3-pyinotify \
        nginx \
        git curl wget jq \
        build-essential libffi-dev libssl-dev \
        libgl1 libglib2.0-0 \
        ffmpeg \
        2>&1 | tee -a "$LOG_FILE"

    # Enable and start essential services
    systemctl enable --now docker
    systemctl enable --now postgresql

    log "System dependencies installed"
}
|
||||
|
||||
# ── Step 3: Restore media-downloader application ──────────────────────────
|
||||
|
||||
# Restore the media-downloader application tree from the backup archive, then
# rebuild everything deliberately excluded from backups: the Python venv, the
# frontend bundle, and the Playwright browsers.
restore_app() {
    step "Step 3: Restore media-downloader application"

    # Never clobber an existing install silently — move it aside first.
    if [[ -d "$APP_DIR" ]]; then
        if confirm "$APP_DIR already exists. Back it up and replace?"; then
            local backup_name="${APP_DIR}.bak.$(date +%Y%m%d_%H%M%S)"
            log "Moving existing app to $backup_name"
            mv "$APP_DIR" "$backup_name"
        else
            warn "Skipping app restore"
            return
        fi
    fi

    log "Extracting media-downloader app..."
    mkdir -p /opt
    tar xzf "$RESTORE_TMP/app_backup/media-downloader-app.tar.gz" -C /opt
    log "App extracted to $APP_DIR"

    # Recreate venv (the venv itself is excluded from the backup).
    log "Creating Python virtual environment..."
    python3 -m venv "$APP_DIR/venv"

    # `tail -N | tee` keeps the console quiet while still logging the tail.
    log "Installing Python dependencies (this may take a while)..."
    "$APP_DIR/venv/bin/pip" install --upgrade pip wheel 2>&1 | tail -3 | tee -a "$LOG_FILE"
    "$APP_DIR/venv/bin/pip" install -r "$APP_DIR/requirements.txt" 2>&1 | tail -10 | tee -a "$LOG_FILE"
    log "Python dependencies installed"

    # Rebuild frontend from the backed-up sources.
    log "Installing frontend dependencies..."
    cd "$APP_DIR/web/frontend"
    npm install 2>&1 | tail -5 | tee -a "$LOG_FILE"
    log "Building frontend..."
    # NOTE(review): the 2>&1 redirection binds only to the vite invocation;
    # a tsc failure aborts the script via `set -e` — presumably intended.
    npx tsc && npx vite build 2>&1 | tail -5 | tee -a "$LOG_FILE"
    log "Frontend built"

    # Install Playwright browsers (also excluded from the backup).
    log "Installing Playwright browsers..."
    "$APP_DIR/venv/bin/python3" -m playwright install chromium firefox 2>&1 | tail -5 | tee -a "$LOG_FILE"
    "$APP_DIR/venv/bin/python3" -m playwright install-deps 2>&1 | tail -5 | tee -a "$LOG_FILE"

    # Create required runtime directories.
    mkdir -p "$APP_DIR/logs" "$APP_DIR/temp" "$APP_DIR/cache/thumbnails"

    log "Application restored"
}
|
||||
|
||||
# ── Step 4: Restore Immich ────────────────────────────────────────────────
|
||||
|
||||
# Restore the Immich compose file and .env and make sure its data
# directories exist; the actual media is synced later (Step 7).
restore_immich() {
    step "Step 4: Restore Immich configuration"

    mkdir -p "$IMMICH_BASE"

    # Restore docker-compose and .env (each is optional in the backup).
    local src
    src="$RESTORE_TMP/app_backup/immich-docker-compose.yml"
    if [[ -f "$src" ]]; then
        cp "$src" "$IMMICH_BASE/docker-compose.yml"
        log "Restored Immich docker-compose.yml"
    fi
    src="$RESTORE_TMP/app_backup/immich-env"
    if [[ -f "$src" ]]; then
        cp "$src" "$IMMICH_BASE/.env"
        log "Restored Immich .env"
    fi

    # Create required directories
    mkdir -p "$IMMICH_BASE/upload" "$IMMICH_BASE/db" "$IMMICH_BASE/db_dumps" "$IMMICH_BASE/app_backup"

    log "Immich config restored. Media files will be synced in Step 7."
}
|
||||
|
||||
# ── Step 5: Restore databases ─────────────────────────────────────────────
|
||||
|
||||
# Restore both PostgreSQL databases from whatever dump format the backup
# contains, tried newest-first: parallel directory dump, pg_dump custom
# format (.dump), then legacy plain SQL.
# NOTE(review): the media_downloader DB password is hardcoded below (and in
# verify()) — consider sourcing it from a mode-600 env file instead.
restore_databases() {
    step "Step 5: Restore databases"

    # Media Downloader PostgreSQL (supports directory dump, .dump, and legacy .sql)
    local md_dir="$RESTORE_TMP/db_dumps/media_downloader_dump"
    local md_dump="$RESTORE_TMP/db_dumps/media_downloader.dump"
    local md_sql="$RESTORE_TMP/db_dumps/media_downloader.sql"
    if [[ -d "$md_dir" || -f "$md_dump" || -f "$md_sql" ]]; then
        if confirm "Restore media_downloader PostgreSQL database?"; then
            log "Creating media_downloader database and user..."
            # `|| true`: user/database may already exist on a partial re-run.
            sudo -u postgres psql -c "CREATE USER media_downloader WITH PASSWORD 'PNsihOXvvuPwWiIvGlsc9Fh2YmMmB';" 2>/dev/null || true
            sudo -u postgres psql -c "DROP DATABASE IF EXISTS media_downloader;" 2>/dev/null || true
            sudo -u postgres psql -c "CREATE DATABASE media_downloader OWNER media_downloader;" 2>/dev/null || true

            if [[ -d "$md_dir" ]]; then
                log "Importing media_downloader dump (parallel directory format)..."
                PGPASSWORD=PNsihOXvvuPwWiIvGlsc9Fh2YmMmB pg_restore -h localhost -U media_downloader \
                    -d media_downloader --no-owner --no-acl -j 4 "$md_dir" 2>&1 | tail -5 | tee -a "$LOG_FILE"
            elif [[ -f "$md_dump" ]]; then
                log "Importing media_downloader dump (custom format)..."
                PGPASSWORD=PNsihOXvvuPwWiIvGlsc9Fh2YmMmB pg_restore -h localhost -U media_downloader \
                    -d media_downloader --no-owner --no-acl "$md_dump" 2>&1 | tail -5 | tee -a "$LOG_FILE"
            else
                log "Importing media_downloader dump (SQL format)..."
                PGPASSWORD=PNsihOXvvuPwWiIvGlsc9Fh2YmMmB psql -h localhost -U media_downloader \
                    -d media_downloader < "$md_sql" 2>&1 | tail -5 | tee -a "$LOG_FILE"
            fi
            log "media_downloader database restored"
        fi
    else
        warn "media_downloader dump not found in backup"
    fi

    # Immich PostgreSQL (supports .tar directory dump, .dump, and legacy .sql)
    local im_tar="$RESTORE_TMP/db_dumps/immich_dump.tar"
    local im_dump="$RESTORE_TMP/db_dumps/immich.dump"
    local im_sql="$RESTORE_TMP/db_dumps/immich.sql"
    if [[ -f "$im_tar" || -f "$im_dump" || -f "$im_sql" ]]; then
        if confirm "Restore Immich PostgreSQL database? (starts Immich containers first)"; then
            log "Starting Immich database container..."
            cd "$IMMICH_BASE"
            docker compose up -d database 2>&1 | tee -a "$LOG_FILE"

            # Poll up to ~60s for the container's PostgreSQL to accept connections.
            log "Waiting for Immich PostgreSQL to be ready..."
            for i in $(seq 1 30); do
                if docker exec immich_postgres pg_isready -U postgres &>/dev/null; then
                    break
                fi
                sleep 2
            done

            if [[ -f "$im_tar" ]]; then
                log "Importing Immich dump (parallel directory format)..."
                # The directory-format dump is tarred for transport; unpack it
                # inside the container so pg_restore can read it with -j.
                docker cp "$im_tar" immich_postgres:/tmp/immich_dump.tar
                docker exec immich_postgres sh -c "cd /tmp && tar xf immich_dump.tar"
                docker exec immich_postgres pg_restore -U postgres -d immich \
                    --no-owner --no-acl -j 4 /tmp/immich_dump 2>&1 | tail -5 | tee -a "$LOG_FILE"
                docker exec immich_postgres sh -c "rm -rf /tmp/immich_dump /tmp/immich_dump.tar"
            elif [[ -f "$im_dump" ]]; then
                log "Importing Immich dump (custom format)..."
                docker cp "$im_dump" immich_postgres:/tmp/immich.dump
                docker exec immich_postgres pg_restore -U postgres -d immich \
                    --no-owner --no-acl /tmp/immich.dump 2>&1 | tail -5 | tee -a "$LOG_FILE"
                docker exec immich_postgres rm -f /tmp/immich.dump
            else
                log "Importing Immich dump (SQL format)..."
                docker exec -i immich_postgres psql -U postgres -d immich \
                    < "$im_sql" 2>&1 | tail -5 | tee -a "$LOG_FILE"
            fi
            log "Immich database restored"
        fi
    else
        warn "immich dump not found in backup"
    fi
}
|
||||
|
||||
# ── Step 6: Restore systemd services & rclone config ─────────────────────
|
||||
|
||||
# Install systemd unit files from the backup and restore rclone.conf.
restore_services() {
    step "Step 6: Restore systemd services and configs"

    # Systemd service files
    if [[ -d "$RESTORE_TMP/app_backup/systemd" ]]; then
        if confirm "Install systemd service files?"; then
            local svc name
            for svc in "$RESTORE_TMP/app_backup/systemd/"*; do
                # FIX: declaration split from assignment — `local name=$(…)`
                # masks the command substitution's exit status with local's
                # own (always zero) status.
                name=$(basename "$svc")
                cp "$svc" "/etc/systemd/system/$name"
                log "Installed $name"
            done
            systemctl daemon-reload
            log "systemd reloaded"
        fi
    fi

    # rclone config
    if [[ -f "$RESTORE_TMP/app_backup/rclone.conf" ]]; then
        if confirm "Restore rclone.conf?"; then
            mkdir -p /root/.config/rclone
            cp "$RESTORE_TMP/app_backup/rclone.conf" /root/.config/rclone/rclone.conf
            chmod 600 /root/.config/rclone/rclone.conf  # contains credentials
            log "rclone.conf restored"
        fi
    fi
}
|
||||
|
||||
# ── Step 7: Download media files ──────────────────────────────────────────
|
||||
|
||||
# Sync every remaining remote directory (everything except app_backup and
# db_dumps, which Step 1 already fetched) into $IMMICH_BASE.
restore_media() {
    step "Step 7: Download media files from remote"

    # FIX: use `rclone lsf --dirs-only` (one name per line, trailing '/')
    # instead of parsing `lsd` output with awk '{print $NF}', which mangles
    # directory names containing spaces. `|| dirs=""` keeps a transient
    # listing failure from aborting the script under `set -e`.
    local dirs
    dirs=$(rclone lsf "${RCLONE_CRYPT}:" --config "$RCLONE_CONF_PATH" --dirs-only 2>/dev/null) || dirs=""

    echo ""
    log "Available remote directories:"
    echo "$dirs" | while read -r d; do echo " - ${d%/}"; done

    if ! confirm "Download all media directories? (This may take a long time for large backups)"; then
        warn "Skipping media download. You can sync manually later with:"
        echo " rclone copy ${RCLONE_CRYPT}:<dir> ${IMMICH_BASE}/<dir> --config $RCLONE_CONF_PATH --progress --transfers 4"
        return
    fi

    echo "$dirs" | while read -r dir_name; do
        dir_name="${dir_name%/}"
        # Skip dirs we already downloaded
        [[ "$dir_name" == "app_backup" || "$dir_name" == "db_dumps" ]] && continue
        [[ -z "$dir_name" ]] && continue

        log "Syncing $dir_name..."
        mkdir -p "$IMMICH_BASE/$dir_name"
        rclone copy "${RCLONE_CRYPT}:${dir_name}" "$IMMICH_BASE/$dir_name" \
            --config "$RCLONE_CONF_PATH" \
            --progress \
            --transfers 4 \
            --checkers 8 \
            2>&1 | tee -a "$LOG_FILE"
        log "$dir_name done"
    done

    log "Media files restored"
}
|
||||
|
||||
# ── Step 8: Start services ───────────────────────────────────────────────
|
||||
|
||||
# Bring the whole stack back up (Docker stack first, then the native systemd
# units) and print a quick status summary. Unit start order is significant.
start_services() {
    step "Step 8: Start services"

    confirm "Start all services?" || return 0

    # Start Immich stack
    log "Starting Immich..."
    cd "$IMMICH_BASE"
    docker compose up -d 2>&1 | tee -a "$LOG_FILE"

    # Enable and start media-downloader services; optional units tolerate
    # absence with `|| true`.
    log "Starting media-downloader services..."
    systemctl enable --now xvfb-media-downloader.service 2>/dev/null || true
    systemctl enable --now media-downloader-api.service
    systemctl enable --now media-downloader.service
    systemctl enable --now media-downloader-frontend.service 2>/dev/null || true
    systemctl enable --now media-downloader-db-cleanup.timer 2>/dev/null || true
    systemctl enable --now cloud-backup-sync.service

    sleep 5

    # Status check
    log "Service status:"
    local unit state
    for unit in media-downloader-api media-downloader cloud-backup-sync; do
        state=$(systemctl is-active "$unit" 2>/dev/null || echo "not found")
        if [[ "$state" == "active" ]]; then
            echo -e " ${GREEN}●${NC} $unit: $state"
        else
            echo -e " ${RED}●${NC} $unit: $state"
        fi
    done

    # Docker containers
    log "Docker containers:"
    docker ps --format "table {{.Names}}\t{{.Status}}" | tee -a "$LOG_FILE"
}
|
||||
|
||||
# ── Step 9: Post-restore verification ─────────────────────────────────────
|
||||
|
||||
# Post-restore smoke tests: API, Immich, database connectivity, disk usage.
verify() {
    step "Step 9: Post-restore verification"

    local issues=0
    # FIX: the original used `((issues++))`, which returns status 1 when
    # issues is 0 (post-increment evaluates to 0) and therefore aborted the
    # whole script under `set -e` at the first failed check. The assignment
    # form below always succeeds.

    # Check API
    if curl -sf http://localhost:8000/api/health &>/dev/null; then
        log "API health check: OK"
    else
        warn "API health check: FAILED (may still be starting)"
        issues=$((issues + 1))
    fi

    # Check Immich
    if curl -sf http://localhost:2283/api/server-info/ping &>/dev/null; then
        log "Immich health check: OK"
    else
        warn "Immich health check: FAILED (may still be starting)"
        issues=$((issues + 1))
    fi

    # Check database
    # NOTE(review): hardcoded password (also in restore_databases) — consider
    # sourcing it from a protected env file.
    if PGPASSWORD=PNsihOXvvuPwWiIvGlsc9Fh2YmMmB psql -h localhost -U media_downloader -d media_downloader -c "SELECT 1" &>/dev/null; then
        log "Media Downloader DB: OK"
    else
        warn "Media Downloader DB: FAILED"
        issues=$((issues + 1))
    fi

    # Disk usage
    log "Disk usage:"
    df -h /opt/immich /opt/media-downloader 2>/dev/null | tee -a "$LOG_FILE"

    echo ""
    if [[ $issues -eq 0 ]]; then
        log "${GREEN}Restore completed successfully!${NC}"
    else
        warn "Restore completed with $issues issue(s). Check the log: $LOG_FILE"
    fi

    echo ""
    log "Restore log saved to: $LOG_FILE"
}
|
||||
|
||||
# ── Main ──────────────────────────────────────────────────────────────────
|
||||
|
||||
# Orchestrates the whole restore in dependency order. Every destructive step
# asks for confirmation individually, so a partial restore is possible.
main() {
    echo -e "${BLUE}"
    echo "╔══════════════════════════════════════════════════════════════╗"
    echo "║ Cloud Backup Restore — Media Downloader ║"
    echo "║ ║"
    echo "║ Restores: App, Databases, Media, Configs, Services ║"
    echo "║ Source: Backblaze B2 (rclone crypt encrypted) ║"
    echo "╚══════════════════════════════════════════════════════════════╝"
    echo -e "${NC}"

    echo "This script will restore your media-downloader + Immich stack"
    echo "from an encrypted B2 cloud backup. Each step asks for confirmation."
    echo ""
    echo "Log: $LOG_FILE"
    echo ""

    if ! confirm "Ready to begin restore?"; then
        echo "Aborted."
        exit 0
    fi

    # Truncate any log left over from a previous run.
    echo "" > "$LOG_FILE"

    preflight
    download_configs
    install_dependencies
    restore_app
    restore_immich
    restore_databases
    restore_services
    restore_media
    start_services
    verify
}

main "$@"
|
||||
1227
scripts/cloud_backup_sync.py
Executable file
1227
scripts/cloud_backup_sync.py
Executable file
File diff suppressed because it is too large
Load Diff
47
scripts/create-version-backup.sh
Executable file
47
scripts/create-version-backup.sh
Executable file
@@ -0,0 +1,47 @@
|
||||
#!/bin/bash
# Create version-stamped locked backup using backup-central
set -e

# Get version from VERSION file (whitespace stripped).
VERSION=$(tr -d '[:space:]' < /opt/media-downloader/VERSION)

# Create timestamp
TIMESTAMP=$(date +%Y%m%d-%H%M%S)
BACKUP_NAME="${VERSION}-${TIMESTAMP}"

# Configuration
PROFILE_ID="profile-media-downloader"

echo "╔════════════════════════════════════════════════╗"
echo "║ Media Downloader Version Backup ║"
echo "╠════════════════════════════════════════════════╣"
echo "║ Version: ${VERSION} ║"
echo "║ Name: ${BACKUP_NAME} ║"
echo "╚════════════════════════════════════════════════╝"
echo ""

echo "⏳ Starting backup using backup-central..."
echo ""

# Run backup using CLI with profile, custom name, and locked flag.
# FIX: the old `if [ $? -eq 0 ]` after the command was dead code — under
# `set -e` a failing backup exits the script before the check, so the
# failure branch could never run. Test the command directly instead.
if backup-central backup -P "$PROFILE_ID" -n "$BACKUP_NAME" -l; then
    echo ""
    echo "╔════════════════════════════════════════════════╗"
    echo "║ Backup Complete ║"
    echo "╠════════════════════════════════════════════════╣"
    echo "║ Name: ${BACKUP_NAME} ║"
    echo "║ Profile: Media Downloader ║"
    echo "║ Status: Locked & Protected ║"
    echo "║ Type: Incremental ║"
    echo "╚════════════════════════════════════════════════╝"
    echo ""
    echo "✓ Version backup created successfully!"
    echo ""
else
    echo ""
    echo "✗ Backup failed!"
    echo ""
    exit 1
fi
|
||||
93
scripts/db-cleanup.sh
Executable file
93
scripts/db-cleanup.sh
Executable file
@@ -0,0 +1,93 @@
|
||||
#!/bin/bash
# Database Cleanup Script
# Scans database for missing files and removes their references
# Runs via systemd timer nightly at 3:00 AM

set -e

# Configuration
API_URL="http://localhost:8000/api/maintenance/cleanup/missing-files"
STATUS_URL="http://localhost:8000/api/maintenance/cleanup/status"
LOG_FILE="/opt/media-downloader/logs/db-cleanup.log"
TOKEN_SCRIPT="/opt/media-downloader/scripts/get-api-token.sh"

# Logging function: timestamped line to stdout and the log file.
log() {
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1" | tee -a "$LOG_FILE"
}

# Get API token
if [ ! -f "$TOKEN_SCRIPT" ]; then
    log "ERROR: API token script not found at $TOKEN_SCRIPT"
    exit 1
fi

$TOKEN_SCRIPT > /dev/null 2>&1
# NOTE(review): /tmp/api_token.txt is a predictable, shared-tmp path —
# consider a mode-600 file under /run, or reading the token from the
# script's stdout instead.
TOKEN=$(cat /tmp/api_token.txt 2>/dev/null)
if [ -z "$TOKEN" ]; then
    log "ERROR: Failed to get API token"
    exit 1
fi

log "Starting database cleanup (dry_run=false)"

# Start cleanup.
# FIX: the old `if [ $? -ne 0 ]` after the assignment was dead code — under
# `set -e` a curl failure exits the script before the check. Test the
# command substitution directly so the error branch can actually run.
if ! RESPONSE=$(curl -s -X POST "$API_URL" \
    -H "Content-Type: application/json" \
    -H "Authorization: Bearer $TOKEN" \
    -d '{"dry_run": false}'); then
    log "ERROR: Failed to start cleanup"
    exit 1
fi

log "Cleanup started, waiting for completion..."

# Poll for status
MAX_WAIT=300 # 5 minutes max
WAITED=0
INTERVAL=5

while [ $WAITED -lt $MAX_WAIT ]; do
    sleep $INTERVAL
    WAITED=$((WAITED + INTERVAL))

    # `|| true`: a transient poll failure should not kill the loop under set -e.
    STATUS=$(curl -s "$STATUS_URL" -H "Authorization: Bearer $TOKEN") || true

    # NOTE(review): JSON parsed with grep/cut to avoid a jq dependency;
    # fragile if the API response format changes.
    STATUS_CODE=$(echo "$STATUS" | grep -o '"status":"[^"]*"' | cut -d'"' -f4)

    case "$STATUS_CODE" in
        "completed")
            TOTAL_CHECKED=$(echo "$STATUS" | grep -o '"total_checked":[0-9]*' | cut -d':' -f2)
            TOTAL_MISSING=$(echo "$STATUS" | grep -o '"total_missing":[0-9]*' | cut -d':' -f2)
            TOTAL_REMOVED=$(echo "$STATUS" | grep -o '"total_removed":[0-9]*' | cut -d':' -f2)
            DURATION=$(echo "$STATUS" | grep -o '"duration_seconds":[0-9.]*' | cut -d':' -f2)

            log "SUCCESS: Cleanup completed"
            log " Checked: $TOTAL_CHECKED files"
            log " Missing: $TOTAL_MISSING files"
            log " Removed: $TOTAL_REMOVED references"
            log " Duration: ${DURATION}s"
            exit 0
            ;;
        "failed")
            ERROR=$(echo "$STATUS" | grep -o '"error":"[^"]*"' | cut -d'"' -f4)
            log "ERROR: Cleanup failed - $ERROR"
            exit 1
            ;;
        "running")
            log "Still running... (${WAITED}s elapsed)"
            ;;
        "no_scan")
            log "ERROR: Cleanup job not found"
            exit 1
            ;;
        *)
            log "WARNING: Unknown status - $STATUS_CODE"
            ;;
    esac
done

log "ERROR: Cleanup timed out after ${MAX_WAIT}s"
exit 1
|
||||
125
scripts/fix_kylie_tags.py
Normal file
125
scripts/fix_kylie_tags.py
Normal file
@@ -0,0 +1,125 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Fix tagged users for recently backfilled kyliejenner posts.
|
||||
|
||||
Fetches each post via /api/v1/media/{code}/info/ and inserts tagged users.
|
||||
"""
|
||||
import json
|
||||
import string
|
||||
import sys
|
||||
import time
|
||||
import os
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
import modules.db_bootstrap # noqa: F401
|
||||
|
||||
import sqlite3
|
||||
from curl_cffi.requests import Session as CurlSession
|
||||
from datetime import datetime
|
||||
|
||||
CREATOR_ID = 110
|
||||
SLEEP_BETWEEN = 1.5
|
||||
|
||||
# Alphabet used by Instagram shortcodes (URL-safe base64 variant: A-Z, a-z, 0-9, -, _).
CHARSET = string.ascii_uppercase + string.ascii_lowercase + string.digits + '-_'


def shortcode_to_media_id(code):
    """Decode an Instagram shortcode into its numeric media ID.

    Each character is one base-64 digit in CHARSET order; the whole code is
    read most-significant digit first. Raises ValueError if the shortcode
    contains a character outside the alphabet.

    Returns the media ID as a decimal string.
    """
    value = 0
    for ch in code:
        value = (value << 6) + CHARSET.index(ch)
    return str(value)
|
||||
|
||||
|
||||
def main():
    """Backfill tagged users for recently added kyliejenner posts.

    Loads Instagram session cookies from the local scrapers table, fetches
    each post's metadata from the private media-info endpoint, and inserts
    any tagged usernames into paid_content_post_tagged_users.
    """
    # NOTE(review): connects to a path literally named 'media_downloader'
    # (no extension, relative to CWD) — presumably redirected by the
    # modules.db_bootstrap import to the real backend; confirm.
    conn = sqlite3.connect('media_downloader')
    cursor = conn.cursor()

    # Load cookies stored as a JSON list for the browser-based scraper.
    cursor.execute("SELECT cookies_json FROM scrapers WHERE id = 'instagram_browser'")
    cookie_list = json.loads(cursor.fetchone()[0])

    # Get the backfilled posts (added after the hard-coded cutoff timestamp).
    cursor.execute("""
        SELECT p.id, p.post_id FROM paid_content_posts p
        WHERE p.creator_id = ? AND p.added_at >= '2026-03-28T21:00:00'
        ORDER BY p.id
    """, (CREATOR_ID,))
    posts = cursor.fetchall()
    print(f"Found {len(posts)} posts to check for tags")

    # Impersonate desktop Edge (curl_cffi TLS fingerprint + matching headers)
    # so the private API accepts the requests.
    session = CurlSession(impersonate='edge101')
    session.headers.update({
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0',
        'X-IG-App-ID': '936619743392459',
        'X-Requested-With': 'XMLHttpRequest',
        'Referer': 'https://www.instagram.com/',
        'Origin': 'https://www.instagram.com',
        'Sec-CH-UA': '"Microsoft Edge";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
        'Sec-CH-UA-Mobile': '?0',
        'Sec-CH-UA-Platform': '"Windows"',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
    })
    # Install the stored session cookies (skip malformed entries).
    for c in cookie_list:
        if c.get('name') and c.get('value'):
            session.cookies.set(c['name'], c['value'], domain=c.get('domain', '.instagram.com'))

    tagged_count = 0
    now = datetime.now().isoformat()  # single timestamp reused for all inserts

    for i, (db_id, code) in enumerate(posts):
        try:
            # Shortcode -> numeric media id expected by the info endpoint.
            media_id = shortcode_to_media_id(code)
            resp = session.get(
                f'https://www.instagram.com/api/v1/media/{media_id}/info/',
                timeout=10
            )
            if resp.status_code != 200:
                print(f" [{i+1}/{len(posts)}] {code}: HTTP {resp.status_code}")
                # Back off hard when rate-limited before moving on.
                if resp.status_code == 429:
                    print(" Rate limited, waiting 60s...")
                    time.sleep(60)
                continue  # NB: also skips the SLEEP_BETWEEN pause below

            data = resp.json()
            items = data.get('items', [])
            if not items:
                print(f" [{i+1}/{len(posts)}] {code}: no items")
                continue

            node = items[0]
            # Collect unique tagged usernames from the post itself and from
            # every carousel (multi-image) child, preserving first-seen order.
            tagged_users = []
            for tag in (node.get('usertags') or {}).get('in', []):
                uname = (tag.get('user') or {}).get('username')
                if uname and uname not in tagged_users:
                    tagged_users.append(uname)
            for cm in node.get('carousel_media') or []:
                for tag in (cm.get('usertags') or {}).get('in', []):
                    uname = (tag.get('user') or {}).get('username')
                    if uname and uname not in tagged_users:
                        tagged_users.append(uname)

            if tagged_users:
                # Idempotent insert: re-runs won't duplicate rows.
                for uname in tagged_users:
                    cursor.execute(
                        """INSERT INTO paid_content_post_tagged_users (post_id, username, created_at)
                        VALUES (?, ?, ?) ON CONFLICT (post_id, username) DO NOTHING""",
                        (db_id, uname, now)
                    )
                conn.commit()
                tagged_count += 1
                print(f" [{i+1}/{len(posts)}] {code}: {', '.join(tagged_users)}")
            else:
                # Quiet progress heartbeat every 50 untagged posts.
                if (i + 1) % 50 == 0:
                    print(f" [{i+1}/{len(posts)}] progress... ({tagged_count} tagged so far)")

        except Exception as e:
            # Best-effort per-post: log and move on to the next shortcode.
            print(f" [{i+1}/{len(posts)}] {code}: error: {e}")

        # Throttle between requests to stay under rate limits.
        time.sleep(SLEEP_BETWEEN)

    conn.close()
    print(f"\nDone! Tagged {tagged_count} posts out of {len(posts)}.")


if __name__ == '__main__':
    main()
|
||||
114
scripts/fix_special_dirs.py
Normal file
114
scripts/fix_special_dirs.py
Normal file
@@ -0,0 +1,114 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Rename special directories (manual_*, PPV, import_*) to use date format.
|
||||
For multiple posts on same date, use suffixes: YYYY-MM-DD, YYYY-MM-DD_2, etc.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
|
||||
sys.path.insert(0, '/opt/media-downloader')
|
||||
from modules.unified_database import UnifiedDatabase
|
||||
|
||||
|
||||
def main():
    """Rename special post directories (manual_*, import_*, PPV) to date names.

    Directory layout is base/<YYYY-MM-DD>/<post_dir>. Each special directory
    is renamed to the date itself (YYYY-MM-DD) or, when taken, to a suffixed
    form (YYYY-MM-DD_2, _3, ...). Matching database paths are rewritten in
    the same transaction as each rename. --dry-run prints the plan only.
    """
    import argparse
    parser = argparse.ArgumentParser(description='Fix special directory names')
    parser.add_argument('--dry-run', action='store_true', help='Show changes without making them')
    args = parser.parse_args()

    db = UnifiedDatabase()
    # Hard-coded to the puffinasmr fansly library (one-off migration script).
    base_path = Path('/opt/immich/paid/fansly/puffinasmr')

    stats = {'renamed': 0, 'db_updated': 0, 'errors': 0}

    # Find all special directories grouped by date.
    date_dirs = defaultdict(list)

    for date_dir in base_path.iterdir():
        if not date_dir.is_dir():
            continue
        date_str = date_dir.name

        for post_dir in date_dir.iterdir():
            if not post_dir.is_dir():
                continue
            name = post_dir.name
            # Check if it's a special directory (not a numeric post_id)
            if name.startswith('manual_') or name.startswith('import_') or name == 'PPV':
                date_dirs[date_str].append(post_dir)

    # Process each date
    for date_str, dirs in sorted(date_dirs.items()):
        # Check if a date-named directory already exists; if so, start the
        # suffix counter after the highest _N already on disk (the bare
        # date name counts as suffix 1).
        existing_date_dir = base_path / date_str / date_str
        suffix = 1
        if existing_date_dir.exists():
            # Find next available suffix
            while (base_path / date_str / f"{date_str}_{suffix + 1}").exists():
                suffix += 1
            suffix += 1

        for old_dir in sorted(dirs, key=lambda d: d.name):
            # Determine new name: first rename on a date gets the bare date,
            # subsequent ones get _N.
            if suffix == 1:
                new_name = date_str
            else:
                new_name = f"{date_str}_{suffix}"

            new_dir = old_dir.parent / new_name

            # Skip if target exists (the suffix slot is still consumed below).
            if new_dir.exists():
                print(f" SKIP (exists): {old_dir} -> {new_dir}")
                suffix += 1
                continue

            print(f" {old_dir.name} -> {new_name}")

            if not args.dry_run:
                try:
                    old_dir.rename(new_dir)
                    stats['renamed'] += 1

                    # Update database paths that referenced the old directory.
                    with db.get_connection(for_write=True) as conn:
                        cursor = conn.cursor()
                        cursor.execute("""
                            UPDATE paid_content_attachments
                            SET local_path = REPLACE(local_path, ?, ?)
                            WHERE local_path LIKE ?
                        """, (str(old_dir), str(new_dir), f"%{old_dir}%"))
                        stats['db_updated'] += cursor.rowcount

                        # Also update posts table if post_id matches the old dir name
                        # (only manual_* entries use the directory name as post_id).
                        old_name = old_dir.name
                        if old_name.startswith('manual_'):
                            cursor.execute("""
                                UPDATE paid_content_posts
                                SET post_id = ?
                                WHERE post_id = ?
                            """, (new_name, old_name))

                        conn.commit()
                except Exception as e:
                    # Count and continue; a failed rename must not stop the run.
                    print(f" ERROR: {e}")
                    stats['errors'] += 1

            # Advance even on dry-run/failure so printed names stay unique.
            suffix += 1

    print("\n" + "=" * 50)
    print("SUMMARY")
    print("=" * 50)
    print(f"Directories renamed: {stats['renamed']}")
    print(f"DB records updated: {stats['db_updated']}")
    print(f"Errors: {stats['errors']}")

    if args.dry_run:
        print("\n(Dry run - no changes made)")


if __name__ == '__main__':
    main()
|
||||
103
scripts/generate-embeddings.py
Executable file
103
scripts/generate-embeddings.py
Executable file
@@ -0,0 +1,103 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Nightly embedding generation script
|
||||
Run via systemd timer to index new media files
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Add parent directory to path for imports
|
||||
sys.path.insert(0, '/opt/media-downloader')
|
||||
|
||||
# Bootstrap database backend (must be before any sqlite3 imports)
|
||||
import modules.db_bootstrap # noqa: E402,F401
|
||||
|
||||
from modules.universal_logger import get_logger
|
||||
from modules.unified_database import UnifiedDatabase
|
||||
from modules.semantic_search import SemanticSearch
|
||||
|
||||
logger = get_logger('EmbeddingGenerator')
|
||||
|
||||
|
||||
def generate_embeddings(db):
    """Generate embeddings for files that don't have them yet.

    Work is bounded to max_batches * batch_size (10 x 1000) files per
    invocation so a nightly run stays finite.

    Args:
        db: UnifiedDatabase instance handed through to SemanticSearch.

    Returns:
        int: number of embeddings successfully generated (0 if nothing to
        do or on error).
    """
    logger.info("=== Embedding Generation ===")

    try:
        semantic = SemanticSearch(db)

        # Get current stats
        stats = semantic.get_embedding_stats()
        logger.info(f"Current stats: {stats['total_embeddings']} embeddings, "
                    f"{stats['missing_embeddings']} missing, "
                    f"{stats['coverage_percent']}% coverage")

        if stats['missing_embeddings'] == 0:
            logger.info("All files already have embeddings, nothing to do")
            return 0

        # Process in batches of 1000 files
        batch_size = 1000
        total_processed = 0
        max_batches = 10  # Process up to 10000 files per night

        for batch_num in range(max_batches):
            # Stop early once coverage is complete.
            if stats['missing_embeddings'] == 0:
                break

            logger.info(f"Processing batch {batch_num + 1}/{max_batches} "
                        f"({stats['missing_embeddings']} files remaining)")

            def progress_callback(processed, total, current_file):
                # Log every 100th file to keep journal output manageable.
                if processed % 100 == 0:
                    logger.info(f" Progress: {processed}/{total} - {current_file}")

            results = semantic.generate_embeddings_batch(
                limit=batch_size,
                progress_callback=progress_callback
            )

            total_processed += results['success']
            logger.info(f"Batch {batch_num + 1} complete: "
                        f"{results['success']} success, "
                        f"{results['errors']} errors, "
                        f"{results['skipped']} skipped")

            # Update stats for next iteration
            stats = semantic.get_embedding_stats()

        # Final stats
        final_stats = semantic.get_embedding_stats()
        logger.info(f"Embedding generation complete: {total_processed} new embeddings generated")
        logger.info(f"Final coverage: {final_stats['coverage_percent']}% "
                    f"({final_stats['total_embeddings']}/{final_stats['total_files']} files)")

        return total_processed

    except Exception as e:
        # Swallow errors so the timer unit itself reports success; the
        # caller treats 0 as "nothing generated".
        logger.error(f"Embedding generation failed: {e}")
        return 0
|
||||
|
||||
|
||||
def main():
    """Entry point for the nightly embedding-generation timer.

    Connects to the database, runs generate_embeddings(), and exits with
    status 1 if initialization or generation raises.
    """
    logger.info("Starting nightly embedding generation")

    try:
        # Initialize database (project-managed backend).
        db = UnifiedDatabase()

        # Generate embeddings for files that are missing them.
        embeddings_processed = generate_embeddings(db)

        # Fixed: these were f-strings with no placeholders (lint F541).
        logger.info("=== Nightly indexing complete ===")
        logger.info(f" Embeddings generated: {embeddings_processed}")

    except Exception as e:
        # Non-zero exit so systemd records the unit as failed.
        logger.error(f"Nightly indexing failed: {e}")
        sys.exit(1)


if __name__ == '__main__':
    main()
|
||||
18
scripts/get-api-token.sh
Executable file
18
scripts/get-api-token.sh
Executable file
@@ -0,0 +1,18 @@
|
||||
#!/bin/bash
# Get API token for claude_test account and save to /tmp/api_token.txt
# Usage: /opt/media-downloader/scripts/get-api-token.sh
#
# After running this, use api-call.sh to make authenticated requests:
# /opt/media-downloader/scripts/api-call.sh "/api/video-queue?limit=2"
#
# NOTE(review): credentials are hardcoded for a local test account;
# consider reading them from the environment instead.

set -u

# Make sure newly created files (the token file) are owner-only.
umask 077

TOKEN=$(curl -s -X POST "http://localhost:8000/api/auth/login" \
    -H "Content-Type: application/json" \
    -d '{"username": "claude_test", "password": "ClaudeTest2025Secure"}' | jq -r '.token')

if [ "$TOKEN" != "null" ] && [ -n "$TOKEN" ]; then
    echo "$TOKEN" > /tmp/api_token.txt
    # Tighten perms even if the file already existed with a looser mode.
    chmod 600 /tmp/api_token.txt
    echo "Token saved to /tmp/api_token.txt"
else
    # Diagnostics belong on stderr so callers can capture stdout cleanly.
    echo "Failed to get token" >&2
    exit 1
fi
|
||||
45
scripts/get-podchaser-token.sh
Executable file
45
scripts/get-podchaser-token.sh
Executable file
@@ -0,0 +1,45 @@
|
||||
#!/bin/bash
# Helper script to exchange Podchaser client credentials for an access token

set -u

if [ -z "${1:-}" ] || [ -z "${2:-}" ]; then
    echo "Usage: $0 <client_id> <client_secret>"
    echo ""
    echo "Example:"
    echo " $0 'your-client-id' 'your-client-secret'"
    echo ""
    echo "Get your credentials from: https://www.podchaser.com/creators/dashboard/api"
    exit 1
fi

CLIENT_ID="$1"
CLIENT_SECRET="$2"

echo "Exchanging Podchaser credentials for access token..."

# Build the GraphQL request body with jq (already a hard dependency below)
# so quotes/backslashes in the credentials can't break the JSON payload.
# The previous version interpolated them straight into a JSON string.
PAYLOAD=$(jq -n --arg id "$CLIENT_ID" --arg secret "$CLIENT_SECRET" \
    '{query: ("mutation { requestAccessToken(input: { grant_type: CLIENT_CREDENTIALS client_id: \"" + $id + "\" client_secret: \"" + $secret + "\" }) { access_token } }")}')

RESPONSE=$(curl -s -X POST "https://api.podchaser.com/graphql" \
    -H "Content-Type: application/json" \
    -d "$PAYLOAD")

# Check for GraphQL-level errors in the response.
if echo "$RESPONSE" | jq -e '.errors' > /dev/null 2>&1; then
    echo "❌ Error getting access token:" >&2
    echo "$RESPONSE" | jq -r '.errors[].message' >&2
    exit 1
fi

# Extract access token
ACCESS_TOKEN=$(echo "$RESPONSE" | jq -r '.data.requestAccessToken.access_token')

if [ -z "$ACCESS_TOKEN" ] || [ "$ACCESS_TOKEN" = "null" ]; then
    echo "❌ Failed to get access token. Response:" >&2
    echo "$RESPONSE" | jq '.' >&2
    exit 1
fi

echo ""
echo "✅ Success! Your Podchaser access token:"
echo ""
echo "$ACCESS_TOKEN"
echo ""
echo "This token is valid for 1 year."
echo "Copy this token and paste it into Configuration > Appearances > Podchaser API Key"
674
scripts/install.sh
Executable file
674
scripts/install.sh
Executable file
@@ -0,0 +1,674 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Media Downloader Installer Script
|
||||
# Version: 13.13.1
|
||||
# Installs to /opt/media-downloader
|
||||
|
||||
set -e
|
||||
|
||||
# Colors for output
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
# Installation directory
|
||||
INSTALL_DIR="/opt/media-downloader"
|
||||
SERVICE_NAME="media-downloader"
|
||||
CURRENT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
||||
|
||||
echo -e "${GREEN}╔════════════════════════════════════════════════╗${NC}"
|
||||
echo -e "${GREEN}║ Media Downloader Installer v13.13.1 ║${NC}"
|
||||
echo -e "${GREEN}╚════════════════════════════════════════════════╝${NC}"
|
||||
echo ""
|
||||
|
||||
# Check if running as root
|
||||
if [[ $EUID -ne 0 ]]; then
|
||||
echo -e "${RED}This script must be run as root (use sudo)${NC}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Get the actual user who ran sudo
|
||||
ACTUAL_USER="${SUDO_USER:-$USER}"
|
||||
ACTUAL_HOME=$(getent passwd "$ACTUAL_USER" | cut -d: -f6)
|
||||
|
||||
echo -e "${YELLOW}Installation Settings:${NC}"
|
||||
echo " Install directory: $INSTALL_DIR"
|
||||
echo " Service name: $SERVICE_NAME"
|
||||
echo " User: $ACTUAL_USER"
|
||||
echo " Source: $CURRENT_DIR"
|
||||
echo ""
|
||||
|
||||
read -p "Continue with installation? (y/n) " -n 1 -r
|
||||
echo
|
||||
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
|
||||
echo "Installation cancelled"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Stop services if they exist
|
||||
echo -e "${YELLOW}Stopping existing services...${NC}"
|
||||
systemctl stop $SERVICE_NAME 2>/dev/null || true
|
||||
systemctl stop media-downloader-api 2>/dev/null || true
|
||||
systemctl stop media-downloader-frontend 2>/dev/null || true
|
||||
systemctl stop xvfb-media-downloader 2>/dev/null || true
|
||||
|
||||
# Create installation directory
|
||||
echo -e "${GREEN}Creating installation directory...${NC}"
|
||||
mkdir -p "$INSTALL_DIR"
|
||||
|
||||
# Copy files
|
||||
echo -e "${GREEN}Copying files...${NC}"
|
||||
rsync -a --exclude='.git' --exclude='node_modules' --exclude='venv' --exclude='__pycache__' \
|
||||
--exclude='.playwright' --exclude='dist' --exclude='*.pyc' \
|
||||
"$CURRENT_DIR/" "$INSTALL_DIR/"
|
||||
|
||||
# Create required directories
|
||||
echo -e "${GREEN}Creating required directories...${NC}"
|
||||
mkdir -p "$INSTALL_DIR/logs"
|
||||
mkdir -p "$INSTALL_DIR/database"
|
||||
mkdir -p "$INSTALL_DIR/cookies"
|
||||
mkdir -p "$INSTALL_DIR/sessions"
|
||||
mkdir -p "$INSTALL_DIR/config"
|
||||
mkdir -p "$INSTALL_DIR/data"
|
||||
mkdir -p "$INSTALL_DIR/data/face_references" # Face recognition reference images
|
||||
mkdir -p "$INSTALL_DIR/data/cache/profile_images" # Cached creator avatars/banners
|
||||
mkdir -p "/opt/immich/review" # Face recognition review queue
|
||||
mkdir -p "/opt/immich/recycle" # Recycle bin
|
||||
mkdir -p "/var/log/media-downloader" # System log directory
|
||||
|
||||
# Set permissions
|
||||
echo -e "${GREEN}Setting permissions...${NC}"
|
||||
chown -R "$ACTUAL_USER:$ACTUAL_USER" "$INSTALL_DIR"
|
||||
chmod +x "$INSTALL_DIR/media-downloader.py"
|
||||
chmod +x "$INSTALL_DIR/scripts/"*.sh
|
||||
chmod +x "$INSTALL_DIR/scripts/"*.py 2>/dev/null || true
|
||||
|
||||
# Install system dependencies
|
||||
echo -e "${GREEN}Installing system dependencies...${NC}"
|
||||
apt-get update > /dev/null 2>&1
|
||||
apt-get install -y cmake build-essential libopenblas-dev liblapack-dev \
|
||||
ffmpeg redis-server xvfb nodejs npm \
|
||||
libheif-examples imagemagick \
|
||||
postgresql postgresql-contrib libpq-dev > /dev/null 2>&1
|
||||
|
||||
# Start Redis if not running
|
||||
systemctl enable redis-server
|
||||
systemctl start redis-server
|
||||
|
||||
# Setup PostgreSQL
|
||||
echo -e "${GREEN}Setting up PostgreSQL...${NC}"
|
||||
systemctl enable postgresql
|
||||
systemctl start postgresql
|
||||
sudo -u postgres psql -tc "SELECT 1 FROM pg_roles WHERE rolname='media_downloader'" | grep -q 1 || \
|
||||
sudo -u postgres psql -c "CREATE USER media_downloader WITH PASSWORD 'changeme';"
|
||||
sudo -u postgres psql -tc "SELECT 1 FROM pg_database WHERE datname='media_downloader'" | grep -q 1 || \
|
||||
sudo -u postgres createdb -O media_downloader media_downloader
|
||||
echo -e "${GREEN}✓ PostgreSQL configured (user: media_downloader, db: media_downloader)${NC}"
|
||||
echo -e "${YELLOW}⚠ Remember to update DATABASE_URL in .env with the correct password${NC}"
|
||||
|
||||
# Create virtual environment
|
||||
echo -e "${GREEN}Creating Python virtual environment...${NC}"
|
||||
rm -rf "$INSTALL_DIR/venv" 2>/dev/null || true
|
||||
python3 -m venv "$INSTALL_DIR/venv"
|
||||
chown -R "$ACTUAL_USER:$ACTUAL_USER" "$INSTALL_DIR/venv"
|
||||
|
||||
# Install Python dependencies
|
||||
echo -e "${GREEN}Installing Python dependencies from requirements.txt...${NC}"
|
||||
sudo -u "$ACTUAL_USER" "$INSTALL_DIR/venv/bin/python" -m pip install --upgrade pip
|
||||
sudo -u "$ACTUAL_USER" "$INSTALL_DIR/venv/bin/python" -m pip install -r "$INSTALL_DIR/requirements.txt"
|
||||
|
||||
# Install playwright browsers
|
||||
echo -e "${GREEN}Installing Playwright browsers...${NC}"
|
||||
sudo -u "$ACTUAL_USER" bash -c "cd '$INSTALL_DIR' && '$INSTALL_DIR/venv/bin/python' -m playwright install chromium firefox"
|
||||
|
||||
# Install frontend dependencies
|
||||
echo -e "${GREEN}Installing frontend dependencies...${NC}"
|
||||
cd "$INSTALL_DIR/web/frontend"
|
||||
sudo -u "$ACTUAL_USER" npm install
|
||||
sudo -u "$ACTUAL_USER" npm run build
|
||||
|
||||
PYTHON_BIN="$INSTALL_DIR/venv/bin/python"
|
||||
|
||||
# ============================================================================
|
||||
# CHECK DEPENDENCIES
|
||||
# ============================================================================
|
||||
|
||||
echo ""
|
||||
echo -e "${BLUE}Checking Dependencies...${NC}"
|
||||
|
||||
# Check for FlareSolverr
|
||||
if command -v docker &> /dev/null; then
|
||||
if docker ps | grep -q flaresolverr; then
|
||||
echo -e "${GREEN}✓ FlareSolverr container is running${NC}"
|
||||
else
|
||||
echo -e "${YELLOW}⚠ FlareSolverr container not found${NC}"
|
||||
read -p "Install FlareSolverr Docker container now? (recommended) (y/n) " -n 1 -r
|
||||
echo
|
||||
if [[ $REPLY =~ ^[Yy]$ ]]; then
|
||||
docker run -d \
|
||||
--name flaresolverr \
|
||||
-p 8191:8191 \
|
||||
-e LOG_LEVEL=info \
|
||||
--restart unless-stopped \
|
||||
ghcr.io/flaresolverr/flaresolverr:latest
|
||||
echo -e "${GREEN}✓ FlareSolverr installed on port 8191${NC}"
|
||||
fi
|
||||
fi
|
||||
else
|
||||
echo -e "${YELLOW}⚠ Docker not found. FlareSolverr requires Docker for Cloudflare bypass.${NC}"
|
||||
fi
|
||||
|
||||
# ============================================================================
|
||||
# CREATE SYSTEMD SERVICES
|
||||
# ============================================================================
|
||||
|
||||
echo ""
|
||||
echo -e "${GREEN}Creating systemd services...${NC}"
|
||||
|
||||
# 1. Xvfb Virtual Display Service
|
||||
cat > "/etc/systemd/system/xvfb-media-downloader.service" << EOF
|
||||
[Unit]
|
||||
Description=Xvfb Virtual Display for Media Downloader
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=root
|
||||
ExecStart=/usr/bin/Xvfb :100 -screen 0 1920x1080x24 -nolisten tcp
|
||||
Restart=always
|
||||
RestartSec=5
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
EOF
|
||||
|
||||
# 2. Main Scheduler Service
|
||||
cat > "/etc/systemd/system/$SERVICE_NAME.service" << EOF
|
||||
[Unit]
|
||||
Description=Media Downloader Scheduler Service
|
||||
After=network.target xvfb-media-downloader.service redis-server.service
|
||||
Wants=xvfb-media-downloader.service
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=$ACTUAL_USER
|
||||
Group=$ACTUAL_USER
|
||||
WorkingDirectory=$INSTALL_DIR
|
||||
ExecStart=$PYTHON_BIN $INSTALL_DIR/media-downloader.py --scheduler
|
||||
Restart=on-failure
|
||||
RestartSec=30
|
||||
StandardOutput=append:$INSTALL_DIR/logs/service.log
|
||||
StandardError=append:$INSTALL_DIR/logs/service.log
|
||||
|
||||
Environment="PYTHONUNBUFFERED=1"
|
||||
Environment="PYTHONDONTWRITEBYTECODE=1"
|
||||
Environment="DISPLAY=:100"
|
||||
|
||||
LimitNOFILE=65536
|
||||
Nice=10
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
EOF
|
||||
|
||||
# 3. Web API Service
|
||||
cat > "/etc/systemd/system/media-downloader-api.service" << EOF
|
||||
[Unit]
|
||||
Description=Media Downloader Web API
|
||||
After=network.target redis-server.service
|
||||
Wants=redis-server.service
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=root
|
||||
Group=root
|
||||
WorkingDirectory=$INSTALL_DIR/web/backend
|
||||
ExecStart=$PYTHON_BIN $INSTALL_DIR/web/backend/api.py
|
||||
Restart=always
|
||||
RestartSec=10
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
|
||||
Environment="PYTHONUNBUFFERED=1"
|
||||
Environment="PYTHONDONTWRITEBYTECODE=1"
|
||||
|
||||
LimitNOFILE=65536
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
EOF
|
||||
|
||||
# 4. Web Frontend Service (Production - serves pre-built static files)
|
||||
# Note: For development with hot-reload, run: cd web/frontend && npm run dev
|
||||
cat > "/etc/systemd/system/media-downloader-frontend.service" << EOF
|
||||
[Unit]
|
||||
Description=Media Downloader Web Frontend (Production)
|
||||
After=network.target media-downloader-api.service
|
||||
Wants=media-downloader-api.service
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=root
|
||||
Group=root
|
||||
WorkingDirectory=$INSTALL_DIR/web/frontend
|
||||
ExecStart=/usr/bin/npm run preview -- --host 0.0.0.0 --port 5173
|
||||
Restart=always
|
||||
RestartSec=10
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
|
||||
Environment="NODE_ENV=production"
|
||||
|
||||
LimitNOFILE=65536
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
EOF
|
||||
|
||||
# 4b. Development Frontend Service (optional - hot reload)
|
||||
cat > "/etc/systemd/system/media-downloader-frontend-dev.service" << EOF
|
||||
[Unit]
|
||||
Description=Media Downloader Web Frontend (Development - Hot Reload)
|
||||
After=network.target media-downloader-api.service
|
||||
Wants=media-downloader-api.service
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=$ACTUAL_USER
|
||||
Group=$ACTUAL_USER
|
||||
WorkingDirectory=$INSTALL_DIR/web/frontend
|
||||
ExecStart=/usr/bin/npm run dev -- --host 0.0.0.0 --port 5173
|
||||
Restart=always
|
||||
RestartSec=10
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
|
||||
Environment="NODE_ENV=development"
|
||||
|
||||
LimitNOFILE=65536
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
EOF
|
||||
|
||||
# 5. Thumbnail Cache Builder Service + Timer
|
||||
cat > "/etc/systemd/system/media-cache-builder.service" << EOF
|
||||
[Unit]
|
||||
Description=Media Thumbnail and Metadata Cache Builder
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
User=root
|
||||
WorkingDirectory=$INSTALL_DIR
|
||||
ExecStart=$PYTHON_BIN $INSTALL_DIR/modules/thumbnail_cache_builder.py
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
TimeoutStartSec=3600
|
||||
|
||||
Nice=19
|
||||
IOSchedulingClass=idle
|
||||
CPUQuota=50%
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
EOF
|
||||
|
||||
cat > "/etc/systemd/system/media-cache-builder.timer" << EOF
|
||||
[Unit]
|
||||
Description=Daily Media Cache Builder Timer
|
||||
Requires=media-cache-builder.service
|
||||
|
||||
[Timer]
|
||||
OnCalendar=*-*-* 03:00:00
|
||||
Persistent=true
|
||||
RandomizedDelaySec=30min
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
EOF
|
||||
|
||||
# 6. Embedding Generator Service + Timer
|
||||
cat > "/etc/systemd/system/media-embedding-generator.service" << EOF
|
||||
[Unit]
|
||||
Description=Media Downloader Embedding Generator (CLIP)
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
User=root
|
||||
WorkingDirectory=$INSTALL_DIR
|
||||
ExecStart=$PYTHON_BIN $INSTALL_DIR/scripts/generate-embeddings.py
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
TimeoutStartSec=3600
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
EOF
|
||||
|
||||
cat > "/etc/systemd/system/media-embedding-generator.timer" << EOF
|
||||
[Unit]
|
||||
Description=Nightly Media Embedding Generation Timer
|
||||
Requires=media-embedding-generator.service
|
||||
|
||||
[Timer]
|
||||
OnCalendar=*-*-* 03:00:00
|
||||
RandomizedDelaySec=1800
|
||||
Persistent=true
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
EOF
|
||||
|
||||
# 7. Celebrity Enrichment Service + Timer
|
||||
cat > "/etc/systemd/system/media-celebrity-enrichment.service" << EOF
|
||||
[Unit]
|
||||
Description=Media Downloader Celebrity Metadata Enrichment
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
User=root
|
||||
WorkingDirectory=$INSTALL_DIR
|
||||
ExecStart=$PYTHON_BIN $INSTALL_DIR/scripts/enrich_celebrity_metadata.py
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
TimeoutStartSec=3600
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
EOF
|
||||
|
||||
cat > "/etc/systemd/system/media-celebrity-enrichment.timer" << EOF
|
||||
[Unit]
|
||||
Description=Nightly Celebrity Metadata Enrichment Timer
|
||||
Requires=media-celebrity-enrichment.service
|
||||
|
||||
[Timer]
|
||||
OnCalendar=*-*-* 04:00:00
|
||||
RandomizedDelaySec=300
|
||||
Persistent=true
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
EOF
|
||||
|
||||
# 8. Plex Matching Service + Timer
|
||||
cat > "/etc/systemd/system/plex-match.service" << EOF
|
||||
[Unit]
|
||||
Description=Match appearances to Plex library
|
||||
After=media-downloader-api.service
|
||||
Requires=media-downloader-api.service
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=$INSTALL_DIR/scripts/plex-match.sh
|
||||
User=root
|
||||
EOF
|
||||
|
||||
cat > "/etc/systemd/system/plex-match.timer" << EOF
|
||||
[Unit]
|
||||
Description=Run Plex matching twice daily
|
||||
|
||||
[Timer]
|
||||
OnCalendar=*-*-* 06:00:00
|
||||
OnCalendar=*-*-* 18:00:00
|
||||
Persistent=true
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
EOF
|
||||
|
||||
# 9. Database Cleanup Service + Timer
|
||||
cat > "/etc/systemd/system/media-downloader-db-cleanup.service" << EOF
|
||||
[Unit]
|
||||
Description=Media Downloader Database Cleanup
|
||||
After=network.target media-downloader-api.service
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
User=root
|
||||
WorkingDirectory=$INSTALL_DIR
|
||||
ExecStart=$INSTALL_DIR/scripts/db-cleanup.sh
|
||||
StandardOutput=append:$INSTALL_DIR/logs/db-cleanup.log
|
||||
StandardError=append:$INSTALL_DIR/logs/db-cleanup.log
|
||||
MemoryMax=512M
|
||||
CPUQuota=50%
|
||||
Restart=no
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
EOF
|
||||
|
||||
cat > "/etc/systemd/system/media-downloader-db-cleanup.timer" << EOF
|
||||
[Unit]
|
||||
Description=Media Downloader Database Cleanup Timer
|
||||
Requires=media-downloader-db-cleanup.service
|
||||
|
||||
[Timer]
|
||||
OnCalendar=*-*-* 03:00:00
|
||||
Persistent=true
|
||||
OnBootSec=5min
|
||||
RandomizedDelaySec=10min
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
EOF
|
||||
|
||||
# Create command-line wrapper
# Unquoted EOF: $INSTALL_DIR and $PYTHON_BIN are baked in at install time,
# while the escaped \$@ stays literal so the wrapper forwards its arguments.
echo -e "${GREEN}Creating command-line wrapper...${NC}"
cat > "/usr/local/bin/media-downloader" << EOF
#!/bin/bash
cd $INSTALL_DIR
export DISPLAY=:100
$PYTHON_BIN $INSTALL_DIR/media-downloader.py "\$@"
EOF
chmod +x "/usr/local/bin/media-downloader"

# Copy config if it doesn't exist — never clobber an existing settings.json.
if [ ! -f "$INSTALL_DIR/config/settings.json" ]; then
    echo -e "${GREEN}Copying default configuration...${NC}"
    if [ -f "$INSTALL_DIR/config/settings.example.json" ]; then
        cp "$INSTALL_DIR/config/settings.example.json" "$INSTALL_DIR/config/settings.json"
    fi
    chown -R "$ACTUAL_USER:$ACTUAL_USER" "$INSTALL_DIR/config"
    # settings.json may hold credentials — owner-only; ignore if absent
    chmod 600 "$INSTALL_DIR/config/settings.json" 2>/dev/null || true
fi

# Reload systemd so the freshly written unit files are picked up
echo -e "${GREEN}Reloading systemd...${NC}"
systemctl daemon-reload
|
||||
|
||||
# ============================================================================
# ENABLE AND START SERVICES
# ============================================================================

echo ""
echo -e "${BLUE}Service Configuration${NC}"

# Interactive opt-in: -n 1 reads a single key, -r keeps backslashes literal.
read -p "Enable and start all services? (y/n) " -n 1 -r
echo
if [[ $REPLY =~ ^[Yy]$ ]]; then
    echo -e "${GREEN}Enabling services...${NC}"

    # Core services
    systemctl enable xvfb-media-downloader.service
    systemctl enable $SERVICE_NAME.service
    systemctl enable media-downloader-api.service

    # Timers
    systemctl enable media-cache-builder.timer
    systemctl enable media-embedding-generator.timer
    systemctl enable media-celebrity-enrichment.timer
    systemctl enable plex-match.timer
    systemctl enable media-downloader-db-cleanup.timer

    echo -e "${GREEN}Starting services...${NC}"

    # Start in order: Xvfb first (browser automation needs the display),
    # then the API, then the scheduler; sleeps give each a moment to settle.
    systemctl start xvfb-media-downloader.service
    sleep 2
    systemctl start media-downloader-api.service
    sleep 2
    systemctl start $SERVICE_NAME.service

    # Start timers
    systemctl start media-cache-builder.timer
    systemctl start media-embedding-generator.timer
    systemctl start media-celebrity-enrichment.timer
    systemctl start plex-match.timer
    systemctl start media-downloader-db-cleanup.timer

    echo -e "${GREEN}✓ All services started${NC}"
fi
|
||||
|
||||
# ============================================================================
# OPTIONAL: NGINX REVERSE PROXY
# ============================================================================

echo ""
if command -v nginx &> /dev/null; then
    read -p "Configure nginx reverse proxy? (recommended for production) (y/n) " -n 1 -r
    echo
    if [[ $REPLY =~ ^[Yy]$ ]]; then
        echo -e "${GREEN}Creating nginx configuration...${NC}"

        # Quoted delimiter ('NGINX_EOF'): nginx's own $host/$scheme/... vars
        # must reach the file literally, with no shell expansion.
        cat > "/etc/nginx/sites-available/media-downloader" << 'NGINX_EOF'
# Media Downloader Nginx Configuration
# Reverse proxy for API (8000) and Frontend (5173)

server {
    listen 80;
    server_name _; # Change to your domain

    # Frontend (Vite)
    location / {
        proxy_pass http://127.0.0.1:5173;
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection 'upgrade';
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_cache_bypass $http_upgrade;
    }

    # API Backend
    location /api/ {
        proxy_pass http://127.0.0.1:8000/api/;
        proxy_http_version 1.1;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # WebSocket support
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection "upgrade";

        # Timeouts for long-running requests
        proxy_connect_timeout 60s;
        proxy_send_timeout 300s;
        proxy_read_timeout 300s;
    }

    # WebSocket endpoint
    location /ws {
        proxy_pass http://127.0.0.1:8000/ws;
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection "upgrade";
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_read_timeout 86400;
    }

    # Media files (if serving directly)
    location /media/ {
        alias /opt/immich/media/;
        autoindex off;
    }

    # Thumbnails
    location /thumbnails/ {
        proxy_pass http://127.0.0.1:8000/api/thumbnails/;
        proxy_cache_valid 200 1d;
    }

    # Increase max upload size for imports
    client_max_body_size 500M;
}
NGINX_EOF

        # Enable site (symlink into sites-enabled; tolerate pre-existing link)
        ln -sf /etc/nginx/sites-available/media-downloader /etc/nginx/sites-enabled/ 2>/dev/null || true

        # Test and reload nginx — only reload when the config validates.
        if nginx -t 2>/dev/null; then
            systemctl reload nginx
            echo -e "${GREEN}✓ Nginx configured and reloaded${NC}"
        else
            echo -e "${YELLOW}⚠ Nginx config has errors - please check manually${NC}"
        fi
    fi
else
    echo -e "${YELLOW}Note: nginx not installed. For production, consider:${NC}"
    echo "  sudo apt install nginx"
    echo "  Then re-run installer or manually configure reverse proxy"
fi
|
||||
|
||||
# ============================================================================
# COMPLETION MESSAGE
# ============================================================================
# Output-only from here: summarize install paths, services, timers and
# day-to-day commands for the operator.

echo ""
echo -e "${GREEN}╔════════════════════════════════════════════════╗${NC}"
echo -e "${GREEN}║ Installation Complete! ║${NC}"
echo -e "${GREEN}╚════════════════════════════════════════════════╝${NC}"
echo ""
echo -e "${BLUE}Installation location:${NC} $INSTALL_DIR"
echo -e "${BLUE}Configuration file:${NC} $INSTALL_DIR/config/settings.json"
echo -e "${BLUE}Database directory:${NC} $INSTALL_DIR/database"
echo -e "${BLUE}Logs directory:${NC} $INSTALL_DIR/logs"
echo ""
echo -e "${YELLOW}Services:${NC}"
echo " media-downloader - Main scheduler service"
echo " media-downloader-api - Web API (port 8000)"
echo " media-downloader-frontend - Web UI production (port 5173)"
echo " media-downloader-frontend-dev - Web UI development with hot-reload"
echo " xvfb-media-downloader - Virtual display for browser automation"
echo ""
echo -e "${YELLOW}Scheduled Tasks (timers):${NC}"
echo " media-cache-builder - Thumbnail cache (daily 3 AM)"
echo " media-embedding-generator - CLIP embeddings (daily 3 AM)"
echo " media-downloader-db-cleanup- Database cleanup (daily 3 AM)"
echo " media-celebrity-enrichment - Celebrity metadata (daily 4 AM)"
echo " plex-match - Plex library matching (6 AM, 6 PM)"
echo ""
echo -e "${YELLOW}Commands:${NC}"
echo " media-downloader - Run manual download"
echo " media-downloader --scheduler - Run scheduler"
echo " media-downloader --scheduler-status - Check scheduler status"
echo " media-downloader --platform instagram - Download specific platform"
echo ""
echo -e "${YELLOW}Service Management:${NC}"
echo " sudo systemctl status media-downloader - Check status"
echo " sudo systemctl restart media-downloader - Restart scheduler"
echo " sudo systemctl restart media-downloader-api- Restart API"
echo " sudo journalctl -u media-downloader -f - View logs"
echo ""
echo -e "${YELLOW}Web Interface:${NC}"
echo " API: http://localhost:8000"
echo " Frontend: http://localhost:5173"
echo ""
echo -e "${YELLOW}Development Mode:${NC}"
echo " # Switch to development frontend (with hot-reload):"
echo " sudo systemctl stop media-downloader-frontend"
echo " sudo systemctl start media-downloader-frontend-dev"
echo ""
echo -e "${YELLOW}To uninstall:${NC}"
echo " sudo $INSTALL_DIR/scripts/uninstall.sh"
|
||||
307
scripts/mds
Executable file
307
scripts/mds
Executable file
@@ -0,0 +1,307 @@
|
||||
#!/bin/bash
# mds - Media Downloader Services manager
# Usage: mds [command] [service(s)...]
#
# Commands:
#   status   - Show status of services (default)
#   start    - Start service(s)
#   stop     - Stop service(s)
#   restart  - Restart service(s)
#   logs     - Show recent logs for a service
#
# Services:
#   all        - All services
#   scheduler  - media-downloader (scheduler)
#   api        - media-downloader-api
#   frontend   - media-downloader-frontend
#   xvfb       - xvfb-media-downloader
#   proxy      - unified-proxy (Docker)
#   cache      - media-cache-builder
#   enrich     - media-celebrity-enrichment
#   embeddings - media-embedding-generator
#   dbcleanup  - media-downloader-db-cleanup
#   backup     - cloud-backup-sync
#   backupui   - backup-central (web UI)
#
# Examples:
#   mds                      # status of all services
#   mds status               # same
#   mds restart api          # restart just the API
#   mds restart api frontend # restart API and frontend
#   mds restart all          # restart all services
#   mds stop scheduler       # stop the scheduler
#   mds logs scheduler       # show scheduler logs
# NOTE: the header above is printed verbatim by `mds help` (head/tail at the
# bottom of this file) — keep its line count stable.

set -euo pipefail

# Color codes
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
BOLD='\033[1m'
DIM='\033[2m'
NC='\033[0m' # No Color

# Service name mapping: short alias → systemd unit name.
# "proxy" is deliberately absent: it is a Docker container, special-cased in
# resolve_service / print_status_line / do_action / do_logs.
declare -A SERVICE_MAP=(
  [scheduler]="media-downloader"
  [api]="media-downloader-api"
  [frontend]="media-downloader-frontend"
  [xvfb]="xvfb-media-downloader"
  [cache]="media-cache-builder"
  [enrich]="media-celebrity-enrichment"
  [embeddings]="media-embedding-generator"
  [dbcleanup]="media-downloader-db-cleanup"
  [backup]="cloud-backup-sync"
  [backupui]="backup-central"
)

# Canonical display/iteration order for "all" (includes the Docker proxy).
ALL_ALIASES=(scheduler api frontend xvfb proxy cache enrich embeddings dbcleanup backup backupui)
|
||||
|
||||
resolve_service() {
  # Translate a short alias into its systemd unit name.  Echoes "proxy"
  # for the Docker-managed container, the mapped unit for a known alias,
  # the input itself when it is already a literal unit name from the map,
  # and "" when nothing matches.
  local name="$1"

  # The proxy runs under Docker, not systemd; keep its sentinel untouched.
  if [[ "$name" == "proxy" ]]; then
    echo "proxy"
    return
  fi

  # Known alias → mapped unit name.
  if [[ -n "${SERVICE_MAP[$name]+x}" ]]; then
    echo "${SERVICE_MAP[$name]}"
    return
  fi

  # Accept a literal unit name if it appears as a value in the map.
  local key
  for key in "${!SERVICE_MAP[@]}"; do
    if [[ "${SERVICE_MAP[$key]}" == "$name" ]]; then
      echo "$name"
      return
    fi
  done

  # Unknown — empty result signals the caller to report an error.
  echo ""
}
|
||||
|
||||
get_alias() {
  # Reverse lookup: systemd unit name → short alias.  Falls back to echoing
  # the input unchanged when no alias is registered for it.
  local unit="$1" key
  for key in "${!SERVICE_MAP[@]}"; do
    if [[ "${SERVICE_MAP[$key]}" == "$unit" ]]; then
      echo "$key"
      return
    fi
  done
  echo "$unit"
}
|
||||
|
||||
print_status_line() {
  # Print one aligned status row: colored state dot, alias, unit name, and a
  # details column (state, memory, pid where available).
  # $1 = alias, $2 = resolved service/unit name.
  local alias="$1"
  local service="$2"

  if [[ "$alias" == "proxy" ]]; then
    # Docker container — query docker inspect rather than systemd.
    local state
    state=$(docker inspect -f '{{.State.Status}}' unified-proxy 2>/dev/null || echo "not found")
    local uptime
    # NOTE(review): uptime is captured but never printed — confirm whether
    # it should appear in the details column.
    uptime=$(docker inspect -f '{{.State.StartedAt}}' unified-proxy 2>/dev/null || echo "")

    local color="$RED"
    local symbol="●"
    if [[ "$state" == "running" ]]; then
      color="$GREEN"
    fi

    printf " ${color}${symbol}${NC} %-12s %-30s %s\n" "$alias" "unified-proxy (docker)" "$state"
    return
  fi

  # systemd unit: ActiveState (active/inactive/failed/...) plus SubState
  # (running/dead/...) together give the full picture.
  local active_state sub_state
  active_state=$(systemctl show -p ActiveState --value "$service" 2>/dev/null || echo "unknown")
  sub_state=$(systemctl show -p SubState --value "$service" 2>/dev/null || echo "unknown")

  # Map state → dot color.
  local color="$RED"
  local symbol="●"
  case "$active_state" in
    active)
      color="$GREEN"
      ;;
    inactive)
      if [[ "$sub_state" == "failed" ]]; then
        color="$RED"
      else
        color="$DIM"
      fi
      ;;
    deactivating)
      color="$YELLOW"
      ;;
    activating)
      color="$BLUE"
      ;;
    failed)
      color="$RED"
      ;;
  esac

  # Get memory and uptime
  local memory=""
  local pid=""
  if [[ "$active_state" == "active" ]]; then
    memory=$(systemctl show -p MemoryCurrent --value "$service" 2>/dev/null || echo "")
    pid=$(systemctl show -p MainPID --value "$service" 2>/dev/null || echo "")
    # systemd reports "[not set]" or "infinity" when memory accounting is off.
    if [[ -n "$memory" && "$memory" != "[not set]" && "$memory" != "infinity" ]]; then
      # Convert bytes to human readable (integer math; one decimal for GiB)
      local mem_mb=$((memory / 1024 / 1024))
      if [[ $mem_mb -gt 1024 ]]; then
        local mem_gb=$((mem_mb / 1024))
        local mem_frac=$(( (mem_mb % 1024) * 10 / 1024 ))
        memory="${mem_gb}.${mem_frac}G"
      else
        memory="${mem_mb}M"
      fi
    else
      memory=""
    fi
  fi

  # Build the human-friendly details column.
  local details
  if [[ "$active_state" == "inactive" && "$sub_state" == "dead" ]]; then
    details="stopped"
  elif [[ "$active_state" == "failed" || "$sub_state" == "failed" ]]; then
    details="failed"
  elif [[ "$active_state" == "active" && "$sub_state" == "running" ]]; then
    details="running"
  else
    details="$active_state ($sub_state)"
  fi
  if [[ -n "$memory" ]]; then
    details="$details ${DIM}mem: ${memory}${NC}"
  fi
  if [[ -n "$pid" && "$pid" != "0" ]]; then
    details="$details ${DIM}pid: ${pid}${NC}"
  fi

  # %b interprets the color escape sequences embedded in details.
  printf " ${color}${symbol}${NC} %-12s %-30s %b\n" "$alias" "$service" "$details"
}
|
||||
|
||||
do_status() {
  # Show the status table.  With no arguments, covers every known service.
  local services=("$@")
  if [[ ${#services[@]} -eq 0 ]]; then
    services=("${ALL_ALIASES[@]}")
  fi

  echo -e "\n${BOLD}Media Downloader Services${NC}\n"

  for alias in "${services[@]}"; do
    local service
    service=$(resolve_service "$alias")
    if [[ -z "$service" ]]; then
      # Unknown alias: report inline and keep going with the rest.
      echo -e " ${RED}?${NC} ${alias} (unknown service)"
      continue
    fi
    print_status_line "$alias" "$service"
  done
  echo ""
}
|
||||
|
||||
do_action() {
  # Apply start/stop/restart ($1) to one or more service aliases ($2...).
  # "all" expands to every known alias, including the Docker proxy.
  local action="$1"
  shift
  local services=("$@")

  if [[ ${#services[@]} -eq 0 ]]; then
    echo -e "${RED}Error: specify service(s) or 'all'${NC}"
    echo "Usage: mds $action [service(s)...]"
    exit 1
  fi

  # Expand 'all'
  if [[ "${services[0]}" == "all" ]]; then
    services=("${ALL_ALIASES[@]}")
  fi

  for alias in "${services[@]}"; do
    local service
    service=$(resolve_service "$alias")
    if [[ -z "$service" ]]; then
      echo -e " ${RED}✗${NC} ${alias}: unknown service"
      continue
    fi

    if [[ "$alias" == "proxy" ]]; then
      # Docker-managed container: drive it with docker, not systemctl.
      case "$action" in
        start)
          echo -e " ${CYAN}▶${NC} Starting unified-proxy..."
          docker start unified-proxy 2>/dev/null && echo -e " ${GREEN}✓${NC} unified-proxy started" || echo -e " ${RED}✗${NC} Failed to start unified-proxy"
          ;;
        stop)
          echo -e " ${YELLOW}■${NC} Stopping unified-proxy..."
          docker stop unified-proxy 2>/dev/null && echo -e " ${GREEN}✓${NC} unified-proxy stopped" || echo -e " ${RED}✗${NC} Failed to stop unified-proxy"
          ;;
        restart)
          echo -e " ${CYAN}↻${NC} Restarting unified-proxy..."
          docker restart unified-proxy 2>/dev/null && echo -e " ${GREEN}✓${NC} unified-proxy restarted" || echo -e " ${RED}✗${NC} Failed to restart unified-proxy"
          ;;
      esac
      continue
    fi

    # systemd unit — the `cmd && ok || fail` shape keeps `set -e` from
    # aborting the loop when one unit fails, while still reporting it.
    case "$action" in
      start)
        echo -e " ${CYAN}▶${NC} Starting ${alias} (${service})..."
        sudo systemctl start "$service" && echo -e " ${GREEN}✓${NC} ${alias} started" || echo -e " ${RED}✗${NC} Failed to start ${alias}"
        ;;
      stop)
        echo -e " ${YELLOW}■${NC} Stopping ${alias} (${service})..."
        sudo systemctl stop "$service" && echo -e " ${GREEN}✓${NC} ${alias} stopped" || echo -e " ${RED}✗${NC} Failed to stop ${alias}"
        ;;
      restart)
        echo -e " ${CYAN}↻${NC} Restarting ${alias} (${service})..."
        sudo systemctl restart "$service" && echo -e " ${GREEN}✓${NC} ${alias} restarted" || echo -e " ${RED}✗${NC} Failed to restart ${alias}"
        ;;
    esac
  done
}
|
||||
|
||||
do_logs() {
  # Show the last 50 log lines for one service (default: scheduler).
  local alias="${1:-scheduler}"
  local service
  service=$(resolve_service "$alias")

  if [[ -z "$service" ]]; then
    echo -e "${RED}Unknown service: ${alias}${NC}"
    exit 1
  fi

  if [[ "$alias" == "proxy" ]]; then
    # Docker container: read container logs instead of the journal.
    docker logs --tail 50 unified-proxy
    return
  fi

  sudo journalctl -u "$service" --no-pager -n 50
}
|
||||
|
||||
# Main — first argument selects the command (default "status"); remaining
# arguments are service aliases passed through to the handler.
command="${1:-status}"
shift 2>/dev/null || true  # tolerate zero arguments under set -e

case "$command" in
  status|st|s)
    do_status "$@"
    ;;
  start)
    do_action start "$@"
    ;;
  stop)
    do_action stop "$@"
    ;;
  restart|rs|r)
    do_action restart "$@"
    ;;
  logs|log|l)
    do_logs "$@"
    ;;
  help|--help|-h)
    # Prints the usage comment block from the top of this file; fragile —
    # the 27/25 line counts must track the header's length.
    head -27 "$0" | tail -25
    ;;
  *)
    echo -e "${RED}Unknown command: ${command}${NC}"
    echo "Commands: status, start, stop, restart, logs"
    exit 1
    ;;
esac
|
||||
433
scripts/migrate_immich_to_gallery.py
Executable file
433
scripts/migrate_immich_to_gallery.py
Executable file
@@ -0,0 +1,433 @@
|
||||
#!/usr/bin/env python3
"""
Migrate Immich assets into file_inventory and face_recognition_scans.

Connects to Immich PostgreSQL (via docker exec) and app PostgreSQL directly.
Idempotent — safe to re-run. Uses ON CONFLICT DO NOTHING for file_inventory
and checks for existing immich_import scans before inserting face data.

Path mapping:
    /mnt/media/evalongoria/ → /opt/immich/el/
    /mnt/media/elvideo/     → /opt/immich/elv/
    /mnt/media/md/          → SKIPPED (already in file_inventory)

Platform inference from subdirectories:
    evalongoria: IG→instagram, TT→tiktok, X→twitter, Discord→discord,
                 Flickr→flickr, rest→unknown
    elvideo:     YT→youtube, rest→unknown
"""

import subprocess
import sys
import time
import psycopg2
import psycopg2.extras

# ── Configuration ──────────────────────────────────────────────────────────

# NOTE(review): database credentials are hardcoded in the DSN — consider
# loading them from an environment variable or config file instead.
APP_DB_DSN = "postgresql://media_downloader:PNsihOXvvuPwWiIvGlsc9Fh2YmMmB@localhost/media_downloader"
# Immich's PostgreSQL is reached indirectly: `docker exec` into this
# container and run psql inside it (see immich_query below).
IMMICH_CONTAINER = "immich_postgres"
IMMICH_DB = "immich"
IMMICH_USER = "postgres"

# Rows buffered per bulk INSERT into the app database.
BATCH_SIZE = 5000

# Immich person UUID used to select the face detections to migrate.
EVA_PERSON_UUID = "0154270a-8c30-4fb7-b73b-3fb3acc49483"

# Path prefix replacements (Immich → local)
PATH_MAP = {
    "/mnt/media/evalongoria/": "/opt/immich/el/",
    "/mnt/media/elvideo/": "/opt/immich/elv/",
}

# Subdirectory → platform mapping for evalongoria
EVALONGORIA_PLATFORM_MAP = {
    "IG": "instagram",
    "TT": "tiktok",
    "X": "twitter",
    "Discord": "discord",
    "Flickr": "flickr",
    "SC": "unknown",
    "Caps": "unknown",
    "Clips": "unknown",
    "CT": "unknown",
    "HQ": "unknown",
    "Misc": "unknown",
}

# Subdirectory → platform mapping for elvideo
ELVIDEO_PLATFORM_MAP = {
    "YT": "youtube",
    "Misc": "unknown",
}
|
||||
|
||||
|
||||
# ── Immich DB helper ───────────────────────────────────────────────────────
|
||||
|
||||
def immich_query(sql):
    """Execute SQL against Immich's PostgreSQL via `docker exec`.

    psql is invoked tuples-only (-t), unaligned (-A), with the ASCII unit
    separator (0x1f) as field delimiter so embedded commas/tabs in values
    cannot break downstream parsing.

    Returns psql's stdout with surrounding whitespace stripped; on any
    docker/psql failure, prints the error to stderr and exits the process.
    """
    command = [
        "docker", "exec", IMMICH_CONTAINER,
        "psql", "-U", IMMICH_USER, "-d", IMMICH_DB,
        "-t", "-A", "-F", "\x1f",
        "-c", sql,
    ]
    proc = subprocess.run(command, capture_output=True, text=True, timeout=300)
    if proc.returncode == 0:
        return proc.stdout.strip()
    print(f"ERROR running Immich query: {proc.stderr}", file=sys.stderr)
    sys.exit(1)
|
||||
|
||||
|
||||
def immich_query_rows(sql, columns):
    """Run `sql` via immich_query() and parse the output into dicts.

    Each output line is split on the 0x1f delimiter and zipped with
    `columns`.  Blank lines and lines whose field count does not match
    (e.g. stray notices) are silently dropped.
    """
    raw = immich_query(sql)
    parsed = []
    for line in (raw.split("\n") if raw else []):
        if not line.strip():
            continue
        fields = line.split("\x1f")
        if len(fields) == len(columns):
            parsed.append(dict(zip(columns, fields)))
    return parsed
|
||||
|
||||
|
||||
# ── Path & platform helpers ────────────────────────────────────────────────
|
||||
|
||||
def map_path(immich_path):
    """Translate an Immich originalPath into its local filesystem path.

    Returns None for paths outside PATH_MAP (e.g. /mnt/media/md/), which
    callers treat as "skip this asset".
    """
    for src_prefix, dst_prefix in PATH_MAP.items():
        if immich_path.startswith(src_prefix):
            return dst_prefix + immich_path[len(src_prefix):]
    return None
|
||||
|
||||
|
||||
def infer_platform(immich_path):
    """Infer the source platform from a path's first-level subdirectory.

    Known first-level folders (e.g. IG, TT, YT) map to platform names via
    the per-tree tables; any other layout — including files sitting
    directly in the tree root — yields "unknown".
    """
    roots = (
        ("/mnt/media/evalongoria/", EVALONGORIA_PLATFORM_MAP),
        ("/mnt/media/elvideo/", ELVIDEO_PLATFORM_MAP),
    )
    for root, table in roots:
        if not immich_path.startswith(root):
            continue
        tail = immich_path[len(root):]
        # A file directly in the root has no "/" → no subdirectory to map.
        head = tail.split("/")[0] if "/" in tail else None
        if head and head in table:
            return table[head]
        return "unknown"
    return "unknown"
|
||||
|
||||
|
||||
def infer_content_type(asset_type):
    """Map an Immich asset type string to the local content_type value.

    IMAGE → "image", VIDEO → "video"; anything else → "unknown".
    """
    return {"IMAGE": "image", "VIDEO": "video"}.get(asset_type, "unknown")
|
||||
|
||||
|
||||
# ── Main migration ─────────────────────────────────────────────────────────
|
||||
|
||||
def migrate_assets(app_conn):
    """Fetch assets from Immich and insert them into file_inventory.

    Reads every asset under /mnt/media/evalongoria/ and /mnt/media/elvideo/
    from the Immich database (via docker exec psql), remaps paths to the
    local /opt/immich/{el,elv}/ trees, and bulk-inserts the rows.  Paths
    that map to None (e.g. /mnt/media/md/) are skipped.  Idempotent via
    ON CONFLICT (file_path) DO NOTHING.

    Args:
        app_conn: open psycopg2 connection to the app database.

    Returns:
        Number of rows actually inserted (conflict-skipped rows excluded).
    """
    import re  # stdlib; used to strip trailing UTC offsets from timestamps

    print("=" * 60)
    print("Phase 1: Migrating Immich assets → file_inventory")
    print("=" * 60)

    # Fetch all evalongoria + elvideo assets from Immich
    sql = """
        SELECT
            a.id::text,
            a."originalPath",
            a."originalFileName",
            a.type,
            a."fileCreatedAt"::text,
            a."deletedAt"::text,
            a.width::text,
            a.height::text,
            encode(a.checksum, 'hex') as file_hash,
            COALESCE(e."fileSizeInByte"::text, '') as file_size
        FROM asset a
        LEFT JOIN asset_exif e ON a.id = e."assetId"
        WHERE (a."originalPath" LIKE '/mnt/media/evalongoria/%'
            OR a."originalPath" LIKE '/mnt/media/elvideo/%')
        ORDER BY a."fileCreatedAt"
    """

    print("Fetching assets from Immich...")
    columns = [
        "id", "originalPath", "originalFileName", "type",
        "fileCreatedAt", "deletedAt", "width", "height",
        "file_hash", "file_size",
    ]
    rows = immich_query_rows(sql, columns)
    total = len(rows)
    print(f"  Found {total:,} assets to process")

    inserted = 0
    skipped = 0
    batch = []

    cur = app_conn.cursor()

    insert_sql = """
        INSERT INTO file_inventory
            (file_path, filename, platform, source, content_type,
             file_size, file_hash, width, height, location, created_date)
        VALUES %s
        ON CONFLICT (file_path) DO NOTHING
    """

    def _flush():
        # Flush the current batch as a SINGLE statement (page_size=len(batch)).
        # With execute_values' default page_size of 100, several statements
        # are issued and cur.rowcount only reflects the last one, which made
        # the previous `inserted` tally undercount.
        nonlocal inserted
        psycopg2.extras.execute_values(
            cur, insert_sql, batch,
            template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
            page_size=len(batch),
        )
        inserted += cur.rowcount
        app_conn.commit()

    for i, row in enumerate(rows):
        local_path = map_path(row["originalPath"])
        if local_path is None:
            skipped += 1  # md/ or unknown prefix — already inventoried
            continue

        platform = infer_platform(row["originalPath"])
        content_type = infer_content_type(row["type"])
        # Immich soft-deleted assets land in the "recycle" location here.
        location = "recycle" if row["deletedAt"] else "final"

        width = int(row["width"]) if row["width"] else None
        height = int(row["height"]) if row["height"] else None
        file_size = int(row["file_size"]) if row["file_size"] else None

        # Target column is `timestamp without time zone`, so drop a trailing
        # zone offset.  The old chained str.replace("+00:00","")/("+00","")
        # corrupted offsets such as "+00:30" (leaving ":30" glued to the
        # seconds) and silently passed through non-zero offsets like "+02";
        # a regex strips any trailing offset in one step, guarded to full
        # "date time" values so a bare date never loses its "-DD" part.
        # NOTE(review): assumes Immich renders timestamps in UTC, so
        # dropping the offset loses no information — confirm the
        # container's timezone setting.
        created_date = row["fileCreatedAt"]
        if created_date:
            if " " in created_date:
                created_date = re.sub(r"[+-]\d{2}(:?\d{2})?$", "", created_date)
            created_date = created_date.strip()

        batch.append((
            local_path,
            row["originalFileName"],
            platform,
            "evalongoria",
            content_type,
            file_size,
            row["file_hash"],
            width,
            height,
            location,
            created_date if created_date else None,
        ))

        if len(batch) >= BATCH_SIZE:
            _flush()
            processed = i + 1
            print(f"  Progress: {processed:,}/{total:,} processed, {inserted:,} inserted")
            batch = []

    # Final partial batch
    if batch:
        _flush()

    cur.close()
    print(f"\n  DONE: {inserted:,} rows inserted, {skipped:,} skipped (md/ paths)")
    return inserted
|
||||
|
||||
|
||||
def migrate_face_detections(app_conn):
    """Migrate Eva Longoria face detections from Immich → face_recognition_scans.

    Skips entirely when any scan_type='immich_import' rows already exist
    (coarse idempotency guard).  Only assets whose mapped path is already
    present in file_inventory receive a scan row.

    Args:
        app_conn: open psycopg2 connection to the app database.

    Returns:
        Number of face scan rows inserted (0 when skipped).
    """
    print("\n" + "=" * 60)
    print("Phase 2: Migrating face detections → face_recognition_scans")
    print("=" * 60)

    # First, check if we already ran this migration
    cur = app_conn.cursor()
    cur.execute("SELECT COUNT(*) FROM face_recognition_scans WHERE scan_type = 'immich_import'")
    existing = cur.fetchone()[0]
    if existing > 0:
        print(f"  Found {existing:,} existing immich_import scans — skipping face migration")
        print("  (Delete existing immich_import scans first if you want to re-run)")
        cur.close()
        return 0

    # Get distinct assets with Eva Longoria face + face count + path in one query.
    # The UUID is interpolated into the SQL via f-string; acceptable only
    # because EVA_PERSON_UUID is a hardcoded constant — never do this with
    # user-supplied input.
    print("Fetching face detection data with paths from Immich...")
    sql = f"""
        SELECT
            a."originalPath",
            COUNT(*) as eva_faces
        FROM asset_face af
        JOIN asset a ON af."assetId" = a.id
        WHERE af."personId" = '{EVA_PERSON_UUID}'
          AND af."deletedAt" IS NULL
          AND (a."originalPath" LIKE '/mnt/media/evalongoria/%'
            OR a."originalPath" LIKE '/mnt/media/elvideo/%')
        GROUP BY a."originalPath"
    """
    columns = ["originalPath", "face_count"]
    face_rows = immich_query_rows(sql, columns)
    print(f"  Found {len(face_rows):,} assets with Eva Longoria face detections")

    # Build file_path lookup from file_inventory (for /opt/immich/el/ and /opt/immich/elv/ paths)
    print("Building file_inventory lookup...")
    cur.execute("""
        SELECT file_path FROM file_inventory
        WHERE file_path LIKE '/opt/immich/el/%' OR file_path LIKE '/opt/immich/elv/%'
    """)
    inventory_paths = set(row[0] for row in cur.fetchall())
    print(f"  {len(inventory_paths):,} paths in file_inventory for el/elv")

    # Prepare face scan inserts
    insert_sql = """
        INSERT INTO face_recognition_scans
            (file_path, has_match, matched_person, confidence, face_count, scan_type)
        VALUES %s
    """

    batch = []
    inserted = 0
    skipped_not_in_inventory = 0
    total = len(face_rows)

    for i, row in enumerate(face_rows):
        local_path = map_path(row["originalPath"])
        if local_path is None:
            continue

        # Only attach scans to files that were actually migrated in Phase 1.
        if local_path not in inventory_paths:
            skipped_not_in_inventory += 1
            continue

        face_count = int(row["face_count"])

        # confidence 1.0: the match comes straight from Immich's own
        # person clustering, treated here as ground truth.
        batch.append((
            local_path,
            True,
            "Eva Longoria",
            1.0,
            face_count,
            "immich_import",
        ))

        if len(batch) >= BATCH_SIZE:
            # NOTE(review): cur.rowcount after execute_values reflects only
            # the final internal page (default page_size=100), so `inserted`
            # may undercount — confirm before relying on the reported total.
            psycopg2.extras.execute_values(
                cur, insert_sql, batch,
                template="(%s, %s, %s, %s, %s, %s)",
            )
            inserted += cur.rowcount
            app_conn.commit()
            print(f"  Progress: {i + 1:,}/{total:,} processed, {inserted:,} inserted")
            batch = []

    # Flush the final partial batch.
    if batch:
        psycopg2.extras.execute_values(
            cur, insert_sql, batch,
            template="(%s, %s, %s, %s, %s, %s)",
        )
        inserted += cur.rowcount
        app_conn.commit()

    cur.close()
    print(f"\n  DONE: {inserted:,} face scans inserted")
    print(f"  Skipped: {skipped_not_in_inventory:,} (not in file_inventory)")
    return inserted
|
||||
|
||||
|
||||
def verify(app_conn):
    """Print verification counts for the migrated data (read-only).

    Summarizes per-tree, per-location and per-platform counts in
    file_inventory, plus face_recognition_scans totals.
    """
    print("\n" + "=" * 60)
    print("Verification")
    print("=" * 60)

    cur = app_conn.cursor()

    # file_inventory counts
    cur.execute("SELECT COUNT(*) FROM file_inventory WHERE file_path LIKE '/opt/immich/el/%'")
    el_count = cur.fetchone()[0]

    cur.execute("SELECT COUNT(*) FROM file_inventory WHERE file_path LIKE '/opt/immich/elv/%'")
    elv_count = cur.fetchone()[0]

    cur.execute("""
        SELECT location, COUNT(*)
        FROM file_inventory
        WHERE file_path LIKE '/opt/immich/el/%' OR file_path LIKE '/opt/immich/elv/%'
        GROUP BY location
    """)
    location_counts = dict(cur.fetchall())

    cur.execute("""
        SELECT platform, COUNT(*)
        FROM file_inventory
        WHERE file_path LIKE '/opt/immich/el/%' OR file_path LIKE '/opt/immich/elv/%'
        GROUP BY platform
        ORDER BY 2 DESC
    """)
    platform_counts = cur.fetchall()

    # face_recognition_scans counts
    cur.execute("SELECT COUNT(*) FROM face_recognition_scans WHERE scan_type = 'immich_import'")
    face_count = cur.fetchone()[0]

    cur.execute("SELECT COUNT(*) FROM face_recognition_scans")
    total_face_scans = cur.fetchone()[0]

    # Total file_inventory
    cur.execute("SELECT COUNT(*) FROM file_inventory")
    total_inventory = cur.fetchone()[0]

    cur.close()

    print(f"\n  file_inventory:")
    print(f"    /opt/immich/el/* (evalongoria): {el_count:,}")
    print(f"    /opt/immich/elv/* (elvideo): {elv_count:,}")
    print(f"    Total new: {el_count + elv_count:,}")
    print(f"    By location: {dict(location_counts)}")
    print(f"    By platform:")
    for platform, count in platform_counts:
        print(f"      {platform:12s}: {count:,}")

    print(f"\n  face_recognition_scans:")
    print(f"    immich_import: {face_count:,}")
    print(f"    Total scans: {total_face_scans:,}")

    print(f"\n  Total file_inventory rows: {total_inventory:,}")
|
||||
|
||||
|
||||
def main():
    """Run the Immich → file_inventory migration end to end.

    Verifies the Immich DB is reachable, then migrates assets and face
    detections into the app database and prints a verification summary.
    """
    started_at = time.time()
    print("Immich → file_inventory migration")
    print("=" * 60)

    # Sanity-check the Immich database before touching the app database.
    print("Testing Immich database connection...")
    asset_total = immich_query("SELECT COUNT(*) FROM asset")
    print(f" Immich has {int(asset_total):,} assets")

    print("Connecting to app database...")
    connection = psycopg2.connect(APP_DB_DSN)

    try:
        migrate_assets(connection)
        migrate_face_detections(connection)
        verify(connection)
    finally:
        # Always release the connection, even if a migration step raises.
        connection.close()

    print(f"\nCompleted in {time.time() - started_at:.1f}s")


if __name__ == "__main__":
    main()
|
||||
127
scripts/move_immich_deleted_to_recycle.py
Normal file
127
scripts/move_immich_deleted_to_recycle.py
Normal file
@@ -0,0 +1,127 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Move Immich soft-deleted files from /opt/immich/el/ and /opt/immich/elv/
|
||||
into the actual recycle bin (/opt/immich/recycle/) with proper DB entries.
|
||||
|
||||
For each file with location='recycle' in file_inventory:
|
||||
1. Move the file to /opt/immich/recycle/<uuid>.<ext>
|
||||
2. Insert a row into recycle_bin
|
||||
3. Delete from file_inventory
|
||||
"""
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
import time
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
|
||||
import psycopg2
|
||||
|
||||
# Connection string for the application's PostgreSQL database.
# NOTE(review): credentials are hardcoded here (and in sibling scripts);
# consider loading them from the environment or a secrets file instead.
APP_DB_DSN = "postgresql://media_downloader:PNsihOXvvuPwWiIvGlsc9Fh2YmMmB@localhost/media_downloader"
# Destination directory for recycled files.
RECYCLE_DIR = Path("/opt/immich/recycle")
# Commit to the database every BATCH_SIZE processed rows.
BATCH_SIZE = 500
|
||||
|
||||
|
||||
def main():
    """Move soft-deleted Immich files into the recycle bin.

    For every file_inventory row under /opt/immich/el/ or /opt/immich/elv/
    with location='recycle':
      1. move the file on disk to /opt/immich/recycle/<uuid>.<ext>
      2. insert a matching row into recycle_bin
      3. delete the row from file_inventory
    Commits every BATCH_SIZE rows. Each item runs under a SAVEPOINT so a
    single failed statement no longer aborts the whole transaction:
    previously, after one INSERT error psycopg2 left the transaction in an
    aborted state and every later execute() in the batch failed too.
    """
    start = time.time()
    print("Moving Immich soft-deleted files to recycle bin")
    print("=" * 60)

    conn = psycopg2.connect(APP_DB_DSN)
    cur = conn.cursor()

    # Get all recycled entries from el/elv
    cur.execute("""
        SELECT id, file_path, filename, file_size, file_hash, created_date
        FROM file_inventory
        WHERE (file_path LIKE '/opt/immich/el/%' OR file_path LIKE '/opt/immich/elv/%')
        AND location = 'recycle'
        ORDER BY id
    """)
    rows = cur.fetchall()
    total = len(rows)
    print(f" Found {total:,} recycled entries to move")

    if total == 0:
        print(" Nothing to do.")
        conn.close()
        return

    RECYCLE_DIR.mkdir(parents=True, exist_ok=True)

    moved = 0
    missing = 0
    errors = 0

    for i, (inv_id, file_path, filename, file_size, file_hash, created_date) in enumerate(rows):
        src = Path(file_path)

        if not src.exists():
            # File doesn't exist on disk — just remove from file_inventory
            cur.execute("DELETE FROM file_inventory WHERE id = %s", (inv_id,))
            missing += 1
            if missing <= 5:
                print(f" MISSING (removed from DB): {file_path}")
            continue

        # Generate recycle path
        ext = src.suffix or ""
        recycle_id = str(uuid.uuid4())
        recycle_path = RECYCLE_DIR / f"{recycle_id}{ext}"

        # Per-item savepoint: on error we roll back only this item's
        # statements, keeping the rest of the uncommitted batch valid.
        cur.execute("SAVEPOINT move_item")
        try:
            # Get file mtime before moving
            mtime = src.stat().st_mtime
            actual_size = src.stat().st_size

            # Move the file. NOTE: the filesystem move is not transactional —
            # if the INSERT below fails, the file stays in the recycle dir
            # while the DB rows are rolled back.
            shutil.move(str(src), str(recycle_path))

            # Insert into recycle_bin
            cur.execute("""
                INSERT INTO recycle_bin
                (id, original_path, original_filename, recycle_path,
                 file_extension, file_size, original_mtime,
                 deleted_from, deleted_by, file_hash)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            """, (
                recycle_id,
                file_path,
                filename,
                str(recycle_path),
                ext.lstrip(".") if ext else None,
                actual_size or file_size,
                mtime,
                "immich_deleted",
                "immich_migration",
                file_hash,
            ))

            # Delete from file_inventory
            cur.execute("DELETE FROM file_inventory WHERE id = %s", (inv_id,))

            cur.execute("RELEASE SAVEPOINT move_item")
            moved += 1

        except Exception as e:
            # Undo this item's statements only; the batch stays usable.
            cur.execute("ROLLBACK TO SAVEPOINT move_item")
            errors += 1
            if errors <= 5:
                print(f" ERROR moving {file_path}: {e}")

        if (i + 1) % BATCH_SIZE == 0:
            conn.commit()
            print(f" Progress: {i + 1:,}/{total:,} — moved: {moved:,}, missing: {missing:,}, errors: {errors:,}")

    conn.commit()
    cur.close()
    conn.close()

    elapsed = time.time() - start
    print(f"\n DONE in {elapsed:.1f}s:")
    print(f" Moved to recycle: {moved:,}")
    print(f" Missing on disk: {missing:,}")
    print(f" Errors: {errors:,}")


if __name__ == "__main__":
    main()
|
||||
136
scripts/paid-content-health-check.py
Executable file
136
scripts/paid-content-health-check.py
Executable file
@@ -0,0 +1,136 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Paid Content Service Health Check
|
||||
|
||||
Standalone script to check the health of all paid content services.
|
||||
Designed to be run via systemd timer every 4 hours.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import asyncio
|
||||
from datetime import datetime
|
||||
|
||||
# Add parent directory to path for imports
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from modules.paid_content import (
|
||||
PaidContentDBAdapter,
|
||||
PaidContentAPIClient,
|
||||
FanslyDirectClient,
|
||||
YouTubeClient,
|
||||
TwitchClient
|
||||
)
|
||||
from modules.unified_database import UnifiedDatabase
|
||||
|
||||
|
||||
def log(message: str, level: str = "info"):
    """Print a timestamped, level-tagged log line to stdout."""
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print("[{}] [{}] {}".format(stamp, level.upper(), message))
|
||||
|
||||
|
||||
async def check_service_health(service: dict, pc_db: PaidContentDBAdapter) -> dict:
    """Check health of a single paid-content service and record the result.

    Dispatches by service['id']:
      - 'youtube' / 'twitch': verify the local yt-dlp binary is available
      - 'fansly_direct': validate the stored auth token against the API
      - anything else: generic Coomer/Kemono API health endpoint
    On success, writes health_status and last_health_check back to the
    services table before returning.

    Returns:
        dict with 'service_id', 'status' ('healthy'/'down'/'unknown'/'error')
        and a human-readable 'message'.
    """
    service_id = service['id']
    health = {'status': 'unknown', 'message': ''}

    try:
        if service_id == 'youtube':
            youtube = YouTubeClient()
            if youtube.is_available():
                health = {'status': 'healthy', 'message': 'yt-dlp is available'}
            else:
                health = {'status': 'down', 'message': 'yt-dlp not found'}

        elif service_id == 'twitch':
            twitch = TwitchClient()
            if twitch.is_available():
                health = {'status': 'healthy', 'message': 'yt-dlp is available for Twitch'}
            else:
                health = {'status': 'down', 'message': 'yt-dlp not found'}

        elif service_id == 'fansly_direct':
            auth_token = service.get('session_cookie')
            if not auth_token:
                health = {'status': 'down', 'message': 'Auth token not configured'}
            else:
                client = FanslyDirectClient(auth_token=auth_token)
                try:
                    result = await client.check_auth()
                    if result.get('valid'):
                        health = {
                            'status': 'healthy',
                            'message': f"Connected as {result.get('username', 'unknown')}"
                        }
                    else:
                        health = {'status': 'down', 'message': result.get('error', 'Auth failed')}
                finally:
                    # Always release the client, even when check_auth raises.
                    await client.close()

        else:
            # Coomer/Kemono services
            client = PaidContentAPIClient(
                service_id,
                session_cookie=service.get('session_cookie'),
                base_url=service.get('base_url')
            )
            try:
                health = await client.check_health()
            finally:
                await client.close()

        # Update database
        pc_db.update_service(service_id, {
            'health_status': health.get('status', 'unknown'),
            'last_health_check': datetime.now().isoformat()
        })

        return {'service_id': service_id, **health}

    except Exception as e:
        # Report rather than raise so one bad service doesn't abort the sweep.
        log(f"Health check failed for {service_id}: {e}", "error")
        return {'service_id': service_id, 'status': 'error', 'message': str(e)}
|
||||
|
||||
|
||||
async def main():
    """Main health check routine.

    Checks every configured paid-content service sequentially and logs a
    per-service line plus a summary. Exit status: 0 when all services are
    healthy (or none are configured), 1 otherwise.
    """
    log("Starting paid content service health check")

    try:
        # Initialize database
        db = UnifiedDatabase()
        pc_db = PaidContentDBAdapter(db)

        # Get all services
        services = pc_db.get_services()

        if not services:
            log("No services configured", "warning")
            return 0

        log(f"Checking {len(services)} services...")

        # Check each service one at a time (no concurrent fan-out here).
        results = []
        for service in services:
            result = await check_service_health(service, pc_db)
            results.append(result)
            status_icon = "✓" if result['status'] == 'healthy' else "✗"
            log(f" {status_icon} {result['service_id']}: {result['status']} - {result.get('message', '')}")

        # Summary
        healthy = sum(1 for r in results if r['status'] == 'healthy')
        total = len(results)
        log(f"Health check complete: {healthy}/{total} services healthy")

        return 0 if healthy == total else 1

    except Exception as e:
        log(f"Health check failed: {e}", "error")
        return 1


if __name__ == "__main__":
    # Propagate the async result as the process exit code (for systemd).
    exit_code = asyncio.run(main())
    sys.exit(exit_code)
|
||||
8
scripts/plex-match.sh
Executable file
8
scripts/plex-match.sh
Executable file
@@ -0,0 +1,8 @@
|
||||
#!/bin/bash
# Trigger Plex matching for appearances.
# Runs via cron to populate plex_rating_key and plex_show_rating_key.
#
# Both helper invocations intentionally discard output and ignore
# failures: this is a best-effort fire-and-forget trigger.

/opt/media-downloader/scripts/get-api-token.sh >/dev/null 2>&1
/opt/media-downloader/scripts/api-call.sh "/api/appearances/plex/match" -X POST -H "Content-Type: application/json" >/dev/null 2>&1

# Record that the trigger ran (regardless of API outcome).
printf '%s: Plex matching triggered\n' "$(date)" >> /var/log/media-downloader/plex-match.log
|
||||
199
scripts/pregerate_thumbnails.py
Normal file
199
scripts/pregerate_thumbnails.py
Normal file
@@ -0,0 +1,199 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Bulk pre-generate thumbnails for file_inventory entries that don't have cached thumbnails.
|
||||
|
||||
Targets /opt/immich/el/ and /opt/immich/elv/ paths (Immich migration).
|
||||
Uses multiprocessing to generate thumbnails in parallel.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import io
|
||||
import sqlite3
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from concurrent.futures import ProcessPoolExecutor, as_completed
|
||||
from pathlib import Path
|
||||
|
||||
from PIL import Image
|
||||
|
||||
# SQLite database holding cached thumbnail blobs.
THUMB_DB = "/opt/media-downloader/database/thumbnails.db"
# Application PostgreSQL DSN.
# NOTE(review): credentials are hardcoded; prefer env/secret storage.
APP_DB_DSN = "postgresql://media_downloader:PNsihOXvvuPwWiIvGlsc9Fh2YmMmB@localhost/media_downloader"
# Maximum thumbnail dimensions (width, height).
MAX_SIZE = (300, 300)
# Number of worker processes generating thumbnails in parallel.
WORKERS = 6
# Flush generated thumbnails to SQLite every BATCH_SIZE results.
BATCH_SIZE = 200
|
||||
|
||||
|
||||
def generate_image_thumbnail(file_path, max_size=(300, 300)):
    """Return JPEG thumbnail bytes for an image file, or None on failure.

    Transparent/paletted images (RGBA/LA/P) are flattened onto a white
    background because JPEG has no alpha channel.
    """
    try:
        # Context manager closes the underlying file handle even when
        # thumbnailing fails — the original leaked it on error paths.
        with Image.open(file_path) as img:
            img.thumbnail(max_size, Image.Resampling.LANCZOS)
            if img.mode in ('RGBA', 'LA', 'P'):
                background = Image.new('RGB', img.size, (255, 255, 255))
                if img.mode == 'P':
                    img = img.convert('RGBA')
                background.paste(img, mask=img.split()[-1] if img.mode in ('RGBA', 'LA') else None)
                img = background
            buffer = io.BytesIO()
            img.save(buffer, format='JPEG', quality=85)
            return buffer.getvalue()
    except Exception:
        # Best-effort: caller treats None as "no thumbnail available".
        return None
|
||||
|
||||
|
||||
def generate_video_thumbnail(file_path, max_size=(300, 300)):
    """Grab one frame via ffmpeg and return JPEG thumbnail bytes, or None.

    Tries a frame at t=1s first (skips black lead-ins), then t=0 as a
    fallback for clips shorter than one second.
    """
    for seek_time in ['00:00:01.000', '00:00:00.000']:
        try:
            command = [
                'ffmpeg', '-ss', seek_time,
                '-i', str(file_path),
                '-vframes', '1',
                '-f', 'image2pipe',
                '-vcodec', 'mjpeg', '-',
            ]
            proc = subprocess.run(command, capture_output=True, timeout=30)
            if proc.returncode != 0 or not proc.stdout:
                continue
            frame = Image.open(io.BytesIO(proc.stdout))
            frame.thumbnail(max_size, Image.Resampling.LANCZOS)
            out = io.BytesIO()
            frame.save(out, format='JPEG', quality=85)
            return out.getvalue()
        except Exception:
            # ffmpeg missing, timeout, or undecodable frame — try next seek.
            continue
    return None
|
||||
|
||||
|
||||
def process_file(args):
    """Generate a thumbnail for one file. Runs in a worker process.

    Returns (file_hash, file_path, thumbnail_bytes_or_None, mtime, status),
    where status is 'ok', 'failed', or 'missing'.
    """
    file_path, content_type, file_hash = args
    path = Path(file_path)
    if not path.exists():
        return (file_hash, file_path, None, 0, 'missing')

    try:
        mtime = path.stat().st_mtime
    except OSError:
        # Race: file vanished/unreadable between exists() and stat().
        mtime = 0

    # Dispatch on content type; everything non-video goes through PIL.
    if content_type == 'video':
        data = generate_video_thumbnail(path, MAX_SIZE)
    else:
        data = generate_image_thumbnail(path, MAX_SIZE)

    if not data:
        return (file_hash, file_path, None, mtime, 'failed')
    return (file_hash, file_path, data, mtime, 'ok')
|
||||
|
||||
|
||||
def get_files_needing_thumbnails():
    """Query file_inventory for el/elv files, check which lack thumbnails.

    Returns a list of (file_path, content_type, cache_key) tuples for
    files under /opt/immich/el/ or /opt/immich/elv/ with location='final'
    that have no row in the thumbnail cache yet.
    """
    import psycopg2

    conn = psycopg2.connect(APP_DB_DSN)
    cur = conn.cursor()
    # NOTE(review): executed without a parameter tuple, so psycopg2 does
    # not collapse '%%' to '%'; the LIKE patterns contain two consecutive
    # wildcards, which match the same rows as a single '%' — confirm intent.
    cur.execute("""
        SELECT file_path, content_type, file_hash
        FROM file_inventory
        WHERE (file_path LIKE '/opt/immich/el/%%' OR file_path LIKE '/opt/immich/elv/%%')
        AND location = 'final'
        ORDER BY id
    """)
    all_files = cur.fetchall()
    cur.close()
    conn.close()

    # Check which already have thumbnails
    thumb_conn = sqlite3.connect(THUMB_DB, timeout=30)
    thumb_cur = thumb_conn.cursor()

    # Get all existing thumbnail hashes in one query
    thumb_cur.execute("SELECT file_hash FROM thumbnails")
    existing_hashes = set(row[0] for row in thumb_cur.fetchall())
    thumb_conn.close()

    needed = []
    for file_path, content_type, file_hash in all_files:
        # Use content hash if available, else path hash
        cache_key = file_hash if file_hash else hashlib.sha256(file_path.encode()).hexdigest()
        if cache_key not in existing_hashes:
            needed.append((file_path, content_type, cache_key))

    return needed
|
||||
|
||||
|
||||
def save_batch(results):
    """Persist a batch of generated thumbnails to the SQLite cache.

    `results` holds (file_hash, file_path, data, mtime, status) tuples;
    entries without thumbnail bytes are skipped.

    Returns:
        Number of rows written.
    """
    from datetime import datetime, timezone
    now = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%S')

    conn = sqlite3.connect(THUMB_DB, timeout=60)
    saved = 0
    try:
        for file_hash, file_path, data, mtime, status in results:
            if data:
                conn.execute("""
                    INSERT OR REPLACE INTO thumbnails
                    (file_hash, file_path, thumbnail_data, created_at, file_mtime)
                    VALUES (?, ?, ?, ?, ?)
                """, (file_hash, file_path, data, now, mtime))
                saved += 1
        conn.commit()
    finally:
        # Close even when an INSERT raises, so the DB file isn't left
        # locked by a leaked connection (the original skipped this).
        conn.close()
    return saved
|
||||
|
||||
|
||||
def main():
    """Pre-generate thumbnails for all el/elv files missing from the cache.

    Fans the work out to WORKERS processes, saves results to SQLite in
    batches of BATCH_SIZE, and prints progress with rate/ETA estimates.
    """
    start = time.time()
    print("Bulk thumbnail pre-generation")
    print("=" * 60)

    print("Finding files needing thumbnails...")
    needed = get_files_needing_thumbnails()
    total = len(needed)
    print(f" {total:,} files need thumbnails")

    if total == 0:
        print(" Nothing to do!")
        return

    generated = 0
    failed = 0
    missing = 0
    batch_results = []

    with ProcessPoolExecutor(max_workers=WORKERS) as executor:
        futures = {executor.submit(process_file, item): item for item in needed}

        for i, future in enumerate(as_completed(futures), 1):
            result = future.result()
            batch_results.append(result)

            # result[4] is the status string from process_file.
            status = result[4]
            if status == 'ok':
                generated += 1
            elif status == 'missing':
                missing += 1
            else:
                failed += 1

            # Flush periodically so a crash loses at most one batch; the
            # progress line is only printed on flush.
            if len(batch_results) >= BATCH_SIZE:
                save_batch(batch_results)
                batch_results = []
                elapsed = time.time() - start
                rate = i / elapsed if elapsed > 0 else 0
                eta = (total - i) / rate if rate > 0 else 0
                print(f" {i:,}/{total:,} ({generated:,} ok, {failed:,} failed, {missing:,} missing) "
                      f"[{rate:.0f}/s, ETA {eta:.0f}s]")

    # Final batch
    if batch_results:
        save_batch(batch_results)

    elapsed = time.time() - start
    print(f"\nDone in {elapsed:.1f}s:")
    print(f" Generated: {generated:,}")
    print(f" Failed: {failed:,}")
    print(f" Missing: {missing:,}")


if __name__ == "__main__":
    main()
|
||||
178
scripts/profile_scheduler_full.py
Normal file
178
scripts/profile_scheduler_full.py
Normal file
@@ -0,0 +1,178 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Full scheduler startup profiler - mimics media-downloader.py --scheduler exactly.
|
||||
Adds memory logging at every stage and a background thread that monitors RSS every 2 seconds.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import gc
|
||||
import threading
|
||||
import time
|
||||
|
||||
# Set up environment exactly like the systemd service
|
||||
os.environ['PYTHONUNBUFFERED'] = '1'
|
||||
os.environ['PYTHONDONTWRITEBYTECODE'] = '1'
|
||||
os.environ['DATABASE_BACKEND'] = 'postgresql'
|
||||
os.environ['DATABASE_URL'] = 'postgresql://media_downloader:PNsihOXvvuPwWiIvGlsc9Fh2YmMmB@localhost/media_downloader'
|
||||
os.environ['HOME'] = '/root'
|
||||
os.environ['PLAYWRIGHT_BROWSERS_PATH'] = '/root/.cache/ms-playwright'
|
||||
os.environ.setdefault('DISPLAY', ':100')
|
||||
|
||||
os.chdir('/opt/media-downloader')
|
||||
sys.path.insert(0, '/opt/media-downloader')
|
||||
|
||||
def get_rss_mb():
    """Return this process's resident set size in MB, or 0 if unavailable.

    Parses VmRSS (reported in kB) from /proc/self/status, so it only
    works on Linux; any expected failure degrades to 0 instead of raising.
    """
    try:
        with open('/proc/self/status') as f:
            for line in f:
                if line.startswith('VmRSS:'):
                    return int(line.split()[1]) / 1024
    except (OSError, ValueError, IndexError):
        # Was a bare `except:`, which also swallowed KeyboardInterrupt and
        # SystemExit; narrowed to the failures we actually expect here.
        pass
    return 0
|
||||
|
||||
def get_child_rss_mb():
    """Return the summed RSS (in MB) of direct child processes, 0 on failure.

    Shells out to `ps --ppid <pid> -o rss=` (GNU ps; rss column is in kB).
    """
    import subprocess
    try:
        pid = os.getpid()
        result = subprocess.run(
            ['ps', '--ppid', str(pid), '-o', 'rss='],
            capture_output=True, text=True, timeout=5
        )
        total = 0
        for line in result.stdout.strip().split('\n'):
            line = line.strip()
            if line:
                total += int(line)
        return total / 1024  # kB to MB
    except Exception:
        # Broad by design (ps missing, timeout, parse error), but no longer
        # a bare `except:` that would swallow KeyboardInterrupt/SystemExit.
        return 0
|
||||
|
||||
# Memory monitoring thread state.
stop_monitor = False  # flipped True by graceful_shutdown to end the loop
peak_rss = 0          # high-water mark of this process's RSS, in MB


def memory_monitor():
    """Background loop: write RSS/child-RSS stats to stderr every 2 seconds.

    Intended to run as a daemon thread until the module-level
    `stop_monitor` flag is set; tracks the peak RSS in module-level
    `peak_rss`. NOTE(review): despite the old comment about printing only
    on significant changes, this prints every iteration (every 2s).
    """
    global peak_rss
    while not stop_monitor:
        rss = get_rss_mb()
        child_rss = get_child_rss_mb()
        total = rss + child_rss
        if rss > peak_rss:
            peak_rss = rss
        sys.stderr.write(f"[MEMORY] RSS={rss:.0f}MB Children={child_rss:.0f}MB Total={total:.0f}MB Peak={peak_rss:.0f}MB\n")
        sys.stderr.flush()
        time.sleep(2)
|
||||
|
||||
# Start memory monitoring
|
||||
monitor_thread = threading.Thread(target=memory_monitor, daemon=True)
|
||||
monitor_thread.start()
|
||||
|
||||
sys.stderr.write(f"[STAGE] Baseline: {get_rss_mb():.0f}MB\n")
|
||||
|
||||
# Now do EXACTLY what media-downloader.py does
|
||||
# --- Module-level code from media-downloader.py ---
|
||||
|
||||
try:
|
||||
import nest_asyncio
|
||||
nest_asyncio.apply()
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
import warnings
|
||||
warnings.filterwarnings("ignore", message=".*pkg_resources is deprecated.*")
|
||||
|
||||
import modules.db_bootstrap
|
||||
|
||||
import json, sqlite3, logging, argparse, subprocess, random
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, List, Optional, Any, Set, Tuple
|
||||
import requests
|
||||
from dataclasses import dataclass
|
||||
|
||||
sqlite3.register_adapter(datetime, lambda d: d.isoformat())
|
||||
sqlite3.register_converter("datetime", lambda s: datetime.fromisoformat(s.decode()))
|
||||
|
||||
sys.path.insert(0, str(Path('/opt/media-downloader')))
|
||||
sys.path.insert(0, str(Path('/opt/media-downloader') / 'modules'))
|
||||
|
||||
try:
|
||||
from modules.instaloader_module import InstaLoaderModule as InstaLoaderDownloader
|
||||
from modules.fastdl_module import FastDLDownloader
|
||||
from modules.imginn_module import ImgInnDownloader
|
||||
from modules.imginn_api_module import ImgInnAPIDownloader
|
||||
from modules.instagram_client_module import InstagramClientDownloader
|
||||
from modules.toolzu_module import ToolzuDownloader
|
||||
from modules.snapchat_scraper import SnapchatDirectScraper
|
||||
from modules.snapchat_client_module import SnapchatClientDownloader
|
||||
from modules.tiktok_module import TikTokDownloader
|
||||
from modules.forum_downloader import ForumDownloader
|
||||
from modules.coppermine_module import CoppermineDownloader
|
||||
from modules.download_manager import DownloadManager, DownloadItem
|
||||
from modules.settings_manager import SettingsManager
|
||||
from modules.date_utils import DateHandler, extract_date, update_timestamps
|
||||
from modules.move_module import MoveManager
|
||||
from modules.unified_database import UnifiedDatabase
|
||||
from modules.universal_logger import get_logger
|
||||
from modules.forum_db_adapter import ForumDatabaseAdapter
|
||||
from modules.pushover_notifier import PushoverNotifier, create_notifier_from_config
|
||||
from modules.service_health_monitor import ServiceHealthMonitor
|
||||
from modules.dependency_updater import DependencyUpdater
|
||||
from modules.downloader_monitor import get_monitor
|
||||
from modules.activity_status import get_activity_manager
|
||||
except ImportError as e:
|
||||
print(f"Error importing modules: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
sys.stderr.write(f"[STAGE] All imports done: {get_rss_mb():.0f}MB\n")
|
||||
|
||||
# --- Scheduler section (what main() does with --scheduler) ---
|
||||
from modules.scheduler import DownloadScheduler
|
||||
from modules.unified_database import UnifiedDatabase
|
||||
import signal
|
||||
|
||||
sys.stderr.write(f"[STAGE] Scheduler imported: {get_rss_mb():.0f}MB\n")
|
||||
|
||||
# Create unified database
|
||||
unified_db = UnifiedDatabase('database/media_downloader.db', use_pool=True, pool_size=5)
|
||||
|
||||
sys.stderr.write(f"[STAGE] UnifiedDatabase created: {get_rss_mb():.0f}MB\n")
|
||||
|
||||
# Create SettingsManager
|
||||
sm = SettingsManager('database/media_downloader.db')
|
||||
|
||||
# Create scheduler - pass settings_manager like main() does
|
||||
scheduler = DownloadScheduler(unified_db=unified_db, settings_manager=sm)
|
||||
|
||||
sys.stderr.write(f"[STAGE] DownloadScheduler created: {get_rss_mb():.0f}MB\n")
|
||||
|
||||
# Set up graceful shutdown
shutdown_requested = False  # guards against running the handler twice

def graceful_shutdown(signum, frame):
    """SIGTERM/SIGINT handler: stop the scheduler and exit cleanly.

    Idempotent via the module-level `shutdown_requested` flag. Also stops
    the memory-monitor thread, reports final/peak RSS to stderr, cleans up
    any downloader temp dirs, and closes the shared database before exit.
    """
    global shutdown_requested, stop_monitor
    if shutdown_requested:
        return
    shutdown_requested = True
    stop_monitor = True
    sys.stderr.write(f"\n[SHUTDOWN] Signal received, stopping...\n")
    sys.stderr.write(f"[SHUTDOWN] Final RSS: {get_rss_mb():.0f}MB, Peak: {peak_rss:.0f}MB\n")
    scheduler.stop()
    # The scheduler may not have created its downloader yet — check first.
    dl = getattr(scheduler, 'downloader', None)
    if dl:
        dl.cleanup_all_temp_dirs()
    unified_db.close()
    sys.exit(0)
|
||||
|
||||
signal.signal(signal.SIGTERM, graceful_shutdown)
|
||||
signal.signal(signal.SIGINT, graceful_shutdown)
|
||||
|
||||
sys.stderr.write(f"[STAGE] About to call scheduler.start() - this will exec_module, create MediaDownloader, then enter main loop\n")
|
||||
sys.stderr.write(f"[STAGE] Pre-start RSS: {get_rss_mb():.0f}MB\n")
|
||||
sys.stderr.flush()
|
||||
|
||||
# Start scheduler (this blocks - enters main loop)
|
||||
scheduler.start()
|
||||
213
scripts/profile_scheduler_memory.py
Normal file
213
scripts/profile_scheduler_memory.py
Normal file
@@ -0,0 +1,213 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Profile memory usage at each stage of scheduler startup to find the 8GB culprit."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import gc
|
||||
|
||||
sys.path.insert(0, '/opt/media-downloader')
|
||||
os.chdir('/opt/media-downloader')
|
||||
|
||||
# Set environment like the service does
|
||||
os.environ['DATABASE_BACKEND'] = 'postgresql'
|
||||
os.environ['DATABASE_URL'] = 'postgresql://media_downloader:PNsihOXvvuPwWiIvGlsc9Fh2YmMmB@localhost/media_downloader'
|
||||
os.environ['PYTHONUNBUFFERED'] = '1'
|
||||
|
||||
def get_rss_mb():
    """Return this process's current RSS in MB (Linux /proc only)."""
    with open('/proc/self/status') as status_file:
        for entry in status_file:
            if entry.startswith('VmRSS:'):
                # VmRSS is reported in kB; convert to MB.
                return int(entry.split()[1]) / 1024
    return 0
|
||||
|
||||
def log_mem(label):
    """Force a GC pass, print the current RSS with a label, return the RSS."""
    gc.collect()
    current = get_rss_mb()
    print("[{:7.1f} MB] {}".format(current, label))
    return current
|
||||
|
||||
# Stage 0: Baseline
|
||||
log_mem("BASELINE (python + script)")
|
||||
|
||||
# Stage 1: Basic imports (what media-downloader.py does at top level)
|
||||
import warnings
|
||||
warnings.filterwarnings("ignore", message=".*pkg_resources is deprecated.*")
|
||||
log_mem("After warnings")
|
||||
|
||||
import modules.db_bootstrap
|
||||
log_mem("After db_bootstrap")
|
||||
|
||||
import json, sqlite3, logging, argparse, time, subprocess, random
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, List, Optional, Any, Set, Tuple
|
||||
import requests
|
||||
from dataclasses import dataclass
|
||||
log_mem("After stdlib + requests")
|
||||
|
||||
# Stage 2: Module imports (lines 52-80 of media-downloader.py)
|
||||
from modules.instaloader_module import InstaLoaderModule as InstaLoaderDownloader
|
||||
log_mem("After instaloader_module import")
|
||||
|
||||
from modules.fastdl_module import FastDLDownloader
|
||||
log_mem("After fastdl_module import")
|
||||
|
||||
from modules.imginn_module import ImgInnDownloader
|
||||
log_mem("After imginn_module import")
|
||||
|
||||
from modules.imginn_api_module import ImgInnAPIDownloader
|
||||
log_mem("After imginn_api_module import")
|
||||
|
||||
from modules.instagram_client_module import InstagramClientDownloader
|
||||
log_mem("After instagram_client_module import")
|
||||
|
||||
from modules.toolzu_module import ToolzuDownloader
|
||||
log_mem("After toolzu_module import")
|
||||
|
||||
from modules.snapchat_scraper import SnapchatDirectScraper
|
||||
log_mem("After snapchat_scraper import")
|
||||
|
||||
from modules.snapchat_client_module import SnapchatClientDownloader
|
||||
log_mem("After snapchat_client_module import")
|
||||
|
||||
from modules.tiktok_module import TikTokDownloader
|
||||
log_mem("After tiktok_module import")
|
||||
|
||||
from modules.forum_downloader import ForumDownloader
|
||||
log_mem("After forum_downloader import (has Playwright)")
|
||||
|
||||
from modules.coppermine_module import CoppermineDownloader
|
||||
log_mem("After coppermine_module import")
|
||||
|
||||
from modules.download_manager import DownloadManager, DownloadItem
|
||||
log_mem("After download_manager import")
|
||||
|
||||
from modules.settings_manager import SettingsManager
|
||||
from modules.date_utils import DateHandler, extract_date, update_timestamps
|
||||
from modules.move_module import MoveManager
|
||||
from modules.unified_database import UnifiedDatabase
|
||||
from modules.universal_logger import get_logger
|
||||
from modules.forum_db_adapter import ForumDatabaseAdapter
|
||||
from modules.pushover_notifier import PushoverNotifier, create_notifier_from_config
|
||||
from modules.service_health_monitor import ServiceHealthMonitor
|
||||
from modules.dependency_updater import DependencyUpdater
|
||||
from modules.downloader_monitor import get_monitor
|
||||
from modules.activity_status import get_activity_manager
|
||||
log_mem("After ALL module imports")
|
||||
|
||||
# Stage 3: Import scheduler and its dependencies
|
||||
from modules.scheduler import DownloadScheduler
|
||||
log_mem("After scheduler import (includes monitors)")
|
||||
|
||||
# Stage 4: Create UnifiedDatabase
|
||||
db_path = '/opt/media-downloader/database/media_downloader.db'
|
||||
unified_db = UnifiedDatabase(db_path, use_pool=True, pool_size=5)
|
||||
log_mem("After UnifiedDatabase creation")
|
||||
|
||||
# Stage 5: Create DownloadScheduler
|
||||
from modules.settings_manager import SettingsManager
|
||||
sm = SettingsManager(db_path)
|
||||
scheduler = DownloadScheduler(
|
||||
config_path=None,
|
||||
unified_db=unified_db,
|
||||
settings_manager=sm
|
||||
)
|
||||
log_mem("After DownloadScheduler creation")
|
||||
|
||||
# Stage 6: exec_module to load media-downloader.py (what scheduler.start() does)
|
||||
import importlib.util
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
"media_downloader",
|
||||
Path("/opt/media-downloader/media-downloader.py")
|
||||
)
|
||||
media_downloader = importlib.util.module_from_spec(spec)
|
||||
spec.loader.exec_module(media_downloader)
|
||||
MediaDownloader = media_downloader.MediaDownloader
|
||||
log_mem("After exec_module (re-loads media-downloader.py)")
|
||||
|
||||
# Stage 7: Create MediaDownloader instance
|
||||
downloader = MediaDownloader(enable_notifications=True, unified_db=unified_db)
|
||||
log_mem("After MediaDownloader creation (lazy modules)")
|
||||
|
||||
# Stage 8: Access one lazy module to see how much it adds
|
||||
print("\n--- Testing individual module instantiation ---")
|
||||
if 'fastdl' in downloader.modules:
|
||||
_ = downloader.modules['fastdl']
|
||||
log_mem("After instantiating FastDL module")
|
||||
downloader.modules.release('fastdl')
|
||||
gc.collect()
|
||||
log_mem("After releasing FastDL module")
|
||||
|
||||
if 'forum' in downloader.modules or 'forums' in downloader.modules:
|
||||
key = 'forums' if 'forums' in downloader.modules else 'forum'
|
||||
_ = downloader.modules[key]
|
||||
log_mem(f"After instantiating {key} module (Playwright-based)")
|
||||
downloader.modules.release(key)
|
||||
gc.collect()
|
||||
log_mem(f"After releasing {key} module")
|
||||
|
||||
# Stage 9: Create the monitors that scheduler creates
|
||||
print("\n--- Testing monitor creation ---")
|
||||
from modules.youtube_channel_monitor import YouTubeChannelMonitor
|
||||
from modules.easynews_monitor import EasynewsMonitor
|
||||
from modules.reddit_community_monitor import RedditCommunityMonitor
|
||||
|
||||
yt = YouTubeChannelMonitor(db_path, get_activity_manager(unified_db))
|
||||
log_mem("After YouTubeChannelMonitor creation")
|
||||
|
||||
en = EasynewsMonitor(db_path, get_activity_manager(unified_db))
|
||||
log_mem("After EasynewsMonitor creation")
|
||||
|
||||
rd = RedditCommunityMonitor(db_path, get_activity_manager(unified_db))
|
||||
log_mem("After RedditCommunityMonitor creation")
|
||||
|
||||
# Stage 10: Simulate what happens when a background task runs
|
||||
print("\n--- Simulating background task execution ---")
|
||||
|
||||
# Test: easynews check_all_celebrities
|
||||
print("Running Easynews check_all_celebrities...")
|
||||
try:
|
||||
result = en.check_all_celebrities(from_scheduler=True)
|
||||
log_mem(f"After Easynews check (results: {result.get('results_found', 0)})")
|
||||
except Exception as e:
|
||||
log_mem(f"After Easynews check (error: {e})")
|
||||
|
||||
gc.collect()
|
||||
log_mem("After gc.collect")
|
||||
|
||||
# Test: reddit check_all_now
|
||||
print("Running Reddit check_all_now...")
|
||||
try:
|
||||
import asyncio
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
try:
|
||||
count = loop.run_until_complete(rd.check_all_now(from_scheduler=True))
|
||||
log_mem(f"After Reddit check (media: {count})")
|
||||
finally:
|
||||
loop.close()
|
||||
except Exception as e:
|
||||
log_mem(f"After Reddit check (error: {e})")
|
||||
|
||||
gc.collect()
|
||||
log_mem("After gc.collect")
|
||||
|
||||
# Test: youtube check_all_now
|
||||
print("Running YouTube check_all_now...")
|
||||
try:
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
try:
|
||||
count = loop.run_until_complete(yt.check_all_now(from_scheduler=True))
|
||||
log_mem(f"After YouTube check (videos: {count})")
|
||||
finally:
|
||||
loop.close()
|
||||
except Exception as e:
|
||||
log_mem(f"After YouTube check (error: {e})")
|
||||
|
||||
gc.collect()
|
||||
log_mem("After gc.collect")
|
||||
|
||||
print("\n--- DONE ---")
|
||||
print(f"Final RSS: {get_rss_mb():.1f} MB")
|
||||
112
scripts/quick_face_backfill.py
Executable file
112
scripts/quick_face_backfill.py
Executable file
@@ -0,0 +1,112 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Quick backfill of face recognition scans for existing files
|
||||
This scans all media files currently in /opt/immich/md and logs results to database
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
# Bootstrap PostgreSQL adapter before any database imports
|
||||
from modules.db_bootstrap import bootstrap_database
|
||||
bootstrap_database()
|
||||
|
||||
from modules.face_recognition_module import FaceRecognitionModule
|
||||
from modules.unified_database import UnifiedDatabase
|
||||
from modules.settings_manager import SettingsManager
|
||||
|
||||
# Configuration
|
||||
SCAN_BASE_DIR = "/opt/immich/md"
|
||||
DATABASE_PATH = "/opt/media-downloader/database/media_downloader.db"
|
||||
|
||||
# Supported file extensions
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.heic'}
|
||||
VIDEO_EXTENSIONS = {'.mp4', '.mov', '.avi', '.mkv', '.webm', '.flv', '.m4v'}
|
||||
SUPPORTED_EXTENSIONS = IMAGE_EXTENSIONS | VIDEO_EXTENSIONS
|
||||
|
||||
|
||||
def main():
    """Backfill face-recognition scan results for every supported media file
    under SCAN_BASE_DIR, skipping files that already have a stored result.

    Exits with status 1 if face recognition is disabled in settings.
    """
    print("🔄 Quick Face Recognition Backfill")
    print("=" * 70)

    db = UnifiedDatabase()
    settings_manager = SettingsManager(DATABASE_PATH)
    face_module = FaceRecognitionModule(unified_db=db)

    try:
        # Abort early if the feature is switched off; scanning would be wasted work.
        settings = settings_manager.get('face_recognition', {})
        if not settings.get('enabled', False):
            print("✗ Face recognition is disabled in settings")
            sys.exit(1)

        tolerance = settings.get('tolerance', 0.6)
        print(f"Scanning: {SCAN_BASE_DIR}")
        print(f"Tolerance: {tolerance}")
        print("=" * 70)

        stats = {'total': 0, 'matched': 0, 'no_match': 0, 'errors': 0, 'already_scanned': 0}

        # Walk through all files
        for root, dirs, files in os.walk(SCAN_BASE_DIR):
            for filename in files:
                file_path = os.path.join(root, filename)
                file_ext = os.path.splitext(filename)[1].lower()

                if file_ext not in SUPPORTED_EXTENSIONS:
                    continue

                stats['total'] += 1

                # Skip files that already have a stored scan result;
                # print a progress line every 50 processed files.
                existing = db.get_face_recognition_result(file_path)
                if existing:
                    stats['already_scanned'] += 1
                    if stats['total'] % 50 == 0:
                        print(f"Progress: {stats['total']} files processed, {stats['already_scanned']} already scanned, {stats['matched']} newly matched...")
                    continue

                try:
                    # Scan the file (videos take a different path inside the module)
                    is_video = file_ext in VIDEO_EXTENSIONS
                    result = face_module.check_image(file_path, tolerance=tolerance, is_video=is_video)

                    # Persist the result so future runs can skip this file
                    db.log_face_recognition_scan(
                        file_path=file_path,
                        has_match=result.get('has_match', False),
                        matched_person=result.get('person_name'),
                        confidence=result.get('confidence'),
                        face_count=result.get('face_count', 0),
                        scan_type='quick_backfill'
                    )

                    if result.get('has_match'):
                        stats['matched'] += 1
                        person = result.get('person_name', 'Unknown')
                        conf = result.get('confidence', 0)
                        print(f"✓ [{stats['total']}] {filename[:60]} - MATCHED: {person} ({conf:.1%})")
                    else:
                        stats['no_match'] += 1

                except Exception as e:
                    # Best-effort backfill: log the failure and keep scanning.
                    stats['errors'] += 1
                    print(f"✗ [{stats['total']}] {filename[:60]} - ERROR: {e}")

        print("\n" + "=" * 70)
        print("📊 BACKFILL COMPLETE")
        print("=" * 70)
        print(f"Total files: {stats['total']}")
        print(f"Already scanned: {stats['already_scanned']}")
        print(f"Newly matched: {stats['matched']}")
        print(f"No match: {stats['no_match']}")
        print(f"Errors: {stats['errors']}")
        print("=" * 70)
    finally:
        # BUGFIX: the original leaked the database handle when face
        # recognition was disabled (sys.exit(1) before close) or when the
        # directory walk raised; close it on every exit path.
        db.close()


if __name__ == '__main__':
    main()
|
||||
102
scripts/regenerate_thumbnails.py
Normal file
102
scripts/regenerate_thumbnails.py
Normal file
@@ -0,0 +1,102 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Regenerate all thumbnails for Fansly attachments."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
import io
|
||||
from pathlib import Path
|
||||
from PIL import Image
|
||||
|
||||
# Bootstrap PostgreSQL adapter before any sqlite3 imports
|
||||
sys.path.insert(0, '/opt/media-downloader')
|
||||
from modules.db_bootstrap import bootstrap_database
|
||||
bootstrap_database()
|
||||
import sqlite3
|
||||
|
||||
# Database path (routed to PostgreSQL via pgadapter)
|
||||
DB_PATH = '/opt/media-downloader/database/media_downloader.db'
|
||||
THUMB_CACHE = Path('/opt/media-downloader/cache/thumbnails/large')
|
||||
MAX_SIZE = (800, 800)
|
||||
|
||||
|
||||
def _encode_jpeg(img):
    """Downscale a PIL image in place to MAX_SIZE and return JPEG bytes (quality 85)."""
    img.thumbnail(MAX_SIZE, Image.LANCZOS)
    if img.mode in ('RGBA', 'P'):
        # JPEG has no alpha channel / palette support
        img = img.convert('RGB')
    buffer = io.BytesIO()
    img.save(buffer, format='JPEG', quality=85)
    return buffer.getvalue()


def generate_thumbnail(file_path, file_type):
    """Generate thumbnail for image or video.

    Args:
        file_path: path to the source media file.
        file_type: 'image' or 'video'; any other value yields None.

    Returns:
        JPEG-encoded thumbnail bytes on success, None on failure,
        unsupported type, or if ffmpeg produced no frame.
    """
    try:
        if file_type == 'image':
            with Image.open(file_path) as img:
                return _encode_jpeg(img)

        elif file_type == 'video':
            # Grab a single frame at t=1s and pipe it out as MJPEG.
            cmd = [
                'ffmpeg', '-y', '-ss', '1', '-i', str(file_path),
                '-vframes', '1', '-f', 'image2pipe', '-vcodec', 'mjpeg', '-'
            ]
            result = subprocess.run(cmd, capture_output=True, timeout=30)
            if result.returncode == 0 and result.stdout:
                with Image.open(io.BytesIO(result.stdout)) as img:
                    return _encode_jpeg(img)
    except Exception as e:
        # Best-effort: a single bad file must not abort the whole run.
        print(f" Error: {e}")
    return None
|
||||
|
||||
|
||||
def main():
    """Rebuild the large-thumbnail cache for every completed Fansly attachment."""
    THUMB_CACHE.mkdir(parents=True, exist_ok=True)

    # Fetch the full attachment list up front so the DB connection is held briefly.
    conn = sqlite3.connect(DB_PATH)
    cursor = conn.execute("""
        SELECT a.id, a.local_path, a.file_type
        FROM paid_content_attachments a
        JOIN paid_content_posts p ON a.post_id = p.id
        JOIN paid_content_creators c ON p.creator_id = c.id
        WHERE c.service_id = 'fansly_direct'
        AND a.status = 'completed'
        AND a.local_path IS NOT NULL
        ORDER BY a.id
    """)
    rows = cursor.fetchall()
    conn.close()

    total = len(rows)
    print(f"Regenerating thumbnails for {total} files...")

    ok_count = 0
    fail_count = 0
    gone_count = 0

    for idx, (att_id, local_path, file_type) in enumerate(rows):
        # Progress line every 100 attachments.
        if idx % 100 == 0:
            print(f"Progress: {idx}/{total} (generated: {ok_count}, failed: {fail_count})")

        src = Path(local_path)
        if not src.exists():
            gone_count += 1
            continue

        data = generate_thumbnail(src, file_type)
        if data:
            (THUMB_CACHE / f"{att_id}.jpg").write_bytes(data)
            ok_count += 1
        else:
            fail_count += 1
            print(f" Failed: {att_id} - {local_path}")

    print(f"\nDone!")
    print(f" Generated: {ok_count}")
    print(f" Failed: {fail_count}")
    print(f" Missing files: {gone_count}")


if __name__ == '__main__':
    main()
|
||||
293
scripts/retroactive_face_scan.py
Executable file
293
scripts/retroactive_face_scan.py
Executable file
@@ -0,0 +1,293 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Retroactive Face Recognition Scanner
|
||||
|
||||
Scans existing files in a directory and moves unmatched files to review queue
|
||||
while storing their original destination paths for later restoration.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
# Add parent directory to path for imports
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
# Bootstrap PostgreSQL adapter before any database imports
|
||||
from modules.db_bootstrap import bootstrap_database
|
||||
bootstrap_database()
|
||||
|
||||
from modules.face_recognition_module import FaceRecognitionModule
|
||||
from modules.unified_database import UnifiedDatabase
|
||||
from modules.settings_manager import SettingsManager
|
||||
|
||||
# Configuration
|
||||
SCAN_BASE_DIR = "/opt/immich/md"
|
||||
REVIEW_DIR = "/opt/immich/review"
|
||||
DATABASE_PATH = "/opt/media-downloader/database/media_downloader.db"
|
||||
|
||||
# Supported file extensions
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.heic'}
|
||||
VIDEO_EXTENSIONS = {'.mp4', '.mov', '.avi', '.mkv', '.webm', '.flv', '.m4v'}
|
||||
SUPPORTED_EXTENSIONS = IMAGE_EXTENSIONS | VIDEO_EXTENSIONS
|
||||
|
||||
|
||||
class RetroactiveFaceScanner:
    """Scans existing media files under SCAN_BASE_DIR with face recognition.

    Matched files stay in place; unmatched files are moved into REVIEW_DIR
    (mirroring the directory structure) and their original location is stored
    in the downloads table metadata as 'intended_path', unless scan_only=True.
    """

    def __init__(self, scan_only=False):
        # scan_only=True records results in the DB but never moves files.
        self.db = UnifiedDatabase()
        self.settings_manager = SettingsManager(DATABASE_PATH)
        self.face_module = FaceRecognitionModule(unified_db=self.db)
        self.scan_only = scan_only
        # Running counters reported by print_stats() at the end of a scan.
        self.stats = {
            'total_files': 0,
            'matched': 0,
            'unmatched': 0,
            'errors': 0,
            'skipped': 0
        }

    def get_relative_path(self, full_path):
        """Get path relative to SCAN_BASE_DIR"""
        try:
            return os.path.relpath(full_path, SCAN_BASE_DIR)
        except ValueError:
            # e.g. different drive on Windows — fall back to the absolute path.
            return full_path

    def scan_directory(self, directory):
        """Recursively scan directory for media files"""
        print(f"\n🔍 Scanning directory: {directory}")
        print("=" * 70)

        for root, dirs, files in os.walk(directory):
            for filename in files:
                file_path = os.path.join(root, filename)
                file_ext = os.path.splitext(filename)[1].lower()

                # Only images/videos from the supported extension sets.
                if file_ext not in SUPPORTED_EXTENSIONS:
                    continue

                self.stats['total_files'] += 1
                self.process_file(file_path, file_ext in VIDEO_EXTENSIONS)

        self.print_stats()

    def process_file(self, file_path, is_video):
        """Process a single file with face recognition"""
        # NOTE(review): `filename` is computed but never printed — the header
        # line below prints the literal "(unknown)" instead. Confirm whether
        # the filename was meant to appear here.
        filename = os.path.basename(file_path)
        relative_path = self.get_relative_path(os.path.dirname(file_path))

        print(f"\n[{self.stats['total_files']}] (unknown)")
        print(f" Location: {relative_path}")

        try:
            # Settings are re-read per file so a mid-run config change takes
            # effect; a file processed while disabled is counted as skipped.
            settings = self.settings_manager.get('face_recognition', {})
            if not settings.get('enabled', False):
                print(" ⚠ Face recognition is disabled in settings")
                self.stats['skipped'] += 1
                return

            tolerance = settings.get('tolerance', 0.6)

            # Check for faces
            print(f" 🔍 Checking for faces (tolerance: {tolerance})...")
            result = self.face_module.check_image(file_path, tolerance=tolerance, is_video=is_video)

            # Log scan result to database; a DB failure here is non-fatal so
            # the file-move decision below still happens.
            try:
                self.db.log_face_recognition_scan(
                    file_path=file_path,
                    has_match=result.get('has_match', False),
                    matched_person=result.get('person_name'),
                    confidence=result.get('confidence'),
                    face_count=result.get('face_count', 0),
                    scan_type='retroactive'
                )
            except Exception as db_err:
                print(f" ⚠ Warning: Failed to log to database: {db_err}")

            if result.get('has_match'):
                person_name = result.get('person_name', 'Unknown')
                confidence = result.get('confidence', 0)
                print(f" ✓ MATCH: {person_name} (confidence: {confidence:.2%})")
                self.stats['matched'] += 1
            else:
                if self.scan_only:
                    print(f" ✗ NO MATCH (scan-only mode, not moving file)")
                    self.stats['unmatched'] += 1
                else:
                    print(f" ✗ NO MATCH - Moving to review queue...")
                    self.move_to_review(file_path, file_path)  # Pass full path as original path
                    self.stats['unmatched'] += 1

        except Exception as e:
            # Includes move failures re-raised by move_to_review().
            print(f" ✗ ERROR: {str(e)}")
            self.stats['errors'] += 1

    def move_to_review(self, file_path, original_path):
        """Move file to review queue and update database with intended_path

        NOTE(review): `original_path` is accepted but never used — the method
        stores `file_path` as the intended path. Confirm the parameter can be
        dropped or should be used instead.

        Raises: re-raises any exception after printing, so the caller counts
        the file as an error.
        """
        try:
            from pathlib import Path

            # Maintain directory structure in review queue
            base_path = Path(SCAN_BASE_DIR)
            file_path_obj = Path(file_path)

            if file_path_obj.is_relative_to(base_path):
                # Get relative path from base
                relative_path = file_path_obj.relative_to(base_path)
                # Recreate under review directory
                review_path = Path(REVIEW_DIR) / relative_path
            else:
                # Fallback to flat structure if not under base path
                review_path = Path(REVIEW_DIR) / file_path_obj.name

            # Ensure parent directory exists
            review_path.parent.mkdir(parents=True, exist_ok=True)

            # Move file first; the DB is only updated once the move succeeded.
            shutil.move(file_path, str(review_path))

            # Update database entry with new review path and store intended_path in metadata
            with self.db.get_connection(for_write=True) as conn:
                cursor = conn.cursor()

                # Find the download entry for this file
                cursor.execute('SELECT id, metadata FROM downloads WHERE file_path = ?', (file_path,))
                row = cursor.fetchone()

                if row:
                    download_id = row['id']
                    existing_metadata = json.loads(row['metadata']) if row['metadata'] else {}

                    # Add intended_path to metadata so the file can be restored later
                    existing_metadata['intended_path'] = file_path

                    # Update the download record with new review path and metadata
                    cursor.execute('''
                        UPDATE downloads
                        SET file_path = ?, metadata = ?
                        WHERE id = ?
                    ''', (str(review_path), json.dumps(existing_metadata), download_id))

                    print(f" → Moved to: {review_path}")
                    print(f" → Original path stored in database: {file_path}")
                else:
                    # File was moved but has no downloads row — flag it so the
                    # operator knows it can't be auto-restored.
                    print(f" ⚠ Warning: No database entry found for {file_path}")
                    print(f" → Moved to: {review_path} (not tracked in database)")

        except Exception as e:
            print(f" ✗ Failed to move file: {e}")
            raise

    def print_stats(self):
        """Print final statistics"""
        print("\n" + "=" * 70)
        print("📊 SCAN COMPLETE")
        print("=" * 70)
        print(f"Total files scanned: {self.stats['total_files']}")
        print(f"✓ Matched: {self.stats['matched']}")
        print(f"✗ Unmatched (moved): {self.stats['unmatched']}")
        print(f"⚠ Errors: {self.stats['errors']}")
        print(f"⊘ Skipped: {self.stats['skipped']}")
        print("=" * 70)
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse arguments, confirm interactively, then scan."""
    import argparse

    parser = argparse.ArgumentParser(
        description='Retroactively scan existing files with face recognition',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Scan social media directory
  python3 scripts/retroactive_face_scan.py "social media"

  # Scan specific subdirectory
  python3 scripts/retroactive_face_scan.py "social media/instagram"

  # Scan with full path
  python3 scripts/retroactive_face_scan.py "/opt/immich/md/social media"

Note: Original paths are stored in the database metadata as 'intended_path'.
Use the Review UI to keep/delete/add reference to moved files.
        """
    )

    parser.add_argument(
        'directory',
        help='Directory to scan (relative to /opt/immich/md or absolute path)'
    )

    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Show what would be done without moving files'
    )

    parser.add_argument(
        '--scan-only',
        action='store_true',
        help='Scan and update database only - do not move unmatched files to review'
    )

    args = parser.parse_args()

    # Determine scan directory: relative arguments are resolved under SCAN_BASE_DIR.
    if os.path.isabs(args.directory):
        scan_dir = args.directory
    else:
        scan_dir = os.path.join(SCAN_BASE_DIR, args.directory)

    if not os.path.exists(scan_dir):
        print(f"✗ Error: Directory does not exist: {scan_dir}")
        sys.exit(1)

    if not os.path.isdir(scan_dir):
        print(f"✗ Error: Not a directory: {scan_dir}")
        sys.exit(1)

    if args.dry_run:
        print("🔍 DRY RUN MODE - No files will be moved")
        print("=" * 70)

    print(f"\n🎯 Retroactive Face Recognition Scan")
    print(f"Scan directory: {scan_dir}")
    print(f"Review queue: {REVIEW_DIR}")

    # Confirm with the operator before touching anything.
    response = input("\nContinue? (y/n): ")
    if response.lower() != 'y':
        print("Cancelled.")
        sys.exit(0)

    # Run scan
    scanner = RetroactiveFaceScanner(scan_only=args.scan_only)

    if args.scan_only:
        print("🔍 SCAN-ONLY MODE - Files will NOT be moved to review")
        print("=" * 70)

    if args.dry_run:
        # TODO: Implement dry run mode
        # NOTE(review): --dry-run currently prompts for confirmation and then
        # exits with status 1 without doing anything; consider bailing out
        # before the interactive prompt.
        print("\n⚠ Dry run mode not yet implemented")
        sys.exit(1)
    else:
        scanner.scan_directory(scan_dir)

    print(f"\n✓ Scan complete!")
    if args.scan_only:
        print(f"\n📝 Scan-only mode: Database updated with face recognition results.")
        print(f"No files were moved. Use the GUI to filter by 'Not Scanned' or 'No Match'.")
    else:
        print(f"\nUnmatched files have been moved to: {REVIEW_DIR}")
        print(f"Use the Review UI at http://your-server:5173/review to process them.")
        print(f"\nOriginal paths stored in database metadata.")


if __name__ == '__main__':
    main()
|
||||
103
scripts/run-dependency-updates.sh
Executable file
103
scripts/run-dependency-updates.sh
Executable file
@@ -0,0 +1,103 @@
|
||||
#!/bin/bash
# Dependency Update Script
# Safely stops services, runs updates, and restarts services
# Designed to be run by systemd timer at scheduled intervals

set -e

LOG_FILE="/opt/media-downloader/logs/dependency-updates.log"
LOCK_FILE="/tmp/dependency-updates.lock"

# Log a timestamped message to stdout and append it to the log file.
log() {
    echo "$(date '+%Y-%m-%d %H:%M:%S') $1" | tee -a "$LOG_FILE"
}

# Check for lock file to prevent concurrent runs
if [ -f "$LOCK_FILE" ]; then
    log "[WARN] Another update process is running. Exiting."
    exit 0
fi

# Create lock file; single quotes defer expansion of LOCK_FILE until the
# trap actually fires (robust even if the variable is later changed).
trap 'rm -f "$LOCK_FILE"' EXIT
echo $$ > "$LOCK_FILE"

log "[INFO] =========================================="
log "[INFO] Starting dependency update process"
log "[INFO] =========================================="

# Check if scheduler is running; remember so we only restart what we stopped.
SCHEDULER_WAS_RUNNING=false
if systemctl is-active --quiet media-downloader; then
    SCHEDULER_WAS_RUNNING=true
    log "[INFO] Stopping scheduler for updates..."
    systemctl stop media-downloader

    # Wait for clean shutdown
    sleep 5
    log "[INFO] Scheduler stopped"
fi

# Run dependency updates
log "[INFO] Running dependency updates..."
cd /opt/media-downloader

/opt/media-downloader/venv/bin/python3 -c "
import sys
sys.path.insert(0, '/opt/media-downloader')
from modules.dependency_updater import DependencyUpdater
from modules.settings_manager import SettingsManager

# Load config
settings = SettingsManager()
config = settings.get_all()
update_config = config.get('dependency_updater', {}) or config.get('dependency_updates', {})

if not update_config.get('enabled', True):
    print('[INFO] Dependency updates disabled in config')
    sys.exit(0)

updater = DependencyUpdater(config=update_config, scheduler_mode=True)
results = updater.force_update_check()

print('[INFO] Update results:')
for component, updated in results.items():
    status = 'Updated' if updated else 'Current'
    print(f'  - {component}: {status}')
" 2>&1 | tee -a "$LOG_FILE"

# BUGFIX: "$?" after the pipeline is tee's exit status (always 0), so
# failures were reported as success. PIPESTATUS[0] holds the python
# interpreter's status. We deliberately do NOT enable pipefail: a failed
# update must not abort the script before the services below are restarted.
UPDATE_STATUS=${PIPESTATUS[0]}

if [ "$UPDATE_STATUS" -eq 0 ]; then
    log "[INFO] Dependency updates completed successfully"
else
    log "[ERROR] Dependency updates failed with status $UPDATE_STATUS"
fi

# Restart API to pick up any Python package changes
log "[INFO] Restarting API service..."
systemctl restart media-downloader-api
sleep 2

if systemctl is-active --quiet media-downloader-api; then
    log "[INFO] API service restarted successfully"
else
    log "[ERROR] API service failed to restart!"
fi

# Restart scheduler if it was running
if [ "$SCHEDULER_WAS_RUNNING" = true ]; then
    log "[INFO] Restarting scheduler..."
    systemctl start media-downloader
    sleep 3

    if systemctl is-active --quiet media-downloader; then
        log "[INFO] Scheduler restarted successfully"
    else
        log "[ERROR] Scheduler failed to restart!"
    fi
fi

log "[INFO] =========================================="
log "[INFO] Dependency update process complete"
log "[INFO] =========================================="
|
||||
5
scripts/run-with-xvfb.sh
Executable file
5
scripts/run-with-xvfb.sh
Executable file
@@ -0,0 +1,5 @@
|
||||
#!/bin/bash
# Run media-downloader with Xvfb virtual display
# This allows headed browsers (ImgInn, Toolzu) to run without a GUI
#
# Usage: run-with-xvfb.sh <command> [args...]
#
# The wrapped command runs with DISPLAY pointed at display :100.
# NOTE(review): assumes an Xvfb instance is already listening on :100
# (e.g. started by a systemd unit) — confirm before relying on this.
# XAUTHORITY is cleared so the X client skips cookie-based authorization;
# HOME is forced to /root for browser profile paths.

env DISPLAY=:100 HOME=/root XAUTHORITY= "$@"
|
||||
205
scripts/uninstall.sh
Executable file
205
scripts/uninstall.sh
Executable file
@@ -0,0 +1,205 @@
|
||||
#!/bin/bash

# Media Downloader Uninstaller Script
# Version: 11.27.0
#
# Stops and disables all media-downloader systemd units, backs up user data
# (config, sessions, cookies, database, data) to a timestamped directory,
# then removes the installation directory and command wrapper.

set -e

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Installation directory
INSTALL_DIR="/opt/media-downloader"

echo -e "${RED}╔════════════════════════════════════════════════╗${NC}"
echo -e "${RED}║ Media Downloader Uninstaller ║${NC}"
echo -e "${RED}╚════════════════════════════════════════════════╝${NC}"
echo ""

# Check if running as root
if [[ $EUID -ne 0 ]]; then
    echo -e "${RED}This script must be run as root (use sudo)${NC}"
    exit 1
fi

echo -e "${YELLOW}This will remove:${NC}"
echo " - Installation directory: $INSTALL_DIR"
echo " - All systemd services and timers"
echo " - Command wrapper: /usr/local/bin/media-downloader"
echo ""
echo -e "${YELLOW}Services to be removed:${NC}"
echo " - media-downloader (scheduler)"
echo " - media-downloader-api (web API)"
echo " - media-downloader-frontend (web UI)"
echo " - xvfb-media-downloader (virtual display)"
echo " - media-cache-builder (timer)"
echo " - media-embedding-generator (timer)"
echo " - media-celebrity-enrichment (timer)"
echo ""
echo -e "${YELLOW}This will NOT remove:${NC}"
echo " - Downloaded media files (if stored elsewhere)"
echo " - Python packages installed system-wide"
echo " - Redis server"
echo " - FlareSolverr Docker container"
echo ""

read -p "Continue with uninstallation? (y/n) " -n 1 -r
echo
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
    echo "Uninstallation cancelled"
    exit 1
fi

# ============================================================================
# STOP ALL SERVICES
# ============================================================================

echo -e "${YELLOW}Stopping all services...${NC}"

# Stop main services (quoted to satisfy SC2086; names are fixed but quoting
# keeps the loops safe if the lists ever change)
for service in media-downloader media-downloader-api media-downloader-frontend xvfb-media-downloader; do
    if systemctl is-active --quiet "$service" 2>/dev/null; then
        echo " Stopping $service..."
        systemctl stop "$service"
    fi
done

# Stop timers
for timer in media-cache-builder media-embedding-generator media-celebrity-enrichment; do
    if systemctl is-active --quiet "$timer.timer" 2>/dev/null; then
        echo " Stopping $timer.timer..."
        systemctl stop "$timer.timer"
    fi
done

# ============================================================================
# DISABLE SERVICES
# ============================================================================

echo -e "${YELLOW}Disabling services...${NC}"

# Disable main services
for service in media-downloader media-downloader-api media-downloader-frontend xvfb-media-downloader; do
    if systemctl is-enabled --quiet "$service" 2>/dev/null; then
        echo " Disabling $service..."
        systemctl disable "$service"
    fi
done

# Disable timers
for timer in media-cache-builder media-embedding-generator media-celebrity-enrichment; do
    if systemctl is-enabled --quiet "$timer.timer" 2>/dev/null; then
        echo " Disabling $timer.timer..."
        systemctl disable "$timer.timer"
    fi
done

# ============================================================================
# REMOVE SYSTEMD FILES
# ============================================================================

echo -e "${YELLOW}Removing systemd files...${NC}"

rm -f /etc/systemd/system/media-downloader.service
rm -f /etc/systemd/system/media-downloader-api.service
rm -f /etc/systemd/system/media-downloader-frontend.service
rm -f /etc/systemd/system/xvfb-media-downloader.service
rm -f /etc/systemd/system/media-cache-builder.service
rm -f /etc/systemd/system/media-cache-builder.timer
rm -f /etc/systemd/system/media-embedding-generator.service
rm -f /etc/systemd/system/media-embedding-generator.timer
rm -f /etc/systemd/system/media-celebrity-enrichment.service
rm -f /etc/systemd/system/media-celebrity-enrichment.timer

# Reload systemd
systemctl daemon-reload

# ============================================================================
# BACKUP DATA
# ============================================================================

if [ -d "$INSTALL_DIR" ]; then
    # NOTE(review): under "sudo ./uninstall.sh" $HOME is usually /root, so the
    # backup lands in root's home directory — confirm this is intended.
    BACKUP_DIR="$HOME/media-downloader-backup-$(date +%Y%m%d-%H%M%S)"
    echo -e "${GREEN}Creating backup at $BACKUP_DIR${NC}"
    mkdir -p "$BACKUP_DIR"

    # Backup config directory
    if [ -d "$INSTALL_DIR/config" ]; then
        cp -r "$INSTALL_DIR/config" "$BACKUP_DIR/"
        echo " ✓ Configuration directory backed up"
    fi

    # Backup sessions
    if [ -d "$INSTALL_DIR/sessions" ]; then
        cp -r "$INSTALL_DIR/sessions" "$BACKUP_DIR/"
        echo " ✓ Sessions backed up"
    fi

    # Backup cookies
    if [ -d "$INSTALL_DIR/cookies" ]; then
        cp -r "$INSTALL_DIR/cookies" "$BACKUP_DIR/"
        echo " ✓ Forum cookies backed up"
    fi

    # Backup database directory
    if [ -d "$INSTALL_DIR/database" ]; then
        cp -r "$INSTALL_DIR/database" "$BACKUP_DIR/"
        echo " ✓ Database directory backed up"
    fi

    # Backup data directory
    if [ -d "$INSTALL_DIR/data" ]; then
        cp -r "$INSTALL_DIR/data" "$BACKUP_DIR/"
        echo " ✓ Data directory backed up"
    fi
fi

# ============================================================================
# REMOVE INSTALLATION
# ============================================================================

# Remove installation directory
if [ -d "$INSTALL_DIR" ]; then
    echo -e "${YELLOW}Removing installation directory...${NC}"
    rm -rf "$INSTALL_DIR"
fi

# Remove command wrapper
if [ -f "/usr/local/bin/media-downloader" ]; then
    echo -e "${YELLOW}Removing command wrapper...${NC}"
    rm -f "/usr/local/bin/media-downloader"
fi

# ============================================================================
# COMPLETION
# ============================================================================

echo ""
echo -e "${GREEN}╔════════════════════════════════════════════════╗${NC}"
echo -e "${GREEN}║ Uninstallation Complete! ║${NC}"
echo -e "${GREEN}╚════════════════════════════════════════════════╝${NC}"
echo ""

if [ -n "$BACKUP_DIR" ] && [ -d "$BACKUP_DIR" ]; then
    echo -e "${BLUE}Backup created at:${NC} $BACKUP_DIR"
    echo ""
    echo -e "${YELLOW}To restore your data to a new installation:${NC}"
    echo " 1. Install Media Downloader again:"
    echo " sudo ./scripts/install.sh"
    echo ""
    echo " 2. Restore your backed up data:"
    echo " sudo cp -r $BACKUP_DIR/database/* /opt/media-downloader/database/"
    echo " sudo cp -r $BACKUP_DIR/config/* /opt/media-downloader/config/"
    echo " sudo cp -r $BACKUP_DIR/sessions /opt/media-downloader/"
    echo " sudo cp -r $BACKUP_DIR/cookies /opt/media-downloader/"
    echo " sudo cp -r $BACKUP_DIR/data/* /opt/media-downloader/data/"
    echo " sudo chown -R \$USER:\$USER /opt/media-downloader/"
    echo ""
    echo " 3. Restart the services:"
    echo " sudo systemctl restart media-downloader"
    echo " sudo systemctl restart media-downloader-api"
fi
|
||||
131
scripts/update-all-versions.sh
Executable file
131
scripts/update-all-versions.sh
Executable file
@@ -0,0 +1,131 @@
|
||||
#!/bin/bash
#
# Comprehensive Version Update Script for Media Downloader
# Updates ALL version references across the entire codebase
#
# Usage: ./scripts/update-all-versions.sh <new_version>
# Example: ./scripts/update-all-versions.sh 6.11.0
#

set -e

# Require a version argument.
if [ -z "$1" ]; then
    echo "Error: Version number required"
    echo "Usage: $0 <version>"
    echo "Example: $0 6.11.0"
    exit 1
fi

NEW_VERSION="$1"

# Reject malformed versions early so the sed replacements below cannot write
# garbage into source files (same validation as scripts/update-version.sh).
if ! [[ "$NEW_VERSION" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
    echo "Error: Invalid version format: $NEW_VERSION"
    echo "Version must be in format X.X.X (e.g., 6.11.0)"
    exit 1
fi

# Absolute paths to this script's directory and the project root.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||
|
||||
# Banner: announce the incoming version before any files are touched.
echo "╔════════════════════════════════════════════════╗"
echo "║        Media Downloader Version Update         ║"
echo "╠════════════════════════════════════════════════╣"
echo "║ New Version: $NEW_VERSION"
echo "╚════════════════════════════════════════════════╝"
echo ""

# Read current version (falls back to "unknown" when VERSION is missing).
CURRENT_VERSION=$(cat "$PROJECT_ROOT/VERSION" 2>/dev/null || echo "unknown")
echo "Current version: $CURRENT_VERSION"
echo "New version: $NEW_VERSION"
echo ""
|
||||
# Function to update file with sed
#
# update_file <file> <sed-script> <description>
#   Applies <sed-script> in place to <file> and prints a status line.
#   Missing files are reported and skipped rather than failing the run.
#
# Fix: the original declared a 4th "description" parameter ($4), but every
# call site in this script passes only three arguments, so the success
# message always printed an empty description. The label is $3.
update_file() {
    local file=$1
    local sed_script=$2    # full sed substitution expression, e.g. 's/a/b/g'
    local description=$3   # human-readable label for the status line

    if [ -f "$file" ]; then
        sed -i "$sed_script" "$file"
        echo "✓ Updated: $description"
    else
        echo "⚠ Skipped: $file (not found)"
    fi
}
|
||||
|
||||
echo "Updating version files..."
echo "─────────────────────────────────────────────────"

# 1. VERSION file — the single source of truth, rewritten outright.
echo "$NEW_VERSION" > "$PROJECT_ROOT/VERSION"
echo "✓ Updated: VERSION file"

# 2. README.md — "**Version:** X.Y.Z" header badge.
update_file "$PROJECT_ROOT/README.md" \
    "s/\*\*Version:\*\* [0-9.]\+/**Version:** $NEW_VERSION/g" \
    "README.md (header)"

# README.md — version shown in the directory-structure comment.
update_file "$PROJECT_ROOT/README.md" \
    "s/VERSION.*# Version number ([0-9.]\+)/VERSION # Version number ($NEW_VERSION)/g" \
    "README.md (directory structure comment)"

# 3. Frontend files
echo ""
echo "Updating frontend files..."
echo "─────────────────────────────────────────────────"

# Login page footer: "<p>vX.Y.Z</p>".
update_file "$PROJECT_ROOT/web/frontend/src/pages/Login.tsx" \
    "s/<p>v[0-9.]\+<\/p>/<p>v$NEW_VERSION<\/p>/g" \
    "Login.tsx"

# App shell: every "vX.Y.Z</p>" occurrence (desktop + mobile menus).
update_file "$PROJECT_ROOT/web/frontend/src/App.tsx" \
    "s/v[0-9.]\+<\/p>/v$NEW_VERSION<\/p>/g" \
    "App.tsx (all occurrences)"

# Configuration "About" tab body text.
update_file "$PROJECT_ROOT/web/frontend/src/pages/Configuration.tsx" \
    "s/Version [0-9.]\+/Version $NEW_VERSION/g" \
    "Configuration.tsx"

# Configuration page "(vX.Y.Z)" references in comments.
update_file "$PROJECT_ROOT/web/frontend/src/pages/Configuration.tsx" \
    "s/v[0-9.]\+)/v$NEW_VERSION)/g" \
    "Configuration.tsx (comments)"

# npm package manifest.
update_file "$PROJECT_ROOT/web/frontend/package.json" \
    "s/\"version\": \"[0-9.]\+\"/\"version\": \"$NEW_VERSION\"/g" \
    "package.json"

# 4. Backend API version
echo ""
echo "Updating backend files..."
echo "─────────────────────────────────────────────────"

# API application version string.
update_file "$PROJECT_ROOT/web/backend/api.py" \
    "s/version=\"[0-9.]\+\"/version=\"$NEW_VERSION\"/g" \
    "api.py"

# Backend settings constant.
update_file "$PROJECT_ROOT/web/backend/core/config.py" \
    "s/API_VERSION: str = \"[0-9.]\+\"/API_VERSION: str = \"$NEW_VERSION\"/g" \
    "core/config.py (API_VERSION)"

# 5. Installer script
echo ""
echo "Updating installer..."
echo "─────────────────────────────────────────────────"

update_file "$PROJECT_ROOT/scripts/install.sh" \
    "s/# Version: [0-9.]\+/# Version: $NEW_VERSION/g" \
    "install.sh (header comment)"

update_file "$PROJECT_ROOT/scripts/install.sh" \
    "s/Installer v[0-9.]\+/Installer v$NEW_VERSION/g" \
    "install.sh (banner)"

echo ""
echo "╔════════════════════════════════════════════════╗"
echo "║            Version Update Complete             ║"
echo "╠════════════════════════════════════════════════╣"
echo "║ Version: $NEW_VERSION"
# NOTE(review): "10" is hard-coded and does not match the number of
# update operations above — verify, or compute it from a counter.
echo "║ Files Updated: 10"
echo "╚════════════════════════════════════════════════╝"
echo ""
echo "Next steps:"
echo "1. Update data/changelog.json with new version"
echo "2. Update docs/CHANGELOG.md with release notes"
echo "3. Rebuild frontend (automatically if dev server running)"
echo "4. Run: ./scripts/create-version-backup.sh"
echo ""
|
||||
165
scripts/update-version.sh
Executable file
165
scripts/update-version.sh
Executable file
@@ -0,0 +1,165 @@
|
||||
#!/bin/bash
################################################################################
# Media Downloader Version Update Script
# Updates version numbers across all application files
# Usage: bash scripts/update-version.sh 6.4.3
################################################################################

set -e  # Exit on error

# New version from the first CLI argument (validated further below).
NEW_VERSION="$1"
# Currently-installed version, or "unknown" when the VERSION file is missing.
OLD_VERSION=$(cat /opt/media-downloader/VERSION 2>/dev/null || echo "unknown")
|
||||
|
||||
# ANSI escape sequences used for colored terminal output.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Status printers: each prefixes its single message argument with a
# colored glyph. printf '%b' expands the backslash escapes stored in the
# color variables, matching the semantics of `echo -e`.
print_success() { printf '%b\n' "${GREEN}✓${NC} $1"; }
print_error()   { printf '%b\n' "${RED}✗${NC} $1"; }
print_info()    { printf '%b\n' "${BLUE}ℹ${NC} $1"; }
print_warning() { printf '%b\n' "${YELLOW}⚠${NC} $1"; }
|
||||
|
||||
################################################################################
# Validate Input
################################################################################

# Bail out with usage help when no version argument was given.
if [[ -z "$NEW_VERSION" ]]; then
    print_error "No version specified!"
    printf '\nUsage: %s <version>\nExample: %s 6.4.3\n\n' "$0" "$0"
    exit 1
fi

# Enforce a strict numeric MAJOR.MINOR.PATCH shape (X.X.X) before any
# file is edited.
if [[ ! "$NEW_VERSION" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
    print_error "Invalid version format: $NEW_VERSION"
    echo "Version must be in format X.X.X (e.g., 6.4.3)"
    exit 1
fi
|
||||
|
||||
################################################################################
# Header
################################################################################

echo ""
echo "╔════════════════════════════════════════════════╗"
echo "║        Media Downloader Version Update         ║"
echo "╠════════════════════════════════════════════════╣"
echo "║ Current: ${OLD_VERSION} "
echo "║ New: ${NEW_VERSION} "
echo "╚════════════════════════════════════════════════╝"
echo ""

# Interactive confirmation: read a single keystroke (-n 1) raw (-r, no
# backslash processing) into $REPLY.
read -p "Update version from ${OLD_VERSION} to ${NEW_VERSION}? (y/N) " -n 1 -r
echo ""
# Anything other than y/Y aborts without touching any files.
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
    print_info "Version update cancelled"
    exit 0
fi
|
||||
|
||||
################################################################################
# Update Files
################################################################################

echo ""
print_info "Updating version files..."
echo ""

# 1. VERSION file — the canonical version marker, rewritten outright.
print_info "1/5 Updating VERSION file..."
echo "$NEW_VERSION" > /opt/media-downloader/VERSION
print_success "VERSION file updated"

# 2. Backend API version string (version="X.Y.Z").
print_info "2/5 Updating backend API (web/backend/api.py)..."
sed -i "s/version=\"[0-9]\+\.[0-9]\+\.[0-9]\+\"/version=\"$NEW_VERSION\"/" \
    /opt/media-downloader/web/backend/api.py
print_success "Backend API version updated"

# 3. npm manifest "version" field.
print_info "3/5 Updating frontend package.json..."
sed -i "s/\"version\": \"[0-9]\+\.[0-9]\+\.[0-9]\+\"/\"version\": \"$NEW_VERSION\"/" \
    /opt/media-downloader/web/frontend/package.json
print_success "package.json updated"

# 4. App.tsx — ">vX.Y.Z<" markup; /g because it appears in both the
# desktop and the mobile menu.
print_info "4/5 Updating App.tsx (desktop and mobile menus)..."
sed -i "s/>v[0-9]\+\.[0-9]\+\.[0-9]\+</>v$NEW_VERSION</g" \
    /opt/media-downloader/web/frontend/src/App.tsx
print_success "App.tsx updated (2 locations)"

# 5. Configuration.tsx — "Version X.Y.Z" and "currently vX.Y.Z" strings.
print_info "5/5 Updating Configuration.tsx (About tab)..."
sed -i "s/Version [0-9]\+\.[0-9]\+\.[0-9]\+/Version $NEW_VERSION/" \
    /opt/media-downloader/web/frontend/src/pages/Configuration.tsx
sed -i "s/currently v[0-9]\+\.[0-9]\+\.[0-9]\+/currently v$NEW_VERSION/" \
    /opt/media-downloader/web/frontend/src/pages/Configuration.tsx
print_success "Configuration.tsx updated"

################################################################################
# Verification
################################################################################

echo ""
print_info "Verifying version updates..."
echo ""
|
||||
|
||||
# Check each file
#
# check_file <file> <grep-pattern> <label>
#   Prints a success line when <grep-pattern> matches inside <file>;
#   otherwise prints an error line and returns non-zero.
check_file() {
    local target=$1
    local needle=$2
    local label=$3

    # Guard style: bail out as a failure when the pattern is absent
    # (or the file itself is unreadable — grep errors are silenced).
    if ! grep -q "$needle" "$target" 2>/dev/null; then
        print_error "$label - NOT FOUND!"
        return 1
    fi
    print_success "$label"
}
|
||||
|
||||
# Verify each expected version string actually landed in its target file.
# NOTE(review): with `set -e` active, the first failing check_file aborts
# the script here, skipping later checks and the completion banner —
# confirm that fail-fast behavior is intended.
check_file "/opt/media-downloader/VERSION" "^$NEW_VERSION$" "VERSION file"
check_file "/opt/media-downloader/web/backend/api.py" "version=\"$NEW_VERSION\"" "Backend API"
check_file "/opt/media-downloader/web/frontend/package.json" "\"version\": \"$NEW_VERSION\"" "package.json"
check_file "/opt/media-downloader/web/frontend/src/App.tsx" "v$NEW_VERSION" "App.tsx (menus)"
check_file "/opt/media-downloader/web/frontend/src/pages/Configuration.tsx" "Version $NEW_VERSION" "Configuration.tsx (About tab)"

################################################################################
# Manual Steps Reminder
################################################################################

echo ""
echo "╔════════════════════════════════════════════════╗"
echo "║            Version Update Complete             ║"
echo "╚════════════════════════════════════════════════╝"
echo ""
print_warning "MANUAL STEPS REQUIRED:"
echo ""
echo "1. Update data/changelog.json:"
echo "   - Add new version entry at the TOP of the array"
echo "   - Include version, date, title, type, and changes"
echo ""
echo "2. Update CHANGELOG.md:"
echo "   - Add new version section at the TOP (after header)"
echo "   - Document all changes, fixes, and features"
echo ""
echo "3. Restart services:"
# Fix: plain `echo` does not interpret the \033 escapes stored in
# $BLUE/$NC, so the color codes were printed literally; `echo -e`
# renders them as colors (matching the rest of the script).
echo -e "   ${BLUE}sudo systemctl restart media-downloader-api${NC}"
echo ""
echo "4. Create version backup:"
echo -e "   ${BLUE}bash scripts/create-version-backup.sh${NC}"
echo ""
echo "5. Verify in browser:"
echo "   - Check Health page loads correctly"
echo "   - Check Configuration → About tab shows v$NEW_VERSION"
echo "   - Check desktop/mobile menu shows v$NEW_VERSION"
echo ""
print_info "See docs/VERSION_UPDATE_CHECKLIST.md for full checklist"
echo ""
|
||||
181
scripts/upgrade_fansly_to_4k.py
Normal file
181
scripts/upgrade_fansly_to_4k.py
Normal file
@@ -0,0 +1,181 @@
|
||||
#!/usr/bin/env python3
"""
Check Fansly attachments for 4K variants and upgrade if available.

This script:
1. Finds all non-4K video attachments from Fansly Direct
2. Re-fetches media info from the Fansly API
3. Checks if a higher resolution variant is available
4. Updates the attachment URL and resets for re-download if upgrade found
"""

import asyncio
import sys
import os  # NOTE(review): not used in the visible code — confirm before removing

# Add project to path
sys.path.insert(0, '/opt/media-downloader')

# Bootstrap PostgreSQL adapter before any database imports
# (must execute before importing any module that opens a DB connection).
from modules.db_bootstrap import bootstrap_database
bootstrap_database()

from modules.paid_content.fansly_direct_client import FanslyDirectClient
from modules.paid_content.db_adapter import PaidContentDB
|
||||
|
||||
|
||||
async def check_and_upgrade_attachments() -> None:
    """Check all non-4K Fansly attachments for upgrades.

    For each stored video attachment below 4K resolution, re-fetch its
    media info from the Fansly API; when a higher-resolution variant with
    a usable URL exists, rewrite the attachment row and reset its download
    state so the normal pipeline re-downloads it. Prints a per-item status
    line and a final summary.
    """

    db = PaidContentDB('/opt/media-downloader/database/media_downloader.db')

    # Get Fansly auth token
    service = db.get_service('fansly_direct')
    if not service or not service.get('session_cookie'):
        print("ERROR: No Fansly auth token configured")
        return

    auth_token = service['session_cookie']
    client = FanslyDirectClient(auth_token)

    # Find non-4K video attachments from Fansly Direct.
    # 4K is 3840x2160 or 2160x3840 (portrait).
    # The NOT LIKE filters skip synthetic/imported post IDs that are not
    # numeric Fansly IDs (int(post_id) below would raise on them).
    query = """
        SELECT a.id, a.name, a.width, a.height, a.status, p.post_id, p.id as db_post_id
        FROM paid_content_attachments a
        JOIN paid_content_posts p ON a.post_id = p.id
        JOIN paid_content_creators c ON p.creator_id = c.id
        WHERE c.service_id = 'fansly_direct'
          AND a.file_type = 'video'
          AND a.width IS NOT NULL
          AND a.height IS NOT NULL
          AND NOT (
              (a.width >= 3840 AND a.height >= 2160) OR
              (a.width >= 2160 AND a.height >= 3840)
          )
          AND p.post_id NOT LIKE 'manual_%'
          AND p.post_id NOT LIKE 'import_%'
          AND p.post_id NOT LIKE '20%-%'
        ORDER BY a.id
    """

    cursor = db.conn.execute(query)
    attachments = cursor.fetchall()

    print(f"Found {len(attachments)} non-4K video attachments to check")
    print("-" * 80)

    # Running tallies for the summary printed at the end.
    upgrades_found = 0
    errors = 0
    already_best = 0

    for att in attachments:
        att_id, name, width, height, status, post_id, db_post_id = att
        current_res = f"{width}x{height}"

        print(f"\nChecking: {name} (ID: {att_id}, current: {current_res})")

        try:
            # Extract media ID from filename (e.g., "12345.mp4" -> "12345")
            media_id = name.replace('.mp4', '').replace('.mov', '')

            # Fetch media info from Fansly API.
            # We need to get the account media for this post.
            # NOTE(review): the client context manager is re-entered for every
            # attachment — confirm FanslyDirectClient supports repeated reuse.
            async with client:
                # Get the post to find media info.
                # NOTE(review): relies on the private _fetch_timeline_page API;
                # before=post_id+1 presumably pages the timeline so this post
                # is included — verify against the client implementation.
                posts, _, media_dict, account_media_dict, bundle_dict = await client._fetch_timeline_page(
                    account_id=None,  # We'll search by post ID
                    before=str(int(post_id) + 1),  # Get this post
                    account={}
                )

                # Track the best variant seen; start from the stored resolution.
                found_4k = False
                best_width = width
                best_height = height
                best_url = None

                # Check account_media_dict for this media
                for am_id, am_data in account_media_dict.items():
                    media = am_data.get('media', {})
                    if str(media.get('id')) == media_id:
                        # Found the media, check variants
                        variants = media.get('variants', [])
                        print(f"  Found media with {len(variants)} variants")

                        for v in variants:
                            v_w = v.get('width', 0) or 0
                            v_h = v.get('height', 0) or 0
                            v_locs = v.get('locations', [])

                            # Check if this is a higher resolution (by pixel area)
                            if v_w * v_h > best_width * best_height:
                                for loc in v_locs:
                                    loc_url = loc.get('location', '')
                                    # Prefer streaming formats for 4K
                                    if '.m3u8' in loc_url or '.mp4' in loc_url or '.mov' in loc_url:
                                        best_width = v_w
                                        best_height = v_h

                                        # Construct signed URL if metadata present
                                        # (signed-URL query params, CDN-style).
                                        metadata = loc.get('metadata', {})
                                        if metadata:
                                            params = []
                                            for key in ['Key-Pair-Id', 'Signature', 'Policy']:
                                                if key in metadata:
                                                    params.append(f"{key}={metadata[key]}")
                                            if params:
                                                best_url = loc_url + '?' + '&'.join(params)
                                            else:
                                                best_url = loc_url
                                        else:
                                            best_url = loc_url

                            # Stop scanning variants once a 4K one is seen.
                            if v_w >= 3840 or v_h >= 3840:
                                found_4k = True
                                break
                        # Media located; no need to scan remaining account media.
                        break

                if found_4k and best_url:
                    print(f"  ✓ UPGRADE FOUND: {best_width}x{best_height}")
                    upgrades_found += 1

                    # Update the attachment: point at the upgraded URL and
                    # reset download state so the pipeline re-fetches it.
                    db.conn.execute("""
                        UPDATE paid_content_attachments
                        SET download_url = ?,
                            width = ?,
                            height = ?,
                            status = 'pending',
                            download_attempts = 0,
                            error_message = NULL,
                            local_path = NULL,
                            local_filename = NULL
                        WHERE id = ?
                    """, (best_url, best_width, best_height, att_id))
                    db.conn.commit()
                    print(f"  → Updated and queued for re-download")

                elif best_width > width or best_height > height:
                    # A better (but sub-4K) variant exists; left untouched.
                    # NOTE(review): this branch is not counted in any tally.
                    print(f"  ~ Better quality available: {best_width}x{best_height} (not 4K)")
                else:
                    print(f"  - Already at best available quality")
                    already_best += 1

        except Exception as e:
            # Keep going on per-item failures; count them for the summary.
            print(f"  ✗ Error: {e}")
            errors += 1

        # Rate limiting
        await asyncio.sleep(0.5)

    print("\n" + "=" * 80)
    print(f"Summary:")
    print(f"  Upgrades found and queued: {upgrades_found}")
    print(f"  Already at best quality: {already_best}")
    print(f"  Errors: {errors}")
    print(f"  Total checked: {len(attachments)}")
|
||||
|
||||
|
||||
# Entry point: run the async upgrade pass when executed as a script.
if __name__ == '__main__':
    asyncio.run(check_and_upgrade_attachments())
|
||||
Reference in New Issue
Block a user