#!/usr/bin/env python3
|
|
"""
|
|
Unified Database Manager for Media Downloader System
|
|
Consolidates all platform databases into a single, optimized database
|
|
"""
|
|
|
|
import sqlite3
|
|
import json
|
|
import hashlib
|
|
import time
|
|
import functools
|
|
from pathlib import Path
|
|
from datetime import datetime, timedelta
|
|
from typing import Dict, List, Optional, Tuple, Any
|
|
from contextlib import contextmanager
|
|
from threading import Lock, RLock
|
|
import queue
|
|
from modules.universal_logger import get_logger
|
|
|
|
logger = get_logger('Database')
|
|
|
|
# Database retry configuration constants
|
|
DB_MAX_RETRIES = 3
|
|
DB_BASE_DELAY = 0.1 # Base delay in seconds for exponential backoff
|
|
DB_MAX_DELAY = 5.0 # Maximum delay in seconds
|
|
|
|
|
|
def _is_lock_error(e: Exception) -> bool:
|
|
"""Check if an OperationalError is a database lock/busy/deadlock error."""
|
|
msg = str(e).lower()
|
|
return ("database is locked" in msg or "database is busy" in msg
|
|
or "deadlock detected" in msg or "could not obtain lock" in msg
|
|
or "lock timeout" in msg or "canceling statement due to lock" in msg)
|
|
|
|
|
|
def _safe_alter(cursor, sql: str) -> bool:
|
|
"""Execute an ALTER TABLE statement, skipping gracefully on lock timeout.
|
|
Returns True if executed, False if skipped due to lock contention."""
|
|
try:
|
|
cursor.execute(sql)
|
|
return True
|
|
except Exception as e:
|
|
msg = str(e).lower()
|
|
if _is_lock_error(e):
|
|
logger.debug(f"Skipped migration (lock contention): {sql[:80]}")
|
|
return False
|
|
# For "already exists" type errors, also skip gracefully
|
|
if 'already exists' in msg or 'duplicate column' in msg:
|
|
return True
|
|
raise
|
|
|
|
|
|
def retry_on_lock(operation_name: str = "database operation", max_retries: int = DB_MAX_RETRIES,
                  base_delay: float = DB_BASE_DELAY, max_delay: float = DB_MAX_DELAY):
    """
    Decorator for retrying database operations on lock errors.

    Lock/busy/deadlock errors (see _is_lock_error) are retried with
    exponential backoff (base_delay * 2**attempt, capped at max_delay).
    Any other sqlite3.OperationalError is raised immediately without retry.

    Args:
        operation_name: Name for logging
        max_retries: Maximum number of attempts; at least one attempt is
            always made (previously a value < 1 silently skipped the call
            and returned None)
        base_delay: Base delay in seconds for exponential backoff
        max_delay: Maximum delay cap in seconds
    """
    # Guard against misconfiguration: the wrapped function must run at
    # least once regardless of the retry budget.
    attempts = max(1, max_retries)

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(attempts):
                try:
                    return func(*args, **kwargs)
                except sqlite3.OperationalError as e:
                    if not _is_lock_error(e):
                        # Non-lock operational error: not transient, don't retry.
                        raise
                    if attempt >= attempts - 1:
                        # Retry budget exhausted — surface the lock error
                        # directly instead of stashing it past the loop.
                        logger.error(f"{operation_name} failed after {attempts} attempts: {e}")
                        raise
                    delay = min(max_delay, base_delay * (2 ** attempt))
                    logger.warning(f"{operation_name} locked, retrying in {delay:.1f}s (attempt {attempt + 1}/{attempts})")
                    time.sleep(delay)
        return wrapper
    return decorator
|
|
|
|
|
|
class DatabasePool:
    """Connection pool for better concurrency.

    Pre-creates a fixed set of SQLite connections configured for WAL
    concurrency, and serializes writers through a reentrant lock while
    readers share the pool freely.
    """

    def __init__(self, db_path: str, pool_size: int = 20):
        """
        Args:
            db_path: Path to the SQLite database file
            pool_size: Number of connections to pre-create
        """
        self.db_path = db_path
        self.pool = queue.Queue(maxsize=pool_size)
        self.write_lock = RLock()  # Reentrant lock for writes

        # Pre-populate pool with connections configured for better concurrency
        for _ in range(pool_size):
            conn = sqlite3.connect(
                db_path,
                check_same_thread=False,
                timeout=30.0,
                isolation_level=None  # Manual transaction control
            )
            conn.row_factory = sqlite3.Row
            # PRAGMAs are translated to no-ops by pg_adapter when using PostgreSQL
            conn.execute("PRAGMA journal_mode=WAL")
            conn.execute("PRAGMA synchronous=NORMAL")
            conn.execute("PRAGMA cache_size=10000")
            conn.execute("PRAGMA temp_store=MEMORY")
            conn.execute("PRAGMA busy_timeout=30000")
            conn.execute("PRAGMA wal_checkpoint=TRUNCATE")
            conn.execute("PRAGMA foreign_keys=ON")
            self.pool.put(conn)

    @contextmanager
    def get_connection(self, for_write=False):
        """Get a connection from the pool.

        Args:
            for_write: If True, acquire the write lock (serializing all
                writers) and wrap the work in a BEGIN IMMEDIATE transaction
                that is committed on success and rolled back on error.
        """
        if for_write:
            with self.write_lock:
                conn = self.pool.get(timeout=5)  # 5 second timeout to get connection
                try:
                    # Use IMMEDIATE mode for writes to fail fast if locked
                    conn.execute("BEGIN IMMEDIATE")
                    yield conn
                    conn.commit()
                except Exception:
                    conn.rollback()
                    raise
                finally:
                    # Always return the connection, even on failure.
                    self.pool.put(conn)
        else:
            conn = self.pool.get(timeout=5)
            try:
                yield conn
            finally:
                self.pool.put(conn)

    def close_all(self):
        """Close every idle connection currently in the pool.

        Drains with get_nowait() so that a connection checked out by
        another thread cannot make this call block forever (the previous
        empty()/get() pair was racy: empty() could turn stale between the
        check and the blocking get()).
        """
        while True:
            try:
                conn = self.pool.get_nowait()
            except queue.Empty:
                break
            conn.close()

    def checkpoint(self):
        """Run a WAL checkpoint to merge the WAL file into the main database.

        Returns the (busy, log_pages, checkpointed_pages) row on success,
        or None if the checkpoint failed (failure is logged, not raised).
        """
        conn = self.pool.get(timeout=5)
        try:
            result = conn.execute("PRAGMA wal_checkpoint(TRUNCATE)").fetchone()
            # result is (busy, log_pages, checkpointed_pages)
            if result:
                logger.debug(f"WAL checkpoint: busy={result[0]}, log={result[1]}, checkpointed={result[2]}")
            return result
        except Exception as e:
            logger.warning(f"WAL checkpoint failed: {e}")
            return None
        finally:
            self.pool.put(conn)

    def get_write_connection(self):
        """Get a dedicated connection for writing (not from pool).

        The caller owns the returned connection and is responsible for
        closing it.
        """
        conn = sqlite3.connect(
            self.db_path,
            check_same_thread=False,
            timeout=60.0,  # Wait up to 60 seconds
            isolation_level='IMMEDIATE'  # Lock immediately for writes
        )
        conn.row_factory = sqlite3.Row
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute("PRAGMA synchronous=NORMAL")
        conn.execute("PRAGMA busy_timeout=60000")  # 60 second timeout
        conn.execute("PRAGMA foreign_keys=ON")  # Enable foreign key enforcement
        return conn
|
|
|
|
|
|
class UnifiedDatabase:
|
|
"""Unified database for all media downloads"""
|
|
|
|
def __init__(self, db_path: str = None, use_pool: bool = True, pool_size: int = 3):
|
|
"""
|
|
Initialize unified database
|
|
|
|
Args:
|
|
db_path: Path to the unified database file (defaults to /opt/media-downloader/database/media_downloader.db)
|
|
use_pool: Whether to use connection pooling
|
|
pool_size: Size of connection pool (default: 5, recommended: 20 for API workers)
|
|
"""
|
|
# Use proper default path if none provided
|
|
if db_path is None:
|
|
db_path = str(Path(__file__).parent.parent / 'database' / 'media_downloader.db')
|
|
self.db_path = Path(db_path)
|
|
self.use_pool = use_pool
|
|
|
|
if use_pool:
|
|
self.pool = DatabasePool(str(self.db_path), pool_size=pool_size)
|
|
else:
|
|
self.pool = None
|
|
|
|
self._init_database()
|
|
|
|
    @contextmanager
    def get_connection(self, for_write=False):
        """Get a database connection.

        Yields a pooled connection when pooling is enabled; otherwise opens
        a short-lived direct SQLite connection that is always closed on exit.
        In the direct path, write transactions are opened with
        BEGIN IMMEDIATE, committed on success and rolled back on error.

        Args:
            for_write: If True, use write lock for serialized writes
                (pooled path) or an explicit immediate transaction
                (direct path)
        """
        if self.pool:
            # Delegate transaction handling to the pool, which serializes
            # writers via its reentrant write lock.
            with self.pool.get_connection(for_write=for_write) as conn:
                yield conn
        else:
            conn = sqlite3.connect(
                self.db_path,
                timeout=10.0,  # Reduced timeout for faster failure
                isolation_level=None  # Manual transaction control
            )
            conn.row_factory = sqlite3.Row
            # PRAGMAs are translated to no-ops by pg_adapter when using PostgreSQL
            conn.execute("PRAGMA busy_timeout=10000")
            conn.execute("PRAGMA journal_mode=WAL")
            try:
                if for_write:
                    # Grab the write lock up front so contention surfaces
                    # immediately instead of mid-transaction.
                    conn.execute("BEGIN IMMEDIATE")
                yield conn
                if for_write:
                    conn.commit()
            except Exception:
                if for_write:
                    conn.rollback()
                raise
            finally:
                # Direct connections are never pooled; always close.
                conn.close()
|
|
|
|
def get_dedicated_write_connection(self):
|
|
"""Get a dedicated write connection (not from pool) for critical writes"""
|
|
if self.pool:
|
|
return self.pool.get_write_connection()
|
|
else:
|
|
conn = sqlite3.connect(
|
|
self.db_path,
|
|
timeout=60.0,
|
|
isolation_level='IMMEDIATE'
|
|
)
|
|
conn.row_factory = sqlite3.Row
|
|
conn.execute("PRAGMA journal_mode=WAL")
|
|
conn.execute("PRAGMA busy_timeout=60000")
|
|
return conn
|
|
|
|
def _init_database(self):
|
|
"""Initialize the unified database schema"""
|
|
with self.get_connection(for_write=True) as conn:
|
|
cursor = conn.cursor()
|
|
|
|
# PRAGMAs are translated to no-ops by pg_adapter when using PostgreSQL
|
|
cursor.execute("PRAGMA journal_mode = WAL")
|
|
cursor.execute("PRAGMA busy_timeout = 10000")
|
|
cursor.execute("PRAGMA synchronous = NORMAL")
|
|
cursor.execute("PRAGMA foreign_keys = ON")
|
|
|
|
# Set a short lock_timeout for ALTER TABLE migrations so they don't
|
|
# block forever if pg_dump holds ACCESS SHARE locks. Columns already
|
|
# exist after first run, so skipping on timeout is safe.
|
|
try:
|
|
cursor.execute("SET lock_timeout = '2s'")
|
|
except Exception:
|
|
pass # SQLite doesn't support SET
|
|
|
|
# Main downloads table - unified for all platforms
|
|
cursor.execute('''
|
|
CREATE TABLE IF NOT EXISTS downloads (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
url_hash TEXT UNIQUE NOT NULL, -- SHA256 hash of URL for deduplication
|
|
url TEXT NOT NULL,
|
|
platform TEXT NOT NULL, -- 'instagram', 'tiktok', 'forum'
|
|
source TEXT, -- Username for social media, forum name for forums
|
|
content_type TEXT, -- 'post', 'story', 'reel', 'video', 'image', etc.
|
|
filename TEXT,
|
|
file_path TEXT,
|
|
file_size INTEGER,
|
|
file_hash TEXT, -- SHA256 of file content
|
|
method TEXT, -- Download method used
|
|
media_id TEXT, -- Platform-specific media identifier
|
|
post_date DATETIME,
|
|
download_date DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
status TEXT DEFAULT 'completed', -- 'completed', 'failed', 'pending'
|
|
attempts INTEGER DEFAULT 1,
|
|
error_message TEXT,
|
|
metadata TEXT -- JSON string for platform-specific data (includes intended_path when in review)
|
|
)
|
|
''')
|
|
|
|
# Forum-specific tables (still needed for thread tracking)
|
|
cursor.execute('''
|
|
CREATE TABLE IF NOT EXISTS forum_threads (
|
|
thread_id TEXT PRIMARY KEY,
|
|
forum_name TEXT NOT NULL,
|
|
thread_url TEXT UNIQUE NOT NULL,
|
|
thread_title TEXT,
|
|
author TEXT,
|
|
created_date DATETIME,
|
|
last_checked DATETIME,
|
|
last_post_date DATETIME,
|
|
post_count INTEGER DEFAULT 0,
|
|
status TEXT DEFAULT 'active', -- 'active', 'expired', 'completed'
|
|
monitor_until DATETIME,
|
|
metadata TEXT
|
|
)
|
|
''')
|
|
|
|
cursor.execute('''
|
|
CREATE TABLE IF NOT EXISTS forum_posts (
|
|
post_id TEXT PRIMARY KEY,
|
|
thread_id TEXT NOT NULL,
|
|
post_url TEXT,
|
|
author TEXT,
|
|
post_date DATETIME,
|
|
content_hash TEXT,
|
|
has_images BOOLEAN DEFAULT 0,
|
|
images_downloaded INTEGER DEFAULT 0,
|
|
metadata TEXT,
|
|
FOREIGN KEY (thread_id) REFERENCES forum_threads(thread_id)
|
|
)
|
|
''')
|
|
|
|
# Search monitoring table
|
|
cursor.execute('''
|
|
CREATE TABLE IF NOT EXISTS search_monitors (
|
|
search_id TEXT PRIMARY KEY,
|
|
platform TEXT NOT NULL,
|
|
source TEXT, -- Forum name or username
|
|
search_query TEXT,
|
|
search_url TEXT,
|
|
last_checked DATETIME,
|
|
check_frequency_hours INTEGER DEFAULT 24,
|
|
active BOOLEAN DEFAULT 1,
|
|
results_found INTEGER DEFAULT 0,
|
|
metadata TEXT
|
|
)
|
|
''')
|
|
|
|
# Scheduler state table
|
|
cursor.execute('''
|
|
CREATE TABLE IF NOT EXISTS scheduler_state (
|
|
task_id TEXT PRIMARY KEY,
|
|
last_run DATETIME,
|
|
next_run DATETIME,
|
|
run_count INTEGER DEFAULT 0,
|
|
status TEXT DEFAULT 'active', -- 'active', 'paused', 'disabled'
|
|
error_count INTEGER DEFAULT 0,
|
|
last_error TEXT,
|
|
metadata TEXT -- JSON string for additional data
|
|
)
|
|
''')
|
|
|
|
# Thread check history table to prevent duplicate checks
|
|
cursor.execute('''
|
|
CREATE TABLE IF NOT EXISTS thread_check_history (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
thread_id TEXT NOT NULL,
|
|
forum_name TEXT NOT NULL,
|
|
check_time DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
last_post_date DATETIME,
|
|
new_posts_found INTEGER DEFAULT 0,
|
|
images_downloaded INTEGER DEFAULT 0,
|
|
status TEXT DEFAULT 'completed', -- 'completed', 'failed', 'skipped'
|
|
UNIQUE(thread_id, check_time)
|
|
)
|
|
''')
|
|
|
|
# Index for efficient lookups
|
|
cursor.execute('''
|
|
CREATE INDEX IF NOT EXISTS idx_thread_check_history
|
|
ON thread_check_history(thread_id, check_time DESC)
|
|
''')
|
|
|
|
# Download queue (unified)
|
|
cursor.execute('''
|
|
CREATE TABLE IF NOT EXISTS download_queue (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
url TEXT UNIQUE NOT NULL,
|
|
platform TEXT NOT NULL,
|
|
source TEXT,
|
|
referer TEXT,
|
|
save_path TEXT,
|
|
priority INTEGER DEFAULT 5, -- 1-10, lower is higher priority
|
|
status TEXT DEFAULT 'pending', -- 'pending', 'downloading', 'completed', 'failed'
|
|
attempts INTEGER DEFAULT 0,
|
|
max_attempts INTEGER DEFAULT 3,
|
|
created_date DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
download_date DATETIME,
|
|
error_message TEXT,
|
|
metadata TEXT
|
|
)
|
|
''')
|
|
|
|
# Notifications table (tracks sent Pushover notifications)
|
|
cursor.execute('''
|
|
CREATE TABLE IF NOT EXISTS notifications (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
platform TEXT NOT NULL,
|
|
source TEXT,
|
|
content_type TEXT,
|
|
message TEXT NOT NULL,
|
|
title TEXT,
|
|
priority INTEGER DEFAULT 0,
|
|
download_count INTEGER DEFAULT 1,
|
|
sent_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
status TEXT DEFAULT 'sent', -- 'sent', 'failed'
|
|
response_data TEXT, -- JSON response from Pushover API
|
|
metadata TEXT -- Additional JSON metadata
|
|
)
|
|
''')
|
|
|
|
# Recycle Bin table for soft deletes
|
|
cursor.execute('''
|
|
CREATE TABLE IF NOT EXISTS recycle_bin (
|
|
id TEXT PRIMARY KEY, -- UUID used as unique filename
|
|
original_path TEXT NOT NULL, -- Full original path for restore
|
|
original_filename TEXT NOT NULL, -- Original filename (shown in UI)
|
|
recycle_path TEXT NOT NULL, -- Current path in recycle bin
|
|
file_extension TEXT, -- .jpg, .mp4, etc.
|
|
file_size INTEGER, -- Size in bytes
|
|
file_hash TEXT, -- SHA256 of file content
|
|
original_mtime REAL, -- Original modification timestamp (Unix time)
|
|
deleted_from TEXT NOT NULL, -- 'downloads', 'media', 'review'
|
|
deleted_at DATETIME DEFAULT CURRENT_TIMESTAMP, -- When deleted
|
|
deleted_by TEXT, -- User who deleted (username)
|
|
metadata TEXT, -- JSON: platform, source, content_type, face_recognition, etc.
|
|
restore_count INTEGER DEFAULT 0 -- How many times restored
|
|
)
|
|
''')
|
|
|
|
# Instagram Perceptual Hashes table (for duplicate detection)
|
|
cursor.execute('''
|
|
CREATE TABLE IF NOT EXISTS instagram_perceptual_hashes (
|
|
id TEXT PRIMARY KEY,
|
|
file_path TEXT NOT NULL UNIQUE,
|
|
filename TEXT,
|
|
platform TEXT,
|
|
source TEXT,
|
|
content_type TEXT,
|
|
perceptual_hash TEXT NOT NULL,
|
|
text_overlay_count INTEGER DEFAULT 0,
|
|
text_overlay_chars INTEGER DEFAULT 0,
|
|
quality_score REAL DEFAULT 0,
|
|
clean_score REAL DEFAULT 0,
|
|
resolution INTEGER DEFAULT 0,
|
|
file_size INTEGER DEFAULT 0,
|
|
width INTEGER DEFAULT 0,
|
|
height INTEGER DEFAULT 0,
|
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
|
)
|
|
''')
|
|
|
|
# File Inventory table (database-first file tracking)
|
|
cursor.execute('''
|
|
CREATE TABLE IF NOT EXISTS file_inventory (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
file_path TEXT NOT NULL UNIQUE,
|
|
filename TEXT NOT NULL,
|
|
platform TEXT NOT NULL,
|
|
source TEXT,
|
|
content_type TEXT,
|
|
method TEXT, -- Download method used
|
|
video_id TEXT, -- Video identifier for video platforms
|
|
file_size INTEGER,
|
|
file_hash TEXT,
|
|
width INTEGER,
|
|
height INTEGER,
|
|
location TEXT NOT NULL DEFAULT 'final',
|
|
created_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
last_verified TIMESTAMP,
|
|
metadata TEXT
|
|
)
|
|
''')
|
|
|
|
# Universal Video Downloads table (YouTube, Vimeo, Dailymotion, Bilibili, etc.)
|
|
cursor.execute('''
|
|
CREATE TABLE IF NOT EXISTS video_downloads (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
platform TEXT NOT NULL DEFAULT 'youtube',
|
|
video_id TEXT NOT NULL,
|
|
url TEXT NOT NULL,
|
|
title TEXT,
|
|
uploader TEXT,
|
|
upload_date DATETIME,
|
|
duration INTEGER,
|
|
file_path TEXT,
|
|
file_size INTEGER,
|
|
download_date DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
status TEXT DEFAULT 'completed',
|
|
metadata TEXT,
|
|
UNIQUE(platform, video_id)
|
|
)
|
|
''')
|
|
|
|
# Universal Video Preview List table (for multi-device sync)
|
|
cursor.execute('''
|
|
CREATE TABLE IF NOT EXISTS video_preview_list (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
platform TEXT NOT NULL DEFAULT 'youtube',
|
|
video_id TEXT NOT NULL,
|
|
url TEXT NOT NULL,
|
|
title TEXT,
|
|
uploader TEXT,
|
|
upload_date DATETIME,
|
|
duration INTEGER,
|
|
description TEXT,
|
|
thumbnail TEXT,
|
|
thumbnail_data BLOB,
|
|
view_count INTEGER,
|
|
like_count INTEGER,
|
|
already_downloaded INTEGER DEFAULT 0,
|
|
added_date DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
metadata TEXT,
|
|
UNIQUE(platform, video_id)
|
|
)
|
|
''')
|
|
|
|
# Add thumbnail_data column if not exists (migration for existing tables)
|
|
_safe_alter(cursor, "ALTER TABLE video_preview_list ADD COLUMN IF NOT EXISTS thumbnail_data BLOB")
|
|
_safe_alter(cursor, "ALTER TABLE video_downloads ADD COLUMN IF NOT EXISTS thumbnail_data BLOB")
|
|
|
|
# Migrate old youtube_downloads to video_downloads if exists
|
|
cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='youtube_downloads'")
|
|
if cursor.fetchone():
|
|
cursor.execute('''
|
|
INSERT OR IGNORE INTO video_downloads
|
|
(platform, video_id, url, title, uploader, upload_date, duration, file_path, file_size, download_date, status, metadata)
|
|
SELECT 'youtube', video_id, url, title, uploader, upload_date, duration, file_path, file_size, download_date, status, metadata
|
|
FROM youtube_downloads
|
|
''')
|
|
|
|
# Migrate old youtube_preview_list to video_preview_list if exists
|
|
cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='youtube_preview_list'")
|
|
if cursor.fetchone():
|
|
cursor.execute('''
|
|
INSERT OR IGNORE INTO video_preview_list
|
|
(platform, video_id, url, title, uploader, upload_date, duration, description, thumbnail, view_count, like_count, already_downloaded, added_date, metadata)
|
|
SELECT 'youtube', video_id, url, title, uploader, upload_date, duration, description, thumbnail, view_count, like_count, already_downloaded, added_date, metadata
|
|
FROM youtube_preview_list
|
|
''')
|
|
|
|
# Video downloads indexes
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_video_platform ON video_downloads(platform)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_video_id ON video_downloads(video_id)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_video_platform_id ON video_downloads(platform, video_id)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_video_uploader ON video_downloads(uploader)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_video_upload_date ON video_downloads(upload_date)')
|
|
|
|
# Create optimized indexes
|
|
# Single column indexes
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_url_hash ON downloads(url_hash)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_platform ON downloads(platform)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_source ON downloads(source)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_status ON downloads(status)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_download_date ON downloads(download_date)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_post_date ON downloads(post_date)')
|
|
|
|
# Composite indexes for common queries
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_platform_source ON downloads(platform, source)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_platform_status ON downloads(platform, status)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_source_content ON downloads(source, content_type)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_platform_date ON downloads(platform, post_date)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_source_date ON downloads(source, download_date)')
|
|
|
|
# File hash index for deduplication
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_file_hash ON downloads(file_hash)')
|
|
|
|
# Composite index for deduplication queries (file_hash + platform)
|
|
cursor.execute('''
|
|
CREATE INDEX IF NOT EXISTS idx_file_hash_platform
|
|
ON downloads(file_hash, platform)
|
|
WHERE file_hash IS NOT NULL
|
|
''')
|
|
|
|
# Add media_id column if it doesn't exist (for fast metadata queries)
|
|
_safe_alter(cursor, 'ALTER TABLE downloads ADD COLUMN IF NOT EXISTS media_id TEXT')
|
|
|
|
# Index for media_id (fast metadata queries)
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_media_id ON downloads(media_id)')
|
|
|
|
# Forum-specific indexes
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_forum_threads_status ON forum_threads(status)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_forum_threads_monitor ON forum_threads(monitor_until)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_forum_posts_thread ON forum_posts(thread_id)')
|
|
|
|
# Queue indexes
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_queue_status ON download_queue(status)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_queue_priority ON download_queue(priority, status)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_queue_platform ON download_queue(platform, status)')
|
|
|
|
# Notifications indexes
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_notifications_sent_at ON notifications(sent_at DESC)')
|
|
|
|
# Instagram Perceptual Hashes indexes
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_perceptual_hash_platform_source ON instagram_perceptual_hashes(platform, source)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_perceptual_hash_file_path ON instagram_perceptual_hashes(file_path)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_perceptual_hash_value ON instagram_perceptual_hashes(perceptual_hash)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_notifications_platform ON notifications(platform)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_notifications_source ON notifications(source)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_notifications_status ON notifications(status)')
|
|
|
|
# Recycle Bin indexes
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_recycle_deleted_at ON recycle_bin(deleted_at DESC)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_recycle_deleted_from ON recycle_bin(deleted_from)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_recycle_deleted_by ON recycle_bin(deleted_by)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_recycle_original_path ON recycle_bin(original_path)')
|
|
|
|
# File Inventory indexes
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_fi_platform_location ON file_inventory(platform, location, created_date DESC)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_fi_source ON file_inventory(source, created_date DESC)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_fi_location ON file_inventory(location)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_fi_hash ON file_inventory(file_hash)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_fi_file_path ON file_inventory(file_path)')
|
|
|
|
# Add tracking columns for dashboard card exclusions (migration for existing databases)
|
|
_safe_alter(cursor, 'ALTER TABLE file_inventory ADD COLUMN IF NOT EXISTS moved_from_review INTEGER DEFAULT 0')
|
|
_safe_alter(cursor, 'ALTER TABLE file_inventory ADD COLUMN IF NOT EXISTS moved_from_media INTEGER DEFAULT 0')
|
|
_safe_alter(cursor, 'ALTER TABLE file_inventory ADD COLUMN IF NOT EXISTS from_discovery INTEGER DEFAULT 0')
|
|
|
|
# Indexes for downloads.filename (used in subqueries)
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_downloads_filename ON downloads(filename)')
|
|
|
|
# Face recognition scans table
|
|
cursor.execute('''
|
|
CREATE TABLE IF NOT EXISTS face_recognition_scans (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
file_path TEXT NOT NULL,
|
|
has_match INTEGER DEFAULT 0,
|
|
matched_person TEXT,
|
|
confidence REAL,
|
|
face_count INTEGER DEFAULT 0,
|
|
scan_date DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
scan_type TEXT DEFAULT 'standard'
|
|
)
|
|
''')
|
|
|
|
# Indexes for face_recognition_scans table
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_face_scan_file_path ON face_recognition_scans(file_path)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_face_scan_date ON face_recognition_scans(scan_date DESC)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_face_scan_has_match ON face_recognition_scans(has_match)')
|
|
|
|
# Enable optimizations
|
|
cursor.execute('PRAGMA journal_mode=WAL')
|
|
cursor.execute('PRAGMA synchronous=NORMAL')
|
|
cursor.execute('PRAGMA cache_size=10000')
|
|
cursor.execute('PRAGMA temp_store=MEMORY')
|
|
|
|
# Create cleanup triggers
|
|
cursor.execute('''
|
|
CREATE TRIGGER IF NOT EXISTS cleanup_old_downloads
|
|
AFTER INSERT ON downloads
|
|
WHEN (SELECT COUNT(*) FROM downloads) > 100000
|
|
BEGIN
|
|
DELETE FROM downloads
|
|
WHERE download_date < datetime('now', '-180 days')
|
|
AND status = 'completed';
|
|
END
|
|
''')
|
|
|
|
cursor.execute('''
|
|
CREATE TRIGGER IF NOT EXISTS cleanup_failed_downloads
|
|
AFTER INSERT ON downloads
|
|
WHEN (SELECT COUNT(*) FROM downloads WHERE status = 'failed') > 10000
|
|
BEGIN
|
|
DELETE FROM downloads
|
|
WHERE download_date < datetime('now', '-30 days')
|
|
AND status = 'failed';
|
|
END
|
|
''')
|
|
|
|
cursor.execute('''
|
|
CREATE TRIGGER IF NOT EXISTS expire_forum_monitors
|
|
AFTER INSERT ON forum_threads
|
|
BEGIN
|
|
UPDATE forum_threads
|
|
SET status = 'expired'
|
|
WHERE monitor_until IS NOT NULL
|
|
AND monitor_until < datetime('now')
|
|
AND status = 'active';
|
|
END
|
|
''')
|
|
|
|
cursor.execute('''
|
|
CREATE TRIGGER IF NOT EXISTS cleanup_old_queue
|
|
AFTER INSERT ON download_queue
|
|
WHEN (SELECT COUNT(*) FROM download_queue WHERE status IN ('completed', 'failed')) > 50000
|
|
BEGIN
|
|
DELETE FROM download_queue
|
|
WHERE created_date < datetime('now', '-90 days')
|
|
AND status IN ('completed', 'failed');
|
|
END
|
|
''')
|
|
|
|
# ============================================================================
|
|
# SMART CONTENT ARCHIVE & DISCOVERY SYSTEM TABLES
|
|
# ============================================================================
|
|
|
|
# Tags table - hierarchical tagging system
|
|
cursor.execute('''
|
|
CREATE TABLE IF NOT EXISTS tags (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
name TEXT NOT NULL,
|
|
slug TEXT NOT NULL UNIQUE,
|
|
parent_id INTEGER REFERENCES tags(id) ON DELETE SET NULL,
|
|
color TEXT DEFAULT '#6366f1',
|
|
icon TEXT DEFAULT 'tag',
|
|
description TEXT,
|
|
source TEXT DEFAULT 'auto',
|
|
created_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
updated_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
|
)
|
|
''')
|
|
|
|
# File-Tag relationships (many-to-many)
|
|
cursor.execute('''
|
|
CREATE TABLE IF NOT EXISTS file_tags (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
file_id INTEGER NOT NULL REFERENCES file_inventory(id) ON DELETE CASCADE,
|
|
tag_id INTEGER NOT NULL REFERENCES tags(id) ON DELETE CASCADE,
|
|
created_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
created_by TEXT,
|
|
UNIQUE(file_id, tag_id)
|
|
)
|
|
''')
|
|
|
|
# Smart Folders - saved query filters (virtual folders)
|
|
cursor.execute('''
|
|
CREATE TABLE IF NOT EXISTS smart_folders (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
name TEXT NOT NULL,
|
|
slug TEXT NOT NULL UNIQUE,
|
|
icon TEXT DEFAULT 'folder',
|
|
color TEXT DEFAULT '#6366f1',
|
|
description TEXT,
|
|
filters TEXT NOT NULL,
|
|
sort_by TEXT DEFAULT 'post_date',
|
|
sort_order TEXT DEFAULT 'desc',
|
|
is_system INTEGER DEFAULT 0,
|
|
display_order INTEGER DEFAULT 0,
|
|
created_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
updated_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
|
)
|
|
''')
|
|
|
|
# Collections - manually curated groups (like albums)
|
|
cursor.execute('''
|
|
CREATE TABLE IF NOT EXISTS collections (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
name TEXT NOT NULL,
|
|
slug TEXT NOT NULL UNIQUE,
|
|
description TEXT,
|
|
cover_file_id INTEGER REFERENCES file_inventory(id) ON DELETE SET NULL,
|
|
color TEXT DEFAULT '#6366f1',
|
|
is_public INTEGER DEFAULT 0,
|
|
created_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
updated_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
|
)
|
|
''')
|
|
|
|
# Collection-File relationships (many-to-many with ordering)
|
|
cursor.execute('''
|
|
CREATE TABLE IF NOT EXISTS collection_files (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
collection_id INTEGER NOT NULL REFERENCES collections(id) ON DELETE CASCADE,
|
|
file_id INTEGER NOT NULL REFERENCES file_inventory(id) ON DELETE CASCADE,
|
|
display_order INTEGER DEFAULT 0,
|
|
added_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
added_by TEXT,
|
|
UNIQUE(collection_id, file_id)
|
|
)
|
|
''')
|
|
|
|
# Content Embeddings - CLIP vectors for semantic search
|
|
cursor.execute('''
|
|
CREATE TABLE IF NOT EXISTS content_embeddings (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
file_id INTEGER NOT NULL UNIQUE REFERENCES file_inventory(id) ON DELETE CASCADE,
|
|
embedding BLOB NOT NULL,
|
|
embedding_model TEXT DEFAULT 'clip-vit-base-patch32',
|
|
embedding_version INTEGER DEFAULT 1,
|
|
created_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
|
)
|
|
''')
|
|
|
|
# NOTE: tags and file_tags tables are created earlier in the Smart Content Archive section
|
|
# (lines 658-682) - do not duplicate here
|
|
|
|
# Discovery Scan Queue - background processing for new files
|
|
cursor.execute('''
|
|
CREATE TABLE IF NOT EXISTS discovery_scan_queue (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
file_id INTEGER NOT NULL REFERENCES file_inventory(id) ON DELETE CASCADE,
|
|
file_path TEXT NOT NULL,
|
|
scan_type TEXT NOT NULL DEFAULT 'embedding',
|
|
priority INTEGER DEFAULT 5,
|
|
status TEXT DEFAULT 'pending',
|
|
attempts INTEGER DEFAULT 0,
|
|
error_message TEXT,
|
|
created_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
started_date TIMESTAMP,
|
|
completed_date TIMESTAMP,
|
|
UNIQUE(file_id, scan_type)
|
|
)
|
|
''')
|
|
|
|
# User preferences - for storing per-user settings (dashboard state, etc.)
|
|
cursor.execute('''
|
|
CREATE TABLE IF NOT EXISTS user_preferences (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
user_id TEXT NOT NULL,
|
|
preference_key TEXT NOT NULL,
|
|
preference_value TEXT,
|
|
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
UNIQUE(user_id, preference_key)
|
|
)
|
|
''')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_user_preferences_user ON user_preferences(user_id)')
|
|
|
|
# Discovery System indexes
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_tags_parent ON tags(parent_id)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_tags_slug ON tags(slug)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_file_tags_file ON file_tags(file_id)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_file_tags_tag ON file_tags(tag_id)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_smart_folders_order ON smart_folders(display_order)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_collections_slug ON collections(slug)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_collection_files_collection ON collection_files(collection_id, display_order)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_collection_files_file ON collection_files(file_id)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_content_embeddings_file ON content_embeddings(file_id)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_discovery_queue_status ON discovery_scan_queue(status, priority)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_discovery_queue_file ON discovery_scan_queue(file_id)')
|
|
|
|
# Insert default system smart folders
|
|
cursor.execute('''
|
|
INSERT OR IGNORE INTO smart_folders (name, slug, icon, filters, is_system, display_order)
|
|
VALUES
|
|
('Recent Downloads', 'recent-downloads', 'clock', '{"date_range": "7d"}', 1, 1),
|
|
('Images', 'images', 'image', '{"media_type": "image"}', 1, 2),
|
|
('Videos', 'videos', 'video', '{"media_type": "video"}', 1, 3),
|
|
('Instagram', 'instagram', 'instagram', '{"platform": "instagram"}', 1, 4),
|
|
('TikTok', 'tiktok', 'video', '{"platform": "tiktok"}', 1, 5),
|
|
('Large Files', 'large-files', 'hard-drive', '{"size_min": 10485760}', 1, 6)
|
|
''')
|
|
|
|
# ============================================================================
# SCRAPER PROXY CONFIGURATION SYSTEM
# See docs/SCRAPER_PROXY_SYSTEM.md for full documentation
# ============================================================================

# Scrapers table - centralized configuration for all download modules.
# One row per scraper; holds proxy/Cloudflare settings, cookie storage,
# last-test status and a free-form JSON settings blob.
cursor.execute('''
    CREATE TABLE IF NOT EXISTS scrapers (
        id TEXT PRIMARY KEY,
        name TEXT NOT NULL,
        type TEXT NOT NULL,              -- 'direct', 'proxy', 'forum', 'cli_tool'
        module TEXT,                     -- Python module name, NULL for cli_tool
        base_url TEXT,                   -- Primary URL for the scraper
        target_platform TEXT,            -- 'instagram', 'snapchat', 'tiktok', NULL for forums/cli
        enabled INTEGER DEFAULT 1,       -- Enable/disable scraper

        -- Proxy settings
        proxy_enabled INTEGER DEFAULT 0,
        proxy_url TEXT,                  -- e.g., "socks5://user:pass@host:port"

        -- Cloudflare/Cookie settings
        flaresolverr_required INTEGER DEFAULT 0,
        cookies_json TEXT,               -- JSON blob of cookies
        cookies_updated_at TEXT,         -- ISO timestamp of last cookie update

        -- Test status
        last_test_at TEXT,               -- ISO timestamp of last test
        last_test_status TEXT,           -- 'success', 'failed', 'timeout'
        last_test_message TEXT,          -- Error message if failed

        -- Module-specific settings
        settings_json TEXT,              -- Additional JSON settings per-scraper

        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
        updated_at TEXT DEFAULT CURRENT_TIMESTAMP
    )
''')

# Scrapers indexes
cursor.execute('CREATE INDEX IF NOT EXISTS idx_scrapers_type ON scrapers(type)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_scrapers_enabled ON scrapers(enabled)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_scrapers_target_platform ON scrapers(target_platform)')

# Seed default scrapers (INSERT OR IGNORE to not overwrite existing).
# Tuple layout mirrors the INSERT column list below:
# (id, name, type, module, base_url, target_platform, flaresolverr_required)
default_scrapers = [
    ('imginn', 'Imginn', 'proxy', 'imginn_module', 'https://imginn.com', 'instagram', 1),
    ('imginn_api', 'ImgInn API', 'proxy', 'imginn_api_module', 'https://imginn.com', 'instagram', 1),
    ('fastdl', 'FastDL', 'proxy', 'fastdl_module', 'https://fastdl.app', 'instagram', 1),
    ('toolzu', 'Toolzu', 'proxy', 'toolzu_module', 'https://toolzu.com', 'instagram', 1),
    ('snapchat', 'Snapchat Direct', 'direct', 'snapchat_scraper', 'https://snapchat.com', 'snapchat', 0),
    ('instagram', 'Instagram (Direct)', 'direct', 'instaloader_module', 'https://instagram.com', 'instagram', 0),
    ('tiktok', 'TikTok', 'direct', 'tiktok_module', 'https://tiktok.com', 'tiktok', 0),
    ('coppermine', 'Coppermine', 'direct', 'coppermine_module', 'https://hqdiesel.net', None, 1),
    ('forum_phun', 'Phun.org', 'forum', 'forum_downloader', 'https://forum.phun.org', None, 1),
    ('forum_hqcelebcorner', 'HQCelebCorner', 'forum', 'forum_downloader', 'https://hqcelebcorner.com', None, 0),
    ('forum_picturepub', 'PicturePub', 'forum', 'forum_downloader', 'https://picturepub.net', None, 0),
    ('ytdlp', 'yt-dlp', 'cli_tool', None, None, None, 0),
    ('gallerydl', 'gallery-dl', 'cli_tool', None, None, None, 0),
]
for scraper in default_scrapers:
    cursor.execute('''
        INSERT OR IGNORE INTO scrapers
        (id, name, type, module, base_url, target_platform, flaresolverr_required)
        VALUES (?, ?, ?, ?, ?, ?, ?)
    ''', scraper)

# Migrate cookies from files to database (one-time migration)
self._migrate_scraper_cookies(cursor)
|
# Error log table for tracking errors from log files.
# Rows are deduplicated via error_hash (module+message digest); repeated
# occurrences bump occurrence_count / last_seen instead of inserting.
cursor.execute('''
    CREATE TABLE IF NOT EXISTS error_log (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        error_hash TEXT UNIQUE NOT NULL,        -- Hash of module+message for deduplication
        module TEXT NOT NULL,                   -- Module name (e.g., 'FaceRecognition', 'Forum')
        level TEXT DEFAULT 'ERROR',             -- Log level (ERROR, CRITICAL)
        message TEXT NOT NULL,                  -- Error message
        first_seen DATETIME NOT NULL,           -- First occurrence
        last_seen DATETIME NOT NULL,            -- Most recent occurrence
        occurrence_count INTEGER DEFAULT 1,     -- How many times this error occurred
        log_file TEXT,                          -- Which log file/component it came from
        line_context TEXT,                      -- JSON: lines before/after for context
        dismissed_at DATETIME,                  -- NULL if not dismissed
        viewed_at DATETIME,                     -- NULL if not viewed in dashboard
        push_alert_sent_at DATETIME,            -- When we last sent a push about this
        created_at DATETIME DEFAULT CURRENT_TIMESTAMP
    )
''')

# Error log indexes
cursor.execute('CREATE INDEX IF NOT EXISTS idx_error_log_hash ON error_log(error_hash)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_error_log_dismissed ON error_log(dismissed_at)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_error_log_last_seen ON error_log(last_seen)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_error_log_viewed ON error_log(viewed_at)')

# Error tracking settings (last dashboard visit, etc.)
cursor.execute('''
    CREATE TABLE IF NOT EXISTS error_tracking (
        id INTEGER PRIMARY KEY,
        user_id TEXT UNIQUE NOT NULL DEFAULT 'default',
        last_dashboard_visit DATETIME,
        last_errors_viewed DATETIME,
        created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
    )
''')

# Add UNIQUE constraint to user_id if missing (migration for existing tables).
# Two-path migration: the information_schema query only works on PostgreSQL
# (it raises on SQLite, landing us in the except branch); the fallback
# CREATE UNIQUE INDEX is valid on both engines.
try:
    # Try PostgreSQL information_schema approach first
    cursor.execute("""
        SELECT COUNT(*) FROM information_schema.table_constraints
        WHERE table_name = 'error_tracking' AND constraint_type = 'UNIQUE'
    """)
    has_unique = cursor.fetchone()[0] > 0
    if not has_unique:
        cursor.execute('ALTER TABLE error_tracking ADD CONSTRAINT error_tracking_user_id_key UNIQUE (user_id)')
        logger.info("Migrated error_tracking table to add UNIQUE constraint on user_id")
except Exception:
    # Fallback: try CREATE UNIQUE INDEX (works on both SQLite and PostgreSQL)
    try:
        cursor.execute('CREATE UNIQUE INDEX IF NOT EXISTS idx_error_tracking_user_id ON error_tracking(user_id)')
    except Exception:
        pass  # Constraint/index may already exist
|
# ============================================================================
# CELEBRITY DISCOVERY SYSTEM TABLES
# For searching and discovering content from talk shows, interviews, etc.
# ============================================================================

# Celebrity Profiles - people to search for.
# slug is the unique URL-safe identifier used by the indexes below.
cursor.execute('''
    CREATE TABLE IF NOT EXISTS celebrity_profiles (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        name TEXT NOT NULL,
        slug TEXT NOT NULL UNIQUE,
        image_url TEXT,
        notes TEXT,
        enabled INTEGER DEFAULT 1,
        created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
    )
''')

# Search Presets - saved searches for each celebrity.
# Cascade-deleted with the owning celebrity profile.
cursor.execute('''
    CREATE TABLE IF NOT EXISTS celebrity_search_presets (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        celebrity_id INTEGER NOT NULL REFERENCES celebrity_profiles(id) ON DELETE CASCADE,
        name TEXT NOT NULL,
        source_type TEXT NOT NULL,              -- 'youtube_channel', 'youtube_search', 'youtube_rss'
        source_value TEXT NOT NULL,             -- channel_id, search query, or RSS URL
        keywords TEXT,                          -- JSON array of filter keywords
        content_type TEXT,                      -- 'interview', 'red_carpet', 'photoshoot', 'bts', 'premiere', 'all'
        enabled INTEGER DEFAULT 1,
        last_checked DATETIME,
        check_frequency_hours INTEGER DEFAULT 24,
        results_count INTEGER DEFAULT 0,
        created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
    )
''')

# Discovered Videos - videos found from search presets.
# UNIQUE(celebrity_id, platform, video_id) dedupes the same video found by
# multiple presets of one celebrity.
cursor.execute('''
    CREATE TABLE IF NOT EXISTS celebrity_discovered_videos (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        preset_id INTEGER NOT NULL REFERENCES celebrity_search_presets(id) ON DELETE CASCADE,
        celebrity_id INTEGER NOT NULL REFERENCES celebrity_profiles(id) ON DELETE CASCADE,
        video_id TEXT NOT NULL,
        platform TEXT NOT NULL DEFAULT 'youtube',
        url TEXT NOT NULL,
        title TEXT,
        channel_name TEXT,
        channel_id TEXT,
        thumbnail TEXT,
        duration INTEGER,
        upload_date DATETIME,
        view_count INTEGER,
        description TEXT,
        content_type TEXT,                      -- detected or manual: 'interview', 'red_carpet', etc.
        status TEXT DEFAULT 'new',              -- 'new', 'queued', 'downloaded', 'ignored', 'watched'
        discovered_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        status_updated_at DATETIME,
        downloaded_path TEXT,
        metadata TEXT,                          -- JSON for extra data
        UNIQUE(celebrity_id, platform, video_id)
    )
''')

# Celebrity Discovery indexes
cursor.execute('CREATE INDEX IF NOT EXISTS idx_celeb_profile_slug ON celebrity_profiles(slug)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_celeb_profile_enabled ON celebrity_profiles(enabled)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_celeb_preset_celebrity ON celebrity_search_presets(celebrity_id)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_celeb_preset_enabled ON celebrity_search_presets(enabled)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_celeb_preset_type ON celebrity_search_presets(source_type)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_celeb_discovered_preset ON celebrity_discovered_videos(preset_id)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_celeb_discovered_celebrity ON celebrity_discovered_videos(celebrity_id)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_celeb_discovered_status ON celebrity_discovered_videos(status)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_celeb_discovered_date ON celebrity_discovered_videos(discovered_at DESC)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_celeb_discovered_upload ON celebrity_discovered_videos(upload_date DESC)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_celeb_discovered_video_id ON celebrity_discovered_videos(platform, video_id)')
|
# ============================================================================
# CELEBRITY APPEARANCES TRACKING
# ============================================================================

# Celebrity Appearances - track upcoming media appearances (TV, podcasts, radio).
# The table-level UNIQUE constraint covers TV episodes; movies and podcasts
# (where season/episode are NULL) are deduplicated by the partial unique
# indexes created further down.
cursor.execute('''
    CREATE TABLE IF NOT EXISTS celebrity_appearances (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        celebrity_id INTEGER NOT NULL,
        celebrity_name TEXT NOT NULL,
        appearance_type TEXT NOT NULL,
        show_name TEXT NOT NULL,
        episode_title TEXT,
        network TEXT,
        appearance_date DATETIME NOT NULL,
        announcement_date DATETIME,
        url TEXT,
        watch_url TEXT,
        description TEXT,
        tmdb_show_id INTEGER,
        tmdb_episode_id INTEGER,
        season_number INTEGER,
        episode_number INTEGER,
        status TEXT DEFAULT 'upcoming',
        notified BOOLEAN DEFAULT FALSE,
        created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        FOREIGN KEY (celebrity_id) REFERENCES celebrity_profiles(id) ON DELETE CASCADE,
        UNIQUE(celebrity_id, appearance_type, tmdb_show_id, season_number, episode_number)
    )
''')

cursor.execute('CREATE INDEX IF NOT EXISTS idx_appearances_date ON celebrity_appearances(appearance_date)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_appearances_celebrity ON celebrity_appearances(celebrity_id)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_appearances_status ON celebrity_appearances(status)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_appearances_type ON celebrity_appearances(appearance_type)')

# Partial unique index for movies (to prevent duplicates since season/episode are NULL)
cursor.execute('''
    CREATE UNIQUE INDEX IF NOT EXISTS idx_appearances_movie_unique
    ON celebrity_appearances(celebrity_id, appearance_type, tmdb_show_id)
    WHERE appearance_type = 'Movie'
''')

# Partial unique index for podcasts (to prevent duplicates)
cursor.execute('''
    CREATE UNIQUE INDEX IF NOT EXISTS idx_appearances_podcast_unique
    ON celebrity_appearances(celebrity_id, appearance_type, tmdb_show_id)
    WHERE appearance_type = 'Podcast'
''')

# Appearance notifications log - track all sent notifications.
# appearance_id is ON DELETE SET NULL so history survives appearance removal.
cursor.execute('''
    CREATE TABLE IF NOT EXISTS appearance_notifications (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        appearance_id INTEGER,
        celebrity_name TEXT NOT NULL,
        show_name TEXT NOT NULL,
        appearance_type TEXT NOT NULL,
        appearance_date DATE NOT NULL,
        notification_type TEXT NOT NULL,
        message TEXT,
        poster_url TEXT,
        sent_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        FOREIGN KEY (appearance_id) REFERENCES celebrity_appearances(id) ON DELETE SET NULL
    )
''')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_appearance_notifications_date ON appearance_notifications(sent_at)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_appearance_notifications_type ON appearance_notifications(notification_type)')

# Appearance configuration - singleton table for TMDb and other API settings.
# CHECK (id = 1) enforces the single row; the seed INSERT below creates it.
cursor.execute('''
    CREATE TABLE IF NOT EXISTS appearance_config (
        id INTEGER PRIMARY KEY CHECK (id = 1),
        tmdb_api_key TEXT,
        tmdb_enabled BOOLEAN DEFAULT TRUE,
        tmdb_check_interval_hours INTEGER DEFAULT 12,
        tmdb_last_check DATETIME,
        podcast_enabled BOOLEAN DEFAULT FALSE,
        podcast_sources TEXT,
        radio_enabled BOOLEAN DEFAULT FALSE,
        radio_sources TEXT,
        notify_new_appearances BOOLEAN DEFAULT TRUE,
        notify_days_before INTEGER DEFAULT 1,
        created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
    )
''')

cursor.execute('INSERT OR IGNORE INTO appearance_config (id) VALUES (1)')
|
|
|
# Extend celebrity_profiles table with TMDb fields.
# NOTE(review): PRAGMA table_info is SQLite-only, while the
# 'ADD COLUMN IF NOT EXISTS' syntax used below is PostgreSQL-only —
# presumably a driver/compat layer translates one of them; verify which
# engine this actually runs against.
cursor.execute('PRAGMA table_info(celebrity_profiles)')
columns = [row[1] for row in cursor.fetchall()]  # row[1] is the column name
if 'tmdb_person_id' not in columns:
    # Only create the dependent unique index when the column add succeeded
    # (_safe_alter returns False when skipped due to lock contention).
    if _safe_alter(cursor, 'ALTER TABLE celebrity_profiles ADD COLUMN IF NOT EXISTS tmdb_person_id INTEGER'):
        try:
            cursor.execute('CREATE UNIQUE INDEX IF NOT EXISTS idx_celebrity_tmdb_person ON celebrity_profiles(tmdb_person_id) WHERE tmdb_person_id IS NOT NULL')
        except Exception:
            pass
if 'tmdb_last_sync' not in columns:
    _safe_alter(cursor, 'ALTER TABLE celebrity_profiles ADD COLUMN IF NOT EXISTS tmdb_last_sync DATETIME')
if 'podchaser_creator_id' not in columns:
    if _safe_alter(cursor, 'ALTER TABLE celebrity_profiles ADD COLUMN IF NOT EXISTS podchaser_creator_id TEXT'):
        try:
            cursor.execute('CREATE UNIQUE INDEX IF NOT EXISTS idx_celebrity_podchaser_creator ON celebrity_profiles(podchaser_creator_id) WHERE podchaser_creator_id IS NOT NULL')
        except Exception:
            pass
if 'podchaser_last_sync' not in columns:
    _safe_alter(cursor, 'ALTER TABLE celebrity_profiles ADD COLUMN IF NOT EXISTS podchaser_last_sync DATETIME')

# Extend appearance_config table with Podchaser fields
cursor.execute('PRAGMA table_info(appearance_config)')
config_columns = [row[1] for row in cursor.fetchall()]
if 'podchaser_api_key' not in config_columns:
    _safe_alter(cursor, 'ALTER TABLE appearance_config ADD COLUMN IF NOT EXISTS podchaser_api_key TEXT')
if 'podchaser_enabled' not in config_columns:
    _safe_alter(cursor, 'ALTER TABLE appearance_config ADD COLUMN IF NOT EXISTS podchaser_enabled BOOLEAN DEFAULT FALSE')
if 'podchaser_last_check' not in config_columns:
    _safe_alter(cursor, 'ALTER TABLE appearance_config ADD COLUMN IF NOT EXISTS podchaser_last_check DATETIME')

# Extend celebrity_appearances table with filmography/credit fields
cursor.execute('PRAGMA table_info(celebrity_appearances)')
appearance_columns = [row[1] for row in cursor.fetchall()]
if 'credit_type' not in appearance_columns:
    _safe_alter(cursor, "ALTER TABLE celebrity_appearances ADD COLUMN IF NOT EXISTS credit_type TEXT DEFAULT 'acting'")
if 'character_name' not in appearance_columns:
    _safe_alter(cursor, 'ALTER TABLE celebrity_appearances ADD COLUMN IF NOT EXISTS character_name TEXT')
if 'job_title' not in appearance_columns:
    _safe_alter(cursor, 'ALTER TABLE celebrity_appearances ADD COLUMN IF NOT EXISTS job_title TEXT')
if 'plex_rating_key' not in appearance_columns:
    _safe_alter(cursor, 'ALTER TABLE celebrity_appearances ADD COLUMN IF NOT EXISTS plex_rating_key TEXT')
if 'plex_library_id' not in appearance_columns:
    _safe_alter(cursor, 'ALTER TABLE celebrity_appearances ADD COLUMN IF NOT EXISTS plex_library_id INTEGER')

# Create index for credit_type filtering
cursor.execute('CREATE INDEX IF NOT EXISTS idx_appearances_credit_type ON celebrity_appearances(credit_type)')
|
|
# Update unique constraint to include credit_type (allows multiple credits per appearance)
# Check if we need to migrate the table (if credit_type isn't in the unique constraint)
# Check if credit_type is already in the unique constraint
# NOTE(review): this re-reads table_info AFTER the credit_type column was
# added just above, so needs_migration is False on any run where that ALTER
# succeeded — the rebuild below effectively only fires when the earlier ALTER
# was skipped (e.g. lock contention). Confirm whether checking column
# presence (rather than the constraint itself) is intentional.
cursor.execute("PRAGMA table_info(celebrity_appearances)")
col_names = [row[1] for row in cursor.fetchall()]
needs_migration = 'credit_type' not in col_names
if needs_migration:
    # Need to recreate table with updated unique constraint.
    # Classic SQLite pattern: create new table, copy rows, drop old, rename.
    # Wrapped in try/except so a failed rebuild degrades to a warning rather
    # than aborting initialization.
    try:
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS celebrity_appearances_new (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                celebrity_id INTEGER NOT NULL,
                celebrity_name TEXT NOT NULL,
                appearance_type TEXT NOT NULL,
                show_name TEXT NOT NULL,
                episode_title TEXT,
                network TEXT,
                appearance_date DATETIME NOT NULL,
                announcement_date DATETIME,
                url TEXT,
                watch_url TEXT,
                description TEXT,
                tmdb_show_id INTEGER,
                tmdb_episode_id INTEGER,
                season_number INTEGER,
                episode_number INTEGER,
                status TEXT DEFAULT 'upcoming',
                notified BOOLEAN DEFAULT FALSE,
                created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
                updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
                credit_type TEXT DEFAULT 'acting',
                character_name TEXT,
                job_title TEXT,
                plex_rating_key TEXT,
                plex_library_id INTEGER,
                FOREIGN KEY (celebrity_id) REFERENCES celebrity_profiles(id) ON DELETE CASCADE,
                UNIQUE(celebrity_id, appearance_type, tmdb_show_id, season_number, episode_number, credit_type)
            )
        ''')
        # Copy existing data (the CASE normalizes legacy 0/1 notified values
        # to booleans; missing credit_type defaults to 'acting').
        cursor.execute('''
            INSERT OR IGNORE INTO celebrity_appearances_new
            SELECT id, celebrity_id, celebrity_name, appearance_type, show_name, episode_title,
                   network, appearance_date, announcement_date, url, watch_url, description,
                   tmdb_show_id, tmdb_episode_id, season_number, episode_number, status,
                   CASE WHEN notified = 1 THEN TRUE WHEN notified = 0 THEN FALSE ELSE notified END,
                   created_at, updated_at,
                   COALESCE(credit_type, 'acting'), character_name, job_title,
                   plex_rating_key, plex_library_id
            FROM celebrity_appearances
        ''')
        cursor.execute('DROP TABLE celebrity_appearances')
        cursor.execute('ALTER TABLE celebrity_appearances_new RENAME TO celebrity_appearances')
        # Recreate indexes (dropped along with the old table)
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_appearances_date ON celebrity_appearances(appearance_date)')
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_appearances_celebrity ON celebrity_appearances(celebrity_id)')
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_appearances_status ON celebrity_appearances(status)')
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_appearances_type ON celebrity_appearances(appearance_type)')
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_appearances_credit_type ON celebrity_appearances(credit_type)')
        cursor.execute('''
            CREATE UNIQUE INDEX IF NOT EXISTS idx_appearances_movie_unique
            ON celebrity_appearances(celebrity_id, appearance_type, tmdb_show_id, credit_type)
            WHERE appearance_type = 'Movie'
        ''')
        cursor.execute('''
            CREATE UNIQUE INDEX IF NOT EXISTS idx_appearances_podcast_unique
            ON celebrity_appearances(celebrity_id, appearance_type, tmdb_show_id, credit_type)
            WHERE appearance_type = 'Podcast'
        ''')
        logger.info("Migrated celebrity_appearances table to include credit_type in unique constraint")
    except Exception as e:
        logger.warning(f"Could not migrate celebrity_appearances table: {e}")

# Extend appearance_config table with Plex settings
cursor.execute('PRAGMA table_info(appearance_config)')
config_columns = [row[1] for row in cursor.fetchall()]
if 'plex_url' not in config_columns:
    _safe_alter(cursor, 'ALTER TABLE appearance_config ADD COLUMN IF NOT EXISTS plex_url TEXT')
if 'plex_token' not in config_columns:
    _safe_alter(cursor, 'ALTER TABLE appearance_config ADD COLUMN IF NOT EXISTS plex_token TEXT')
|
|
# ============================================================================
# UNIFIED VIDEO DOWNLOAD QUEUE
# ============================================================================

# Video download queue - unified queue for all video sources.
# UNIQUE(platform, video_id) prevents the same video from being queued twice.
cursor.execute('''
    CREATE TABLE IF NOT EXISTS video_download_queue (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        platform TEXT NOT NULL DEFAULT 'youtube',
        video_id TEXT NOT NULL,
        url TEXT NOT NULL,
        title TEXT NOT NULL,
        custom_title TEXT,                  -- User-editable title for filename
        channel_name TEXT,
        thumbnail TEXT,
        duration INTEGER,
        upload_date DATETIME,
        custom_date DATETIME,               -- User-editable date
        view_count INTEGER,
        description TEXT,
        source_type TEXT,                   -- 'celebrity', 'manual', 'search', etc.
        source_id INTEGER,                  -- Reference to source (celebrity_id, etc.)
        source_name TEXT,                   -- Display name of source
        priority INTEGER DEFAULT 5,         -- 1-10, lower is higher priority
        status TEXT DEFAULT 'pending',      -- 'pending', 'downloading', 'completed', 'failed', 'paused'
        progress INTEGER DEFAULT 0,         -- Download progress percentage
        file_path TEXT,
        file_size INTEGER,
        error_message TEXT,
        attempts INTEGER DEFAULT 0,
        max_attempts INTEGER DEFAULT 3,
        added_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        started_at DATETIME,
        completed_at DATETIME,
        metadata TEXT,                      -- JSON for extra data
        UNIQUE(platform, video_id)
    )
''')

cursor.execute('CREATE INDEX IF NOT EXISTS idx_vdq_status ON video_download_queue(status)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_vdq_priority ON video_download_queue(priority, status)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_vdq_source ON video_download_queue(source_type, source_id)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_vdq_added ON video_download_queue(added_at DESC)')
|
# ============================================================================
# YOUTUBE CHANNEL MONITOR
# ============================================================================

# YouTube channel monitor global settings (singleton row, id=1).
# phrases is a JSON array of match phrases applied across all monitors.
cursor.execute('''
    CREATE TABLE IF NOT EXISTS youtube_monitor_settings (
        id INTEGER PRIMARY KEY CHECK (id = 1),
        phrases TEXT NOT NULL DEFAULT '[]',
        check_interval_hours INTEGER DEFAULT 6,
        quality TEXT DEFAULT 'best',
        enabled INTEGER DEFAULT 1,
        last_checked TEXT,
        updated_at TEXT DEFAULT CURRENT_TIMESTAMP
    )
''')

# Insert default settings row if not exists
cursor.execute('''
    INSERT OR IGNORE INTO youtube_monitor_settings (id, phrases, check_interval_hours, quality, enabled)
    VALUES (1, '[]', 6, 'best', 1)
''')

# YouTube channel monitors - just the channels to monitor
cursor.execute('''
    CREATE TABLE IF NOT EXISTS youtube_channel_monitors (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        channel_url TEXT NOT NULL UNIQUE,
        channel_name TEXT,
        enabled INTEGER DEFAULT 1,
        last_checked TEXT,
        videos_found INTEGER DEFAULT 0,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP
    )
''')

cursor.execute('CREATE INDEX IF NOT EXISTS idx_ycm_enabled ON youtube_channel_monitors(enabled)')

# YouTube monitor history - track which videos we've already seen/processed.
# The unique index below on (monitor_id, video_id) dedupes per monitor.
cursor.execute('''
    CREATE TABLE IF NOT EXISTS youtube_monitor_history (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        monitor_id INTEGER,
        video_id TEXT NOT NULL,
        video_title TEXT,
        matched_phrase TEXT,
        action TEXT,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
        FOREIGN KEY (monitor_id) REFERENCES youtube_channel_monitors(id)
    )
''')

cursor.execute('CREATE INDEX IF NOT EXISTS idx_ymh_monitor ON youtube_monitor_history(monitor_id)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_ymh_video ON youtube_monitor_history(video_id)')
cursor.execute('CREATE UNIQUE INDEX IF NOT EXISTS idx_ymh_unique ON youtube_monitor_history(monitor_id, video_id)')
|
# ============================================================================
# EASYNEWS INTEGRATION TABLES
# ============================================================================

# Easynews configuration (singleton, id=1).
# Holds credentials, polling cadence, and optional proxy settings.
cursor.execute('''
    CREATE TABLE IF NOT EXISTS easynews_config (
        id INTEGER PRIMARY KEY CHECK (id = 1),
        username TEXT,
        password TEXT,
        enabled INTEGER DEFAULT 0,
        check_interval_hours INTEGER DEFAULT 12,
        last_check TEXT,
        auto_download INTEGER DEFAULT 0,
        min_quality TEXT DEFAULT '720p',
        proxy_enabled INTEGER DEFAULT 0,
        proxy_type TEXT DEFAULT 'http',
        proxy_host TEXT,
        proxy_port INTEGER,
        proxy_username TEXT,
        proxy_password TEXT,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
        updated_at TEXT DEFAULT CURRENT_TIMESTAMP
    )
''')

# Insert default config row if not exists
cursor.execute('''
    INSERT OR IGNORE INTO easynews_config (id, enabled, check_interval_hours, auto_download, min_quality)
    VALUES (1, 0, 12, 0, '720p')
''')

# Easynews search terms to monitor (optionally linked to a TMDb title)
cursor.execute('''
    CREATE TABLE IF NOT EXISTS easynews_searches (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        search_term TEXT NOT NULL,
        media_type TEXT DEFAULT 'any',
        tmdb_id INTEGER,
        tmdb_title TEXT,
        poster_url TEXT,
        enabled INTEGER DEFAULT 1,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
        updated_at TEXT DEFAULT CURRENT_TIMESTAMP
    )
''')

cursor.execute('CREATE INDEX IF NOT EXISTS idx_ens_enabled ON easynews_searches(enabled)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_ens_term ON easynews_searches(search_term)')

# Easynews discovered results - one row per (filename, download_url),
# deduplicated by the unique index below. parsed_* columns hold the
# title/season/episode/year extracted from the filename.
cursor.execute('''
    CREATE TABLE IF NOT EXISTS easynews_results (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        search_id INTEGER,
        filename TEXT NOT NULL,
        download_url TEXT NOT NULL,
        size_bytes INTEGER,
        post_date TEXT,
        discovered_at TEXT DEFAULT CURRENT_TIMESTAMP,
        parsed_title TEXT,
        parsed_season INTEGER,
        parsed_episode INTEGER,
        parsed_year INTEGER,
        tmdb_id INTEGER,
        tmdb_title TEXT,
        poster_url TEXT,
        quality TEXT,
        status TEXT DEFAULT 'new',
        download_path TEXT,
        file_hash TEXT,
        FOREIGN KEY (search_id) REFERENCES easynews_searches(id)
    )
''')

cursor.execute('CREATE INDEX IF NOT EXISTS idx_enr_search ON easynews_results(search_id)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_enr_status ON easynews_results(status)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_enr_filename ON easynews_results(filename)')
cursor.execute('CREATE UNIQUE INDEX IF NOT EXISTS idx_enr_unique ON easynews_results(filename, download_url)')
|
# ============================================================================
# YOUTUBE CHANNEL MONITOR MIGRATIONS (v11.20.0)
# ============================================================================

# Add new columns to youtube_channel_monitors for status management
# Step 1: Add all columns first (lock-tolerant; skipped adds are retried on
# the next startup since these statements are idempotent).
_safe_alter(cursor, "ALTER TABLE youtube_channel_monitors ADD COLUMN IF NOT EXISTS status TEXT DEFAULT 'active'")
_safe_alter(cursor, "ALTER TABLE youtube_channel_monitors ADD COLUMN IF NOT EXISTS always_active INTEGER DEFAULT 0")
_safe_alter(cursor, "ALTER TABLE youtube_channel_monitors ADD COLUMN IF NOT EXISTS last_video_date TEXT")
_safe_alter(cursor, "ALTER TABLE youtube_channel_monitors ADD COLUMN IF NOT EXISTS last_check_date TEXT")
_safe_alter(cursor, "ALTER TABLE youtube_channel_monitors ADD COLUMN IF NOT EXISTS paused_date TEXT")
_safe_alter(cursor, "ALTER TABLE youtube_channel_monitors ADD COLUMN IF NOT EXISTS paused_reason TEXT")
_safe_alter(cursor, "ALTER TABLE youtube_channel_monitors ADD COLUMN IF NOT EXISTS total_videos_found INTEGER DEFAULT 0")

# Step 2: Migrate existing enabled=0 channels to paused_manual status (after columns exist)
cursor.execute("""
    UPDATE youtube_channel_monitors
    SET status = 'paused_manual',
        paused_date = datetime('now'),
        paused_reason = 'Manually disabled before v11.20.0'
    WHERE enabled = 0 AND (status IS NULL OR status = 'active')
""")
migrated_count = cursor.rowcount
if migrated_count > 0:
    # NOTE(review): uses self.log(...) while the rest of this section logs
    # via the module-level logger — confirm self.log exists on this class.
    self.log(f"Migrated {migrated_count} disabled channels to paused_manual status", "info")

# Create index for status queries
cursor.execute('CREATE INDEX IF NOT EXISTS idx_ycm_status ON youtube_channel_monitors(status)')

# Add new settings columns to youtube_monitor_settings
_safe_alter(cursor, "ALTER TABLE youtube_monitor_settings ADD COLUMN IF NOT EXISTS auto_pause_threshold_months INTEGER DEFAULT 24")
_safe_alter(cursor, "ALTER TABLE youtube_monitor_settings ADD COLUMN IF NOT EXISTS paused_check_interval_days INTEGER DEFAULT 14")
_safe_alter(cursor, "ALTER TABLE youtube_monitor_settings ADD COLUMN IF NOT EXISTS auto_start_queue INTEGER DEFAULT 0")
_safe_alter(cursor, "ALTER TABLE youtube_monitor_settings ADD COLUMN IF NOT EXISTS notifications_enabled INTEGER DEFAULT 1")
_safe_alter(cursor, "ALTER TABLE youtube_monitor_settings ADD COLUMN IF NOT EXISTS max_results_per_phrase INTEGER DEFAULT 100")
|
|
# ============================================================================
# PAID CONTENT FEATURE TABLES
# For tracking content from subscription-based creator platforms
# (OnlyFans, Fansly, Patreon, Fanbox, etc.) via Coomer.su and Kemono.su APIs
# ============================================================================

# Table 1: paid_content_services - API Configuration for Coomer/Kemono
cursor.execute('''
    CREATE TABLE IF NOT EXISTS paid_content_services (
        id TEXT PRIMARY KEY,                     -- 'coomer', 'kemono'
        name TEXT NOT NULL,
        base_url TEXT NOT NULL,                  -- e.g., https://coomer.party
        enabled INTEGER DEFAULT 1,
        session_cookie TEXT,
        session_updated_at TEXT,
        last_health_check TEXT,
        health_status TEXT DEFAULT 'unknown',    -- 'healthy', 'degraded', 'down'
        supported_services TEXT,                 -- JSON: ['onlyfans', 'fansly']
        rate_limit_requests INTEGER DEFAULT 2,
        rate_limit_window_seconds INTEGER DEFAULT 1,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
        updated_at TEXT DEFAULT CURRENT_TIMESTAMP
    )
''')

# Seed paid_content_services data.
# INSERT OR IGNORE: existing rows (possibly with user-updated base_url or
# session cookies) are never overwritten by these seeds.
# Note: These services change domains frequently. The api_client has fallback defaults.
cursor.execute('''
    INSERT OR IGNORE INTO paid_content_services (id, name, base_url, supported_services) VALUES
    ('coomer', 'Coomer', 'https://coomer.party', '["onlyfans", "fansly", "candfans"]')
''')
cursor.execute('''
    INSERT OR IGNORE INTO paid_content_services (id, name, base_url, supported_services) VALUES
    ('kemono', 'Kemono', 'https://kemono.party', '["patreon", "fanbox", "gumroad", "subscribestar", "discord"]')
''')
cursor.execute('''
    INSERT OR IGNORE INTO paid_content_services (id, name, base_url, supported_services) VALUES
    ('youtube', 'YouTube', 'https://www.youtube.com', '["youtube"]')
''')
cursor.execute('''
    INSERT OR IGNORE INTO paid_content_services (id, name, base_url, supported_services) VALUES
    ('fansly_direct', 'Fansly Direct', 'https://apiv3.fansly.com/api/v1', '["fansly"]')
''')
cursor.execute('''
    INSERT OR IGNORE INTO paid_content_services (id, name, base_url, supported_services) VALUES
    ('pornhub', 'Pornhub', 'https://www.pornhub.com', '["pornhub"]')
''')
cursor.execute('''
    INSERT OR IGNORE INTO paid_content_services (id, name, base_url, supported_services) VALUES
    ('onlyfans_direct', 'OnlyFans Direct', 'https://onlyfans.com/api2/v2', '["onlyfans"]')
''')
cursor.execute('''
    INSERT OR IGNORE INTO paid_content_services (id, name, base_url, supported_services) VALUES
    ('xhamster', 'xHamster', 'https://xhamster.com', '["xhamster"]')
''')
cursor.execute('''
    INSERT OR IGNORE INTO paid_content_services (id, name, base_url, supported_services) VALUES
    ('tiktok', 'TikTok', 'https://www.tiktok.com', '["tiktok"]')
''')
cursor.execute('''
    INSERT OR IGNORE INTO paid_content_services (id, name, base_url, supported_services) VALUES
    ('instagram', 'Instagram', 'https://www.instagram.com', '["instagram"]')
''')
cursor.execute('''
    INSERT OR IGNORE INTO paid_content_services (id, name, base_url, supported_services) VALUES
    ('soundgasm', 'Soundgasm', 'https://soundgasm.net', '["soundgasm"]')
''')
cursor.execute('''
    INSERT OR IGNORE INTO paid_content_services (id, name, base_url, supported_services) VALUES
    ('snapchat', 'Snapchat', 'https://www.snapchat.com', '["snapchat"]')
''')
cursor.execute('''
    INSERT OR IGNORE INTO paid_content_services (id, name, base_url, supported_services) VALUES
    ('hqcelebcorner', 'HQCelebCorner', 'https://www.hqcelebcorner.net', '["hqcelebcorner"]')
''')
cursor.execute('''
    INSERT OR IGNORE INTO paid_content_services (id, name, base_url, supported_services) VALUES
    ('picturepub', 'PicturePub', 'https://picturepub.net', '["picturepub"]')
''')
cursor.execute('''
    INSERT OR IGNORE INTO paid_content_services (id, name, base_url, supported_services) VALUES
    ('reddit', 'Reddit', 'https://www.reddit.com', '["reddit"]')
''')
|
# Migrate existing old URLs to new domains
|
|
cursor.execute('''
|
|
UPDATE paid_content_services SET base_url = 'https://coomer.party'
|
|
WHERE id = 'coomer' AND (base_url LIKE '%coomer.su%' OR base_url LIKE '%coomer.st%')
|
|
''')
|
|
cursor.execute('''
|
|
UPDATE paid_content_services SET base_url = 'https://kemono.party'
|
|
WHERE id = 'kemono' AND (base_url LIKE '%kemono.su%' OR base_url LIKE '%kemono.cr%')
|
|
''')
|
|
|
|
# Table 2: paid_content_identities - Creator Linking (same person across platforms)
# One identity row groups multiple paid_content_creators rows that refer to the
# same real person (creators reference it via identity_id).
cursor.execute('''
    CREATE TABLE IF NOT EXISTS paid_content_identities (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        name TEXT NOT NULL,                          -- User-defined unified name
        slug TEXT NOT NULL UNIQUE,
        profile_image_url TEXT,
        notes TEXT,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
        updated_at TEXT DEFAULT CURRENT_TIMESTAMP
    )
''')

# Creator Groups (named collections of creators for filtering)
cursor.execute('''
    CREATE TABLE IF NOT EXISTS paid_content_creator_groups (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        name TEXT NOT NULL,
        description TEXT,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
        updated_at TEXT DEFAULT CURRENT_TIMESTAMP
    )
''')

# Junction table for group membership, with optional per-member filters
# (filter_tagged_users / filter_tag_ids — presumably serialized lists; verify
# against the code that writes them).
cursor.execute('''
    CREATE TABLE IF NOT EXISTS paid_content_creator_group_members (
        group_id INTEGER NOT NULL REFERENCES paid_content_creator_groups(id) ON DELETE CASCADE,
        creator_id INTEGER NOT NULL REFERENCES paid_content_creators(id) ON DELETE CASCADE,
        added_at TEXT DEFAULT CURRENT_TIMESTAMP,
        filter_tagged_users TEXT DEFAULT NULL,
        filter_tag_ids TEXT DEFAULT NULL,
        PRIMARY KEY (group_id, creator_id)
    )
''')

# Reverse lookup: all groups a given creator belongs to.
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_group_members_creator ON paid_content_creator_group_members(creator_id)')
|
# Table 3: paid_content_creators - Tracked Creators
cursor.execute('''
    CREATE TABLE IF NOT EXISTS paid_content_creators (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        service_id TEXT NOT NULL,                    -- 'coomer' or 'kemono'
        platform TEXT NOT NULL,                      -- 'onlyfans', 'patreon', etc.
        creator_id TEXT NOT NULL,                    -- Platform-specific ID
        username TEXT NOT NULL,
        display_name TEXT,
        profile_image_url TEXT,
        banner_image_url TEXT,
        bio TEXT,                                    -- Creator bio/description
        joined_date TEXT,                            -- When creator joined the platform
        location TEXT,                               -- Creator's location
        external_links TEXT,                         -- JSON array of social links
        identity_id INTEGER REFERENCES paid_content_identities(id) ON DELETE SET NULL,
        enabled INTEGER DEFAULT 1,
        last_checked TEXT,
        last_post_date TEXT,
        post_count INTEGER DEFAULT 0,
        downloaded_count INTEGER DEFAULT 0,
        total_size_bytes INTEGER DEFAULT 0,
        auto_download INTEGER DEFAULT 1,
        download_embeds INTEGER DEFAULT 1,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
        updated_at TEXT DEFAULT CURRENT_TIMESTAMP,
        UNIQUE(service_id, platform, creator_id),
        FOREIGN KEY (service_id) REFERENCES paid_content_services(id)
    )
''')

cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_creators_identity ON paid_content_creators(identity_id)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_creators_service_platform ON paid_content_creators(service_id, platform)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_creators_enabled ON paid_content_creators(enabled)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_creators_last_checked ON paid_content_creators(last_checked)')

# Column migrations for databases created before these fields existed.
# FIX: SQLite's ALTER TABLE has no "ADD COLUMN IF NOT EXISTS" clause, so the
# previous statements were a syntax error on SQLite.  A plain ADD COLUMN is
# safe to re-run because _safe_alter() treats duplicate-column errors as
# success.  The three former migration loops are merged into one.
for col in ['bio TEXT',
            'joined_date TEXT', 'location TEXT', 'external_links TEXT',
            'last_coomer_check TEXT',
            'sync_posts INTEGER DEFAULT 1', 'sync_stories INTEGER DEFAULT 1',
            'sync_highlights INTEGER DEFAULT 1', 'filter_tagged_users TEXT DEFAULT NULL',
            'use_authenticated_api INTEGER DEFAULT 0']:
    _safe_alter(cursor, f'ALTER TABLE paid_content_creators ADD COLUMN {col}')

# Add filter columns to group members (migration)
for col in ['filter_tagged_users TEXT DEFAULT NULL', 'filter_tag_ids TEXT DEFAULT NULL']:
    _safe_alter(cursor, f'ALTER TABLE paid_content_creator_group_members ADD COLUMN {col}')
|
|
# Table 4: paid_content_posts - Individual Posts
cursor.execute('''
    CREATE TABLE IF NOT EXISTS paid_content_posts (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        creator_id INTEGER NOT NULL,
        post_id TEXT NOT NULL,                       -- ID from Coomer/Kemono API
        title TEXT,
        content TEXT,                                -- Post text
        published_at TEXT,
        added_at TEXT,                               -- When added to archive
        edited_at TEXT,
        has_attachments INTEGER DEFAULT 0,
        attachment_count INTEGER DEFAULT 0,
        downloaded INTEGER DEFAULT 0,
        download_date TEXT,
        embed_count INTEGER DEFAULT 0,
        embed_downloaded INTEGER DEFAULT 0,
        is_favorited INTEGER DEFAULT 0,
        is_viewed INTEGER DEFAULT 0,
        view_date TEXT,
        local_path TEXT,                             -- Directory where post files are stored
        metadata TEXT,                               -- JSON for additional data
        deleted_at TEXT DEFAULT NULL,                -- Soft delete timestamp
        UNIQUE(creator_id, post_id),
        FOREIGN KEY (creator_id) REFERENCES paid_content_creators(id) ON DELETE CASCADE
    )
''')

cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_posts_creator ON paid_content_posts(creator_id)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_posts_published ON paid_content_posts(published_at DESC)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_posts_downloaded ON paid_content_posts(downloaded)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_posts_favorited ON paid_content_posts(is_favorited)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_posts_viewed ON paid_content_posts(is_viewed)')

# Soft-delete migration for pre-existing databases.
# FIX: SQLite's ALTER TABLE has no "ADD COLUMN IF NOT EXISTS"; a plain
# ADD COLUMN is re-run safe because _safe_alter() swallows duplicate-column
# errors.
_safe_alter(cursor, 'ALTER TABLE paid_content_posts ADD COLUMN deleted_at TEXT DEFAULT NULL')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_posts_deleted ON paid_content_posts(deleted_at)')
|
|
# Table 5: paid_content_attachments - Post Attachments
cursor.execute('''
    CREATE TABLE IF NOT EXISTS paid_content_attachments (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        post_id INTEGER NOT NULL,
        attachment_index INTEGER DEFAULT 0,          -- Order in post
        name TEXT NOT NULL,
        file_type TEXT,                              -- 'image', 'video', 'archive', 'document'
        extension TEXT,
        server_path TEXT,                            -- Path on Coomer/Kemono server
        download_url TEXT,
        file_size INTEGER,
        width INTEGER,
        height INTEGER,
        duration INTEGER,                            -- For videos (seconds)
        status TEXT DEFAULT 'pending',               -- 'pending', 'downloading', 'completed', 'failed', 'duplicate', 'skipped'
        local_path TEXT,
        local_filename TEXT,
        file_hash TEXT,                              -- SHA256
        perceptual_hash TEXT,
        error_message TEXT,
        download_attempts INTEGER DEFAULT 0,
        last_attempt TEXT,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
        downloaded_at TEXT,
        UNIQUE(post_id, server_path),
        FOREIGN KEY (post_id) REFERENCES paid_content_posts(id) ON DELETE CASCADE
    )
''')

cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_attachments_post ON paid_content_attachments(post_id)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_attachments_status ON paid_content_attachments(status)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_attachments_hash ON paid_content_attachments(file_hash)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_attachments_phash ON paid_content_attachments(perceptual_hash)')

# Migrations: inline thumbnail blob + quality-recheck tracking columns.
# FIX: SQLite's ALTER TABLE has no "ADD COLUMN IF NOT EXISTS"; plain
# ADD COLUMN is re-run safe because _safe_alter() swallows duplicate-column
# errors.
for col in ['thumbnail_data BLOB',
            'needs_quality_recheck INTEGER DEFAULT 0',
            'last_quality_check TEXT',
            'quality_recheck_count INTEGER DEFAULT 0']:
    _safe_alter(cursor, f'ALTER TABLE paid_content_attachments ADD COLUMN {col}')
|
# Table 6a: paid_content_messages - Chat messages from creators
cursor.execute('''
    CREATE TABLE IF NOT EXISTS paid_content_messages (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        creator_id INTEGER NOT NULL,
        message_id TEXT NOT NULL,
        text TEXT,
        sent_at TEXT,
        is_from_creator INTEGER DEFAULT 1,
        is_tip INTEGER DEFAULT 0,
        tip_amount REAL,
        price REAL,
        is_free INTEGER DEFAULT 1,
        is_purchased INTEGER DEFAULT 0,
        has_attachments INTEGER DEFAULT 0,
        attachment_count INTEGER DEFAULT 0,
        is_read INTEGER DEFAULT 0,
        reply_to_message_id TEXT,
        metadata TEXT,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
        UNIQUE(creator_id, message_id),
        FOREIGN KEY (creator_id) REFERENCES paid_content_creators(id) ON DELETE CASCADE
    )
''')

cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_messages_creator ON paid_content_messages(creator_id)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_messages_sent_at ON paid_content_messages(sent_at DESC)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_messages_from_creator ON paid_content_messages(is_from_creator)')

# Migration: attachments may belong to a chat message instead of a post.
# FIX: SQLite's ALTER TABLE has no "ADD COLUMN IF NOT EXISTS"; plain
# ADD COLUMN is re-run safe because _safe_alter() swallows duplicate-column
# errors.
_safe_alter(cursor, 'ALTER TABLE paid_content_attachments ADD COLUMN message_id INTEGER REFERENCES paid_content_messages(id) ON DELETE CASCADE')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_attachments_message ON paid_content_attachments(message_id)')

# Table 6: paid_content_embeds - Embedded Videos (YouTube, etc.)
cursor.execute('''
    CREATE TABLE IF NOT EXISTS paid_content_embeds (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        post_id INTEGER NOT NULL,
        url TEXT NOT NULL,
        platform TEXT,                               -- 'youtube', 'vimeo', etc.
        video_id TEXT,
        title TEXT,
        status TEXT DEFAULT 'pending',               -- 'pending', 'downloading', 'completed', 'failed', 'skipped'
        local_path TEXT,
        local_filename TEXT,
        file_size INTEGER,
        duration INTEGER,
        error_message TEXT,
        download_attempts INTEGER DEFAULT 0,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
        downloaded_at TEXT,
        UNIQUE(post_id, url),
        FOREIGN KEY (post_id) REFERENCES paid_content_posts(id) ON DELETE CASCADE
    )
''')

cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_embeds_post ON paid_content_embeds(post_id)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_embeds_status ON paid_content_embeds(status)')
|
|
# Table 7: paid_content_favorites - User Favorites
# Polymorphic favorites: item_id points into a different table depending on
# item_type, so no FK constraint is possible here.
cursor.execute('''
    CREATE TABLE IF NOT EXISTS paid_content_favorites (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        item_type TEXT NOT NULL,                     -- 'creator', 'post', 'attachment'
        item_id INTEGER NOT NULL,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
        UNIQUE(item_type, item_id)
    )
''')

cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_favorites_type_item ON paid_content_favorites(item_type, item_id)')

# Table 8: paid_content_download_history - For Retry Tracking
# Each row records one download attempt; exactly one of attachment_id /
# embed_id is expected to be set (not enforced by the schema).
cursor.execute('''
    CREATE TABLE IF NOT EXISTS paid_content_download_history (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        attachment_id INTEGER,
        embed_id INTEGER,
        url TEXT,
        attempt_date TEXT DEFAULT CURRENT_TIMESTAMP,
        status TEXT NOT NULL,                        -- 'success', 'failed', 'skipped'
        error_message TEXT,
        response_code INTEGER,
        duration_seconds REAL,
        FOREIGN KEY (attachment_id) REFERENCES paid_content_attachments(id) ON DELETE CASCADE,
        FOREIGN KEY (embed_id) REFERENCES paid_content_embeds(id) ON DELETE CASCADE
    )
''')

cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_history_attachment ON paid_content_download_history(attachment_id)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_history_status ON paid_content_download_history(status)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_history_date ON paid_content_download_history(attempt_date DESC)')
|
# Table 9: paid_content_notifications - Notification History
cursor.execute('''
    CREATE TABLE IF NOT EXISTS paid_content_notifications (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        notification_type TEXT NOT NULL,             -- 'new_content', 'download_complete', 'sync_complete', 'error'
        creator_id INTEGER,
        post_id INTEGER,
        title TEXT NOT NULL,
        message TEXT NOT NULL,
        download_count INTEGER DEFAULT 0,
        file_count INTEGER DEFAULT 0,
        is_read INTEGER DEFAULT 0,
        read_at TEXT,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
        FOREIGN KEY (creator_id) REFERENCES paid_content_creators(id) ON DELETE SET NULL,
        FOREIGN KEY (post_id) REFERENCES paid_content_posts(id) ON DELETE SET NULL
    )
''')

cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_notifications_created ON paid_content_notifications(created_at DESC)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_notifications_read ON paid_content_notifications(is_read)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_notifications_creator ON paid_content_notifications(creator_id)')

# Migration: metadata column (for storing media_files).
# FIX: SQLite's ALTER TABLE has no "ADD COLUMN IF NOT EXISTS"; plain
# ADD COLUMN is re-run safe because _safe_alter() swallows duplicate-column
# errors.
_safe_alter(cursor, 'ALTER TABLE paid_content_notifications ADD COLUMN metadata TEXT')
|
# Table 10: paid_content_config - Settings (Singleton)
# The CHECK (id = 1) constraint guarantees at most one row can ever exist.
cursor.execute('''
    CREATE TABLE IF NOT EXISTS paid_content_config (
        id INTEGER PRIMARY KEY CHECK (id = 1),       -- Singleton
        base_download_path TEXT DEFAULT '/paid-content',
        organize_by_date INTEGER DEFAULT 1,
        organize_by_post INTEGER DEFAULT 1,
        check_interval_hours INTEGER DEFAULT 6,
        max_concurrent_downloads INTEGER DEFAULT 3,
        download_embeds INTEGER DEFAULT 1,
        embed_quality TEXT DEFAULT 'best',
        notifications_enabled INTEGER DEFAULT 1,
        push_notifications_enabled INTEGER DEFAULT 1,
        perceptual_duplicate_detection INTEGER DEFAULT 1,
        perceptual_threshold INTEGER DEFAULT 12,     -- Hamming distance
        auto_retry_failed INTEGER DEFAULT 1,
        retry_max_attempts INTEGER DEFAULT 3,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
        updated_at TEXT DEFAULT CURRENT_TIMESTAMP
    )
''')

# Insert default paid content config (no-op if the singleton row exists;
# all columns fall back to their schema defaults).
cursor.execute('INSERT OR IGNORE INTO paid_content_config (id) VALUES (1)')
|
# Table 11: paid_content_recycle_bin - Soft-deleted content
# Stores a JSON snapshot of the deleted record so it can be restored later.
cursor.execute('''
    CREATE TABLE IF NOT EXISTS paid_content_recycle_bin (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        item_type TEXT NOT NULL,                     -- 'post', 'attachment', 'creator'
        original_id INTEGER NOT NULL,
        original_data TEXT NOT NULL,                 -- JSON of original record
        deleted_at TEXT DEFAULT CURRENT_TIMESTAMP,
        deleted_by TEXT,
        restore_path TEXT,
        UNIQUE(item_type, original_id)
    )
''')

cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_recycle_type ON paid_content_recycle_bin(item_type)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_recycle_deleted_at ON paid_content_recycle_bin(deleted_at DESC)')

# Table 12: paid_content_tags - Tag definitions
cursor.execute('''
    CREATE TABLE IF NOT EXISTS paid_content_tags (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        name TEXT NOT NULL UNIQUE,
        slug TEXT NOT NULL UNIQUE,
        color TEXT DEFAULT '#6b7280',                -- Hex color for UI display
        description TEXT,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP
    )
''')

cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_tags_slug ON paid_content_tags(slug)')

# Insert default PPV tag
cursor.execute('''
    INSERT OR IGNORE INTO paid_content_tags (name, slug, color, description)
    VALUES ('PPV', 'ppv', '#f59e0b', 'Pay-per-view content')
''')

# Insert default Short tag (for xHamster moments/shorts)
cursor.execute('''
    INSERT OR IGNORE INTO paid_content_tags (name, slug, color, description)
    VALUES ('Short', 'short', '#8b5cf6', 'Short-form content (moments, clips)')
''')
|
# Table 13: paid_content_post_tags - Junction table for post-tag relationships
cursor.execute('''
    CREATE TABLE IF NOT EXISTS paid_content_post_tags (
        post_id INTEGER NOT NULL,
        tag_id INTEGER NOT NULL,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
        PRIMARY KEY (post_id, tag_id),
        FOREIGN KEY (post_id) REFERENCES paid_content_posts(id) ON DELETE CASCADE,
        FOREIGN KEY (tag_id) REFERENCES paid_content_tags(id) ON DELETE CASCADE
    )
''')

cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_post_tags_post ON paid_content_post_tags(post_id)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_post_tags_tag ON paid_content_post_tags(tag_id)')

# Table 13b: paid_content_post_tagged_users - Junction table for Instagram tagged users
cursor.execute('''
    CREATE TABLE IF NOT EXISTS paid_content_post_tagged_users (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        post_id INTEGER NOT NULL,
        username TEXT NOT NULL,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
        UNIQUE(post_id, username),
        FOREIGN KEY (post_id) REFERENCES paid_content_posts(id) ON DELETE CASCADE
    )
''')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_tagged_users_username ON paid_content_post_tagged_users(username)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_tagged_users_post_id ON paid_content_post_tagged_users(post_id)')

# Table 14: paid_content_auto_tag_rules - Auto-tagging rules for sync
# conditions and tag_ids are TEXT columns — presumably serialized JSON;
# verify against the rule-evaluation code.
cursor.execute('''
    CREATE TABLE IF NOT EXISTS paid_content_auto_tag_rules (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        name TEXT NOT NULL,
        enabled INTEGER DEFAULT 1,
        conditions TEXT NOT NULL,
        tag_ids TEXT NOT NULL,
        priority INTEGER DEFAULT 0,
        match_count INTEGER DEFAULT 0,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
        updated_at TEXT DEFAULT CURRENT_TIMESTAMP
    )
''')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_auto_tag_rules_enabled ON paid_content_auto_tag_rules(enabled)')

# Table 15: paid_content_watch_later - Watch later playlist queue
# position drives the user-defined playlist ordering.
cursor.execute('''
    CREATE TABLE IF NOT EXISTS paid_content_watch_later (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        attachment_id INTEGER NOT NULL UNIQUE,
        post_id INTEGER NOT NULL,
        creator_id INTEGER NOT NULL,
        position INTEGER NOT NULL DEFAULT 0,
        added_at TEXT DEFAULT CURRENT_TIMESTAMP,
        FOREIGN KEY (attachment_id) REFERENCES paid_content_attachments(id) ON DELETE CASCADE,
        FOREIGN KEY (post_id) REFERENCES paid_content_posts(id) ON DELETE CASCADE,
        FOREIGN KEY (creator_id) REFERENCES paid_content_creators(id) ON DELETE CASCADE
    )
''')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_watch_later_position ON paid_content_watch_later(position)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pc_watch_later_attachment ON paid_content_watch_later(attachment_id)')
|
# ============================================================================
# PRIVATE GALLERY TABLES
# Encrypted private media storage with person tracking
# ============================================================================

# Private Gallery Config - key/value settings store.
cursor.execute('''
    CREATE TABLE IF NOT EXISTS private_media_config (
        key TEXT PRIMARY KEY,
        value TEXT NOT NULL,
        updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
    )
''')

# Insert default config values; INSERT OR IGNORE preserves any values the
# user has already customized.
default_config = [
    ('storage_path', '/opt/immich/private'),
    ('thumbnail_path', '/opt/immich/private/thumbnails'),
    ('organize_by_person', 'true'),
    ('organize_by_date', 'true'),
    ('auto_lock_minutes', '30'),
    ('duplicate_auto_select_distance', '2'),
    ('is_setup_complete', 'false'),
]
for key, value in default_config:
    cursor.execute('''
        INSERT OR IGNORE INTO private_media_config (key, value)
        VALUES (?, ?)
    ''', (key, value))
|
# Private Gallery Relationships - Configurable relationship types
# Names are stored encrypted (encrypted_* columns hold ciphertext).
cursor.execute('''
    CREATE TABLE IF NOT EXISTS private_media_relationships (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        encrypted_name TEXT NOT NULL,
        color TEXT DEFAULT '#6366f1',
        created_at DATETIME DEFAULT CURRENT_TIMESTAMP
    )
''')

# Private Gallery Persons - People tracked in the gallery
# ON DELETE RESTRICT: a relationship type cannot be deleted while any
# person still uses it.
cursor.execute('''
    CREATE TABLE IF NOT EXISTS private_media_persons (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        encrypted_name TEXT NOT NULL,
        encrypted_sort_name TEXT,
        relationship_id INTEGER NOT NULL,
        created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        FOREIGN KEY (relationship_id) REFERENCES private_media_relationships(id) ON DELETE RESTRICT
    )
''')

cursor.execute('CREATE INDEX IF NOT EXISTS idx_pm_persons_relationship ON private_media_persons(relationship_id)')

# Private Gallery Posts - Groups multiple media items together
cursor.execute('''
    CREATE TABLE IF NOT EXISTS private_media_posts (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        person_id INTEGER,
        encrypted_description TEXT,
        encrypted_media_date TEXT NOT NULL,
        created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        FOREIGN KEY (person_id) REFERENCES private_media_persons(id) ON DELETE SET NULL
    )
''')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pm_posts_person ON private_media_posts(person_id)')
|
# Private Gallery Media - The actual media items
cursor.execute('''
    CREATE TABLE IF NOT EXISTS private_media (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        post_id INTEGER,
        storage_id TEXT NOT NULL UNIQUE,
        encrypted_filename TEXT NOT NULL,
        encrypted_description TEXT,
        file_hash TEXT NOT NULL,
        file_size INTEGER NOT NULL,
        file_type TEXT NOT NULL,
        mime_type TEXT NOT NULL,
        width INTEGER,
        height INTEGER,
        duration REAL,
        person_id INTEGER,
        encrypted_media_date TEXT NOT NULL,
        source_type TEXT,
        encrypted_source_path TEXT,
        created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        FOREIGN KEY (post_id) REFERENCES private_media_posts(id) ON DELETE CASCADE,
        FOREIGN KEY (person_id) REFERENCES private_media_persons(id) ON DELETE SET NULL
    )
''')

cursor.execute('CREATE INDEX IF NOT EXISTS idx_pm_media_person ON private_media(person_id)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pm_media_hash ON private_media(file_hash)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pm_media_file_type ON private_media(file_type)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pm_media_storage_id ON private_media(storage_id)')

# Migrations for databases created before these columns existed.
# FIX: SQLite's ALTER TABLE has no "ADD COLUMN IF NOT EXISTS"; plain
# ADD COLUMN is re-run safe because _safe_alter() swallows duplicate-column
# errors.

# Perceptual hash for near-duplicate detection.
_safe_alter(cursor, 'ALTER TABLE private_media ADD COLUMN perceptual_hash TEXT')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pm_media_phash ON private_media(perceptual_hash)')

# post_id link for media grouped into posts.
_safe_alter(cursor, 'ALTER TABLE private_media ADD COLUMN post_id INTEGER REFERENCES private_media_posts(id) ON DELETE CASCADE')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pm_media_post_id ON private_media(post_id)')

# is_read flag on posts.
_safe_alter(cursor, 'ALTER TABLE private_media_posts ADD COLUMN is_read INTEGER DEFAULT 0')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pm_posts_is_read ON private_media_posts(is_read)')
|
# Private Gallery Tags - Encrypted tag definitions for private gallery
cursor.execute('''
    CREATE TABLE IF NOT EXISTS private_gallery_tags (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        encrypted_name TEXT NOT NULL,
        color TEXT DEFAULT '#6b7280',
        encrypted_description TEXT,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP
    )
''')

# Private Gallery Tags Junction Table (tags on individual media items)
cursor.execute('''
    CREATE TABLE IF NOT EXISTS private_media_tags (
        media_id INTEGER NOT NULL,
        tag_id INTEGER NOT NULL,
        created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        PRIMARY KEY (media_id, tag_id),
        FOREIGN KEY (media_id) REFERENCES private_media(id) ON DELETE CASCADE,
        FOREIGN KEY (tag_id) REFERENCES private_gallery_tags(id) ON DELETE CASCADE
    )
''')

cursor.execute('CREATE INDEX IF NOT EXISTS idx_pm_tags_media ON private_media_tags(media_id)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pm_tags_tag ON private_media_tags(tag_id)')

# Private Gallery Post Tags - Tags linked to posts (for grouped uploads)
cursor.execute('''
    CREATE TABLE IF NOT EXISTS private_media_post_tags (
        post_id INTEGER NOT NULL,
        tag_id INTEGER NOT NULL,
        created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        PRIMARY KEY (post_id, tag_id),
        FOREIGN KEY (post_id) REFERENCES private_media_posts(id) ON DELETE CASCADE,
        FOREIGN KEY (tag_id) REFERENCES private_gallery_tags(id) ON DELETE CASCADE
    )
''')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pm_post_tags_post ON private_media_post_tags(post_id)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pm_post_tags_tag ON private_media_post_tags(tag_id)')

# Private Gallery Person Default Tags - Default tags auto-applied when person is selected
cursor.execute('''
    CREATE TABLE IF NOT EXISTS private_media_person_default_tags (
        person_id INTEGER NOT NULL,
        tag_id INTEGER NOT NULL,
        PRIMARY KEY (person_id, tag_id),
        FOREIGN KEY (person_id) REFERENCES private_media_persons(id) ON DELETE CASCADE,
        FOREIGN KEY (tag_id) REFERENCES private_gallery_tags(id) ON DELETE CASCADE
    )
''')
|
# Private Gallery Person Groups (named collections of persons for filtering)
cursor.execute('''
    CREATE TABLE IF NOT EXISTS private_media_person_groups (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        encrypted_name TEXT NOT NULL,
        encrypted_description TEXT,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
        updated_at TEXT DEFAULT CURRENT_TIMESTAMP
    )
''')

# Group membership by person.
cursor.execute('''
    CREATE TABLE IF NOT EXISTS private_media_person_group_members (
        group_id INTEGER NOT NULL REFERENCES private_media_person_groups(id) ON DELETE CASCADE,
        person_id INTEGER NOT NULL REFERENCES private_media_persons(id) ON DELETE CASCADE,
        added_at TEXT DEFAULT CURRENT_TIMESTAMP,
        PRIMARY KEY (group_id, person_id)
    )
''')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pm_person_group_members_person ON private_media_person_group_members(person_id)')

# Group membership by tag (include media carrying these tags).
cursor.execute('''
    CREATE TABLE IF NOT EXISTS private_media_person_group_tag_members (
        group_id INTEGER NOT NULL REFERENCES private_media_person_groups(id) ON DELETE CASCADE,
        tag_id INTEGER NOT NULL REFERENCES private_gallery_tags(id) ON DELETE CASCADE,
        added_at TEXT DEFAULT CURRENT_TIMESTAMP,
        PRIMARY KEY (group_id, tag_id)
    )
''')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pm_person_group_tag_members_tag ON private_media_person_group_tag_members(tag_id)')

# Tags excluded from a group's results.
cursor.execute('''
    CREATE TABLE IF NOT EXISTS private_media_person_group_excluded_tags (
        group_id INTEGER NOT NULL REFERENCES private_media_person_groups(id) ON DELETE CASCADE,
        tag_id INTEGER NOT NULL REFERENCES private_gallery_tags(id) ON DELETE CASCADE,
        added_at TEXT DEFAULT CURRENT_TIMESTAMP,
        PRIMARY KEY (group_id, tag_id)
    )
''')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pm_person_group_excluded_tags_tag ON private_media_person_group_excluded_tags(tag_id)')

# Group membership by relationship type.
cursor.execute('''
    CREATE TABLE IF NOT EXISTS private_media_person_group_relationship_members (
        group_id INTEGER NOT NULL REFERENCES private_media_person_groups(id) ON DELETE CASCADE,
        relationship_id INTEGER NOT NULL REFERENCES private_media_relationships(id) ON DELETE CASCADE,
        added_at TEXT DEFAULT CURRENT_TIMESTAMP,
        PRIMARY KEY (group_id, relationship_id)
    )
''')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pm_person_group_rel_members_rel ON private_media_person_group_relationship_members(relationship_id)')

# Migration: minimum resolution filter per group.
# FIX: SQLite's ALTER TABLE has no "ADD COLUMN IF NOT EXISTS"; plain
# ADD COLUMN is re-run safe because _safe_alter() swallows duplicate-column
# errors.
_safe_alter(cursor, 'ALTER TABLE private_media_person_groups ADD COLUMN min_resolution INTEGER DEFAULT 0')
|
# Reddit community monitoring for private gallery
# Each row binds a subreddit to a person whose gallery receives its media.
cursor.execute('''
    CREATE TABLE IF NOT EXISTS private_media_reddit_communities (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        subreddit_name TEXT NOT NULL,
        person_id INTEGER NOT NULL,
        enabled INTEGER DEFAULT 1,
        last_checked TEXT,
        total_media_found INTEGER DEFAULT 0,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
        updated_at TEXT DEFAULT CURRENT_TIMESTAMP,
        FOREIGN KEY (person_id) REFERENCES private_media_persons(id) ON DELETE CASCADE
    )
''')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_reddit_communities_person ON private_media_reddit_communities(person_id)')
# One subreddit may feed several persons, but only once per person.
cursor.execute('CREATE UNIQUE INDEX IF NOT EXISTS idx_reddit_communities_unique ON private_media_reddit_communities(subreddit_name, person_id)')

# Track which Reddit posts have been processed (avoid re-downloading)
cursor.execute('''
    CREATE TABLE IF NOT EXISTS private_media_reddit_history (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        community_id INTEGER NOT NULL,
        reddit_post_id TEXT NOT NULL,
        media_count INTEGER DEFAULT 0,
        processed_at TEXT DEFAULT CURRENT_TIMESTAMP,
        FOREIGN KEY (community_id) REFERENCES private_media_reddit_communities(id) ON DELETE CASCADE
    )
''')
cursor.execute('CREATE UNIQUE INDEX IF NOT EXISTS idx_reddit_history_unique ON private_media_reddit_history(community_id, reddit_post_id)')
|
# Private Gallery Import Auth - Per-domain authentication for URL imports
|
|
cursor.execute('''
|
|
CREATE TABLE IF NOT EXISTS private_gallery_import_auth (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
domain TEXT NOT NULL UNIQUE,
|
|
auth_type TEXT NOT NULL DEFAULT 'basic',
|
|
encrypted_username TEXT,
|
|
encrypted_password TEXT,
|
|
encrypted_cookies_json TEXT,
|
|
encrypted_user_agent TEXT,
|
|
notes TEXT,
|
|
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
|
)
|
|
''')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_pg_import_auth_domain ON private_gallery_import_auth(domain)')
|
|
|
|
# Scraper account → person mappings for Instagram, TikTok, Snapchat
|
|
cursor.execute('''
|
|
CREATE TABLE IF NOT EXISTS private_media_scraper_accounts (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
platform TEXT NOT NULL,
|
|
username TEXT NOT NULL,
|
|
person_id INTEGER NOT NULL,
|
|
enabled INTEGER DEFAULT 1,
|
|
last_imported_at TEXT,
|
|
last_imported_file_id INTEGER DEFAULT 0,
|
|
total_media_imported INTEGER DEFAULT 0,
|
|
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at TEXT DEFAULT CURRENT_TIMESTAMP,
|
|
FOREIGN KEY (person_id) REFERENCES private_media_persons(id) ON DELETE CASCADE,
|
|
UNIQUE (platform, username, person_id)
|
|
)
|
|
''')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_scraper_accounts_platform ON private_media_scraper_accounts(platform)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_scraper_accounts_person ON private_media_scraper_accounts(person_id)')
|
|
|
|
# Migration: Add last_imported_file_id column if missing
|
|
_safe_alter(cursor, 'ALTER TABLE private_media_scraper_accounts ADD COLUMN IF NOT EXISTS last_imported_file_id INTEGER DEFAULT 0')
|
|
|
|
# Migration: Add original_post_id column to track media moved between posts
|
|
_safe_alter(cursor, 'ALTER TABLE private_media ADD COLUMN IF NOT EXISTS original_post_id INTEGER REFERENCES private_media_posts(id) ON DELETE SET NULL')
|
|
|
|
# ============================================================================
|
|
# PRESS MONITOR TABLES
|
|
# For tracking news articles about celebrities from GDELT
|
|
# ============================================================================
|
|
|
|
cursor.execute('''
|
|
CREATE TABLE IF NOT EXISTS press_config (
|
|
id INTEGER PRIMARY KEY CHECK (id = 1),
|
|
enabled INTEGER DEFAULT 1,
|
|
check_interval_hours INTEGER DEFAULT 6,
|
|
max_records_per_query INTEGER DEFAULT 25,
|
|
notify_new_articles INTEGER DEFAULT 1,
|
|
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at TEXT DEFAULT CURRENT_TIMESTAMP
|
|
)
|
|
''')
|
|
|
|
cursor.execute('''
|
|
INSERT OR IGNORE INTO press_config (id, enabled, check_interval_hours, max_records_per_query, notify_new_articles)
|
|
VALUES (1, 1, 6, 25, 1)
|
|
''')
|
|
|
|
cursor.execute('''
|
|
CREATE TABLE IF NOT EXISTS press_articles (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
celebrity_id INTEGER NOT NULL,
|
|
title TEXT,
|
|
url TEXT NOT NULL,
|
|
url_hash TEXT NOT NULL,
|
|
domain TEXT,
|
|
published_date TEXT,
|
|
image_url TEXT,
|
|
language TEXT,
|
|
country TEXT,
|
|
article_content TEXT,
|
|
snippet TEXT,
|
|
fetched_at TEXT DEFAULT CURRENT_TIMESTAMP,
|
|
notified INTEGER DEFAULT 0,
|
|
read INTEGER DEFAULT 0,
|
|
FOREIGN KEY (celebrity_id) REFERENCES celebrity_profiles(id) ON DELETE CASCADE
|
|
)
|
|
''')
|
|
|
|
cursor.execute('CREATE UNIQUE INDEX IF NOT EXISTS idx_press_url_hash ON press_articles(url_hash)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_press_celebrity ON press_articles(celebrity_id)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_press_published ON press_articles(published_date DESC)')
|
|
cursor.execute('CREATE INDEX IF NOT EXISTS idx_press_domain ON press_articles(domain)')
|
|
|
|
# Add celebrity_ids column to press_config if not exists (migration)
|
|
_safe_alter(cursor, "ALTER TABLE press_config ADD COLUMN IF NOT EXISTS celebrity_ids TEXT")
|
|
|
|
# ============================================================================
|
|
# KEY-VALUE STORE TABLE
|
|
# For general application settings (fingerprints, cache, etc.)
|
|
# ============================================================================
|
|
cursor.execute('''
|
|
CREATE TABLE IF NOT EXISTS key_value_store (
|
|
key TEXT PRIMARY KEY,
|
|
value TEXT NOT NULL,
|
|
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at TEXT DEFAULT CURRENT_TIMESTAMP
|
|
)
|
|
''')
|
|
|
|
conn.commit()
|
|
|
|
def get_url_hash(self, url: str) -> str:
    """Return the hex SHA256 digest of a URL string."""
    digest = hashlib.sha256()
    digest.update(url.encode('utf-8'))
    return digest.hexdigest()
|
|
|
|
# ============================================================================
|
|
# KEY-VALUE STORE METHODS
|
|
# Simple key-value storage for application settings
|
|
# ============================================================================
|
|
|
|
def get_setting(self, key: str) -> Optional[str]:
    """
    Look up a single value in the key_value_store table.

    Args:
        key: Setting key name

    Returns:
        The stored value as a string, or None when the key is absent
    """
    with self.get_connection() as conn:
        cur = conn.cursor()
        cur.execute('SELECT value FROM key_value_store WHERE key = ?', (key,))
        match = cur.fetchone()
        if match is None:
            return None
        return match[0]
|
|
|
|
def set_setting(self, key: str, value: str) -> bool:
    """
    Insert or update a value in the key_value_store table (SQLite upsert).

    Args:
        key: Setting key name
        value: Setting value (as string)

    Returns:
        True if successful
    """
    with self.get_connection(for_write=True) as conn:
        conn.cursor().execute('''
            INSERT INTO key_value_store (key, value, updated_at)
            VALUES (?, ?, CURRENT_TIMESTAMP)
            ON CONFLICT(key) DO UPDATE SET
                value = excluded.value,
                updated_at = CURRENT_TIMESTAMP
        ''', (key, value))
        conn.commit()
        return True
|
|
|
|
def delete_setting(self, key: str) -> bool:
    """
    Remove a key from the key_value_store table.

    Args:
        key: Setting key name

    Returns:
        True if a row was deleted, False if the key didn't exist
    """
    with self.get_connection(for_write=True) as conn:
        cur = conn.cursor()
        cur.execute('DELETE FROM key_value_store WHERE key = ?', (key,))
        conn.commit()
        removed = cur.rowcount
    return removed > 0
|
|
|
|
@staticmethod
|
|
def get_file_hash(file_path: str) -> Optional[str]:
|
|
"""
|
|
Calculate SHA256 hash of a file
|
|
|
|
Args:
|
|
file_path: Path to the file
|
|
|
|
Returns:
|
|
SHA256 hash of file content, or None if file doesn't exist or error occurs
|
|
"""
|
|
try:
|
|
file_path = Path(file_path)
|
|
if not file_path.exists() or not file_path.is_file():
|
|
return None
|
|
|
|
sha256_hash = hashlib.sha256()
|
|
with open(file_path, "rb") as f:
|
|
# Read file in chunks to handle large files efficiently
|
|
for byte_block in iter(lambda: f.read(65536), b""):
|
|
sha256_hash.update(byte_block)
|
|
return sha256_hash.hexdigest()
|
|
except Exception as e:
|
|
logger.error(f"Failed to calculate file hash for {file_path}: {e}")
|
|
return None
|
|
|
|
# ============================================================================
|
|
# SCRAPER CONFIGURATION METHODS
|
|
# See docs/SCRAPER_PROXY_SYSTEM.md for full documentation
|
|
# ============================================================================
|
|
|
|
def _migrate_scraper_cookies(self, cursor):
|
|
"""
|
|
Migrate cookies from JSON files to database (one-time migration).
|
|
Called during database initialization.
|
|
"""
|
|
import os
|
|
|
|
cookie_files = {
|
|
'coppermine': '/opt/media-downloader/cookies/coppermine_cookies.json',
|
|
'imginn': '/opt/media-downloader/cookies/imginn_cookies.json',
|
|
'fastdl': '/opt/media-downloader/cookies/fastdl_cookies.json',
|
|
'snapchat': '/opt/media-downloader/cookies/snapchat_cookies.json',
|
|
'forum_phun': '/opt/media-downloader/cookies/forum_cookies_phun.org.json',
|
|
'forum_hqcelebcorner': '/opt/media-downloader/cookies/forum_cookies_HQCelebCorner.json',
|
|
'forum_picturepub': '/opt/media-downloader/cookies/forum_cookies_PicturePub.json',
|
|
}
|
|
|
|
for scraper_id, cookie_file in cookie_files.items():
|
|
if os.path.exists(cookie_file):
|
|
try:
|
|
# Check if scraper already has cookies (don't overwrite)
|
|
cursor.execute(
|
|
'SELECT cookies_json FROM scrapers WHERE id = ?',
|
|
(scraper_id,)
|
|
)
|
|
row = cursor.fetchone()
|
|
if row and row[0]:
|
|
# Already has cookies, skip
|
|
continue
|
|
|
|
with open(cookie_file, 'r') as f:
|
|
data = json.load(f)
|
|
|
|
# Store in database
|
|
cursor.execute('''
|
|
UPDATE scrapers
|
|
SET cookies_json = ?, cookies_updated_at = ?
|
|
WHERE id = ?
|
|
''', (json.dumps(data), datetime.now().isoformat(), scraper_id))
|
|
|
|
logger.info(f"Migrated cookies for {scraper_id} from {cookie_file}")
|
|
except Exception as e:
|
|
logger.warning(f"Failed to migrate cookies for {scraper_id}: {e}")
|
|
|
|
def get_all_scrapers(self, type_filter: str = None) -> List[Dict]:
    """
    Fetch every scraper row, optionally restricted to one type.

    Args:
        type_filter: Optional filter by type ('direct', 'proxy', 'forum', 'cli_tool')

    Returns:
        List of scraper configuration dicts, each augmented with
        cookies_count, cookies_fresh, and boolean-normalized flags
    """
    with self.get_connection() as conn:
        cursor = conn.cursor()

        if type_filter:
            cursor.execute('''
                SELECT * FROM scrapers WHERE type = ? ORDER BY type, name
            ''', (type_filter,))
        else:
            cursor.execute('SELECT * FROM scrapers ORDER BY type, name')

        results = []
        for row in cursor.fetchall():
            entry = dict(row)

            count = 0
            fresh = False
            raw_json = entry.get('cookies_json')
            if raw_json:
                try:
                    parsed = json.loads(raw_json)
                    # Two storage formats exist: a bare list of cookies,
                    # or a wrapper dict with a 'cookies' key
                    if isinstance(parsed, list):
                        cookie_list = parsed
                    elif isinstance(parsed, dict):
                        cookie_list = parsed.get('cookies', [])
                    else:
                        cookie_list = []
                    count = len(cookie_list)

                    # Cookies count as fresh when updated within 24 hours
                    stamp = entry.get('cookies_updated_at')
                    if stamp:
                        updated = datetime.fromisoformat(stamp)
                        age_hours = (datetime.now() - updated).total_seconds() / 3600
                        fresh = age_hours < 24
                except (json.JSONDecodeError, ValueError, TypeError, AttributeError):
                    pass  # Malformed JSON or timestamp - report zero cookies

            entry['cookies_count'] = count
            entry['cookies_fresh'] = fresh

            # Normalize integer flags to booleans for the frontend
            entry['enabled'] = bool(entry.get('enabled', 1))
            entry['proxy_enabled'] = bool(entry.get('proxy_enabled', 0))
            entry['flaresolverr_required'] = bool(entry.get('flaresolverr_required', 0))

            results.append(entry)

        return results
|
|
|
|
def get_scraper(self, scraper_id: str) -> Optional[Dict]:
    """
    Load a single scraper configuration row by its ID.

    Args:
        scraper_id: Scraper ID (e.g., 'imginn', 'forum_phun')

    Returns:
        Scraper configuration dict with flags normalized to booleans,
        or None if no such scraper exists
    """
    with self.get_connection() as conn:
        cursor = conn.cursor()
        cursor.execute('SELECT * FROM scrapers WHERE id = ?', (scraper_id,))
        record = cursor.fetchone()

        if record is None:
            return None

        config = dict(record)
        # Integer columns -> booleans for callers
        for flag, default in (('enabled', 1),
                              ('proxy_enabled', 0),
                              ('flaresolverr_required', 0)):
            config[flag] = bool(config.get(flag, default))
        return config
|
|
|
|
def update_scraper(self, scraper_id: str, updates: Dict) -> bool:
    """
    Apply a partial update to a scraper row.

    Only an allowlisted set of columns may be modified; any other keys
    in `updates` are silently ignored (this also keeps the dynamically
    built SQL safe from injection via field names).

    Args:
        scraper_id: Scraper ID
        updates: Dictionary of fields to update

    Returns:
        True if a row was updated, False if nothing valid to update or
        the scraper was not found
    """
    # Columns callers are permitted to change
    allowed_fields = {
        'name', 'base_url', 'enabled', 'proxy_enabled', 'proxy_url',
        'flaresolverr_required', 'settings_json',
    }

    accepted = [(field, value) for field, value in updates.items()
                if field in allowed_fields]
    if not accepted:
        return False

    assignments = [f"{field} = ?" for field, _ in accepted]
    assignments.append("updated_at = ?")
    params = [value for _, value in accepted]
    params.append(datetime.now().isoformat())
    params.append(scraper_id)

    with self.get_connection(for_write=True) as conn:
        cursor = conn.cursor()
        cursor.execute(f'''
            UPDATE scrapers
            SET {", ".join(assignments)}
            WHERE id = ?
        ''', params)

        return cursor.rowcount > 0
|
|
|
|
def get_scraper_cookies(self, scraper_id: str) -> Optional[List[Dict]]:
    """
    Get cookies for a scraper.

    Handles both storage formats found in cookies_json: a wrapper dict
    with a 'cookies' key (written by save_scraper_cookies) and a bare
    list of cookies (written by the legacy file migration). Previously a
    bare list raised AttributeError on .get(), which was not caught.

    Args:
        scraper_id: Scraper ID

    Returns:
        List of cookie dicts or None if no cookies
    """
    with self.get_connection() as conn:
        cursor = conn.cursor()
        cursor.execute(
            'SELECT cookies_json FROM scrapers WHERE id = ?',
            (scraper_id,)
        )
        row = cursor.fetchone()

        if not row or not row['cookies_json']:
            return None

        try:
            data = json.loads(row['cookies_json'])
            # Accept both the bare-list and wrapper-dict formats
            # (get_all_scrapers already tolerates both)
            if isinstance(data, list):
                return data
            if isinstance(data, dict):
                return data.get('cookies', [])
            return None
        except (json.JSONDecodeError, TypeError, KeyError):
            return None  # Invalid JSON format
|
|
|
|
def get_scraper_cookies_user_agent(self, scraper_id: str) -> Optional[str]:
    """
    Get the user_agent stored with a scraper's cookies.

    This is critical for Cloudflare cf_clearance cookies which are
    fingerprinted to the browser that solved the challenge.

    Bug fix: legacy migrated cookie data can be a bare JSON list, which
    previously raised an uncaught AttributeError on .get(); a list has
    no stored user_agent, so return None in that case.

    Args:
        scraper_id: Scraper ID

    Returns:
        User agent string or None if not stored
    """
    with self.get_connection() as conn:
        cursor = conn.cursor()
        cursor.execute(
            'SELECT cookies_json FROM scrapers WHERE id = ?',
            (scraper_id,)
        )
        row = cursor.fetchone()

        if not row or not row['cookies_json']:
            return None

        try:
            data = json.loads(row['cookies_json'])
            if isinstance(data, dict):
                return data.get('user_agent')
            return None  # bare-list format carries no user_agent
        except (json.JSONDecodeError, TypeError, KeyError):
            return None
|
|
|
|
def save_scraper_cookies(self, scraper_id: str, cookies: List[Dict],
                         user_agent: str = None, merge: bool = True) -> bool:
    """
    Persist cookies for a scraper.

    IMPORTANT: By default this MERGES with the cookies already stored,
    so session/auth cookies survive while Cloudflare cookies refresh.

    Args:
        scraper_id: Scraper ID
        cookies: List of cookie dicts
        user_agent: Optional user agent (important for cf_clearance)
        merge: If True, merge with existing cookies (default). If False, replace all.

    Returns:
        True if saved successfully
    """
    with self.get_connection(for_write=True) as conn:
        cursor = conn.cursor()

        final_cookies = cookies

        if merge:
            # Load whatever is currently stored for this scraper
            cursor.execute(
                'SELECT cookies_json FROM scrapers WHERE id = ?',
                (scraper_id,)
            )
            row = cursor.fetchone()

            if row and row['cookies_json']:
                try:
                    stored = json.loads(row['cookies_json'])
                    existing_cookies = stored if isinstance(stored, list) else stored.get('cookies', [])

                    # Existing cookies form the base; new cookies win on name clash
                    by_name = {c['name']: c for c in existing_cookies}
                    for new_cookie in cookies:
                        by_name[new_cookie['name']] = new_cookie

                    final_cookies = list(by_name.values())
                    logger.debug(f"Merged {len(cookies)} new cookies with {len(existing_cookies)} existing -> {len(final_cookies)} total")
                except (json.JSONDecodeError, TypeError, KeyError):
                    pass  # Invalid existing cookies, use new ones only

        # Wrapper-dict format: cookies plus bookkeeping fields
        payload = {
            'cookies': final_cookies,
            'timestamp': datetime.now().isoformat()
        }
        if user_agent:
            payload['user_agent'] = user_agent

        cursor.execute('''
            UPDATE scrapers
            SET cookies_json = ?, cookies_updated_at = ?, updated_at = ?
            WHERE id = ?
        ''', (json.dumps(payload), datetime.now().isoformat(),
              datetime.now().isoformat(), scraper_id))

        return cursor.rowcount > 0
|
|
|
|
def clear_scraper_cookies(self, scraper_id: str) -> bool:
    """
    Remove all stored cookies for a scraper.

    Args:
        scraper_id: Scraper ID

    Returns:
        True if cleared successfully
    """
    with self.get_connection(for_write=True) as conn:
        cur = conn.cursor()
        cur.execute('''
            UPDATE scrapers
            SET cookies_json = NULL, cookies_updated_at = NULL, updated_at = ?
            WHERE id = ?
        ''', (datetime.now().isoformat(), scraper_id))
        cleared = cur.rowcount
    return cleared > 0
|
|
|
|
def update_scraper_test_status(self, scraper_id: str, status: str,
                               message: str = None) -> bool:
    """
    Record the outcome of the most recent connectivity test for a scraper.

    Args:
        scraper_id: Scraper ID
        status: Status string ('success', 'failed', 'timeout')
        message: Optional error message

    Returns:
        True if updated successfully
    """
    timestamp = datetime.now().isoformat()
    with self.get_connection(for_write=True) as conn:
        cur = conn.cursor()
        cur.execute('''
            UPDATE scrapers
            SET last_test_at = ?, last_test_status = ?, last_test_message = ?, updated_at = ?
            WHERE id = ?
        ''', (timestamp, status, message, timestamp, scraper_id))
        return cur.rowcount > 0
|
|
|
|
def create_scraper(self, scraper: Dict) -> bool:
    """
    Insert a new scraper row (used when new forums are added).

    Uses INSERT OR IGNORE, so an existing row with the same id is left
    untouched and False is returned.

    Args:
        scraper: Scraper configuration dict with at least 'id', 'name', 'type'

    Returns:
        True if created successfully
    """
    if any(field not in scraper for field in ('id', 'name', 'type')):
        logger.error("Missing required fields for scraper: ['id', 'name', 'type']")
        return False

    settings = scraper.get('settings')
    with self.get_connection(for_write=True) as conn:
        cursor = conn.cursor()
        cursor.execute('''
            INSERT OR IGNORE INTO scrapers
            (id, name, type, module, base_url, target_platform, enabled,
             proxy_enabled, proxy_url, flaresolverr_required, settings_json)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        ''', (
            scraper['id'],
            scraper['name'],
            scraper['type'],
            scraper.get('module'),
            scraper.get('base_url'),
            scraper.get('target_platform'),
            # SQLite stores flags as 0/1 integers
            1 if scraper.get('enabled', True) else 0,
            1 if scraper.get('proxy_enabled', False) else 0,
            scraper.get('proxy_url'),
            1 if scraper.get('flaresolverr_required', False) else 0,
            json.dumps(settings) if settings else None
        ))

        return cursor.rowcount > 0
|
|
|
|
def delete_scraper(self, scraper_id: str) -> bool:
    """
    Remove a scraper row (used when forums are removed).

    Args:
        scraper_id: Scraper ID to delete

    Returns:
        True if deleted successfully
    """
    with self.get_connection(for_write=True) as conn:
        cur = conn.cursor()
        cur.execute('DELETE FROM scrapers WHERE id = ?', (scraper_id,))
        removed = cur.rowcount
    return removed > 0
|
|
|
|
def get_scraper_cookies_dict(self, scraper_id: str) -> Dict[str, str]:
    """
    Get cookies as a simple name->value mapping (for the requests library).

    Args:
        scraper_id: Scraper ID

    Returns:
        Dictionary of cookie name->value pairs (empty when no cookies)
    """
    stored = self.get_scraper_cookies(scraper_id)
    if not stored:
        return {}
    return {entry['name']: entry['value'] for entry in stored}
|
|
|
|
def is_downloaded(self, url: str, platform: str = None) -> bool:
    """
    Check whether a URL was ever downloaded. The hash record survives
    file deletion, so this also prevents re-downloading removed content.

    Args:
        url: URL to check
        platform: Optional platform filter

    Returns:
        True if already downloaded
    """
    url_hash = self.get_url_hash(url)

    if platform:
        query = "SELECT 1 FROM downloads WHERE url_hash = ? AND platform = ? LIMIT 1"
        params = (url_hash, platform)
    else:
        query = "SELECT 1 FROM downloads WHERE url_hash = ? LIMIT 1"
        params = (url_hash,)

    with self.get_connection() as conn:
        cursor = conn.cursor()
        cursor.execute(query, params)
        return cursor.fetchone() is not None
|
|
|
|
def is_file_hash_downloaded(self, file_hash: str) -> bool:
    """
    Check whether a file with this content hash is known anywhere:
    downloads, recycle bin, or file inventory (review queue, media files).
    Records survive file deletion.

    Args:
        file_hash: SHA256 hash of file content

    Returns:
        True if the hash exists in downloads, recycle_bin, or file_inventory
    """
    if not file_hash:
        return False

    # Temp paths are excluded from the downloads check to avoid false
    # positives while a move operation is in flight
    queries = (
        "SELECT 1 FROM downloads WHERE file_hash = ? AND file_path NOT LIKE '%/temp/%' AND file_path NOT LIKE '%\\temp\\%' LIMIT 1",
        "SELECT 1 FROM recycle_bin WHERE file_hash = ? LIMIT 1",
        "SELECT 1 FROM file_inventory WHERE file_hash = ? LIMIT 1",
    )

    with self.get_connection() as conn:
        cursor = conn.cursor()
        for query in queries:
            cursor.execute(query, (file_hash,))
            if cursor.fetchone():
                return True
    return False
|
|
|
|
def get_download_by_file_hash(self, file_hash: str) -> Optional[Dict]:
    """
    Fetch the most recent download record matching a content hash
    (the record persists even if the file itself was deleted).
    Temp-directory paths are skipped because they are mid-move.

    Args:
        file_hash: SHA256 hash of file content

    Returns:
        Download record dict with post_date parsed to datetime and
        metadata parsed from JSON where possible, or None if not found
    """
    if not file_hash:
        return None

    with self.get_connection() as conn:
        cursor = conn.cursor()
        cursor.execute('''
            SELECT
                id, url, platform, source, content_type,
                filename, file_path, post_date, download_date,
                file_size, file_hash, metadata
            FROM downloads
            WHERE file_hash = ?
            AND file_path IS NOT NULL
            AND file_path NOT LIKE '%/temp/%'
            AND file_path NOT LIKE '%\\temp\\%'
            ORDER BY download_date DESC
            LIMIT 1
        ''', (file_hash,))

        row = cursor.fetchone()
        if row is None:
            return None

        record = dict(row)
        # Best-effort parsing; the raw text is kept when parsing fails
        if record.get('post_date'):
            try:
                record['post_date'] = datetime.fromisoformat(record['post_date'])
            except (ValueError, TypeError) as e:
                logger.debug(f"Failed to parse post_date: {e}")
        if record.get('metadata'):
            try:
                record['metadata'] = json.loads(record['metadata'])
            except (ValueError, TypeError, json.JSONDecodeError) as e:
                logger.debug(f"Failed to parse metadata JSON: {e}")
        return record
|
|
|
|
def get_download_by_media_id(self, media_id: str, platform: str = 'instagram', method: str = None) -> Optional[Dict]:
    """
    Look up a download record by platform media ID.

    Args:
        media_id: Instagram media ID to search for
        platform: Platform filter (default 'instagram')
        method: Optional method filter (fastdl, imginn, toolzu, instaloader)

    Returns:
        Download record dict including post_date and filename (with
        post_date/metadata parsed where possible), or None if not found
    """
    with self.get_connection() as conn:
        cursor = conn.cursor()

        # media_id is an indexed column, far faster than a LIKE scan
        logger.debug(f"Searching for media_id={media_id}, platform={platform}, method={method}")

        if method:
            cursor.execute('''
                SELECT
                    id, url, platform, source, content_type,
                    filename, file_path, post_date, download_date,
                    metadata, method
                FROM downloads
                WHERE platform = ?
                AND method = ?
                AND media_id = ?
                LIMIT 1
            ''', (platform, method, media_id))
        else:
            cursor.execute('''
                SELECT
                    id, url, platform, source, content_type,
                    filename, file_path, post_date, download_date,
                    metadata, method
                FROM downloads
                WHERE platform = ?
                AND media_id = ?
                LIMIT 1
            ''', (platform, media_id))

        row = cursor.fetchone()
        if row is None:
            return None

        record = dict(row)
        # Best-effort parsing of stored text fields
        if record.get('post_date'):
            try:
                record['post_date'] = datetime.fromisoformat(record['post_date'])
            except (ValueError, TypeError) as e:
                logger.debug(f"Failed to parse post_date: {e}")
        if record.get('metadata'):
            try:
                record['metadata'] = json.loads(record['metadata'])
            except (ValueError, TypeError, json.JSONDecodeError) as e:
                logger.debug(f"Failed to parse metadata JSON: {e}")
        return record
|
|
|
|
def mark_fastdl_upgraded(self, media_id: str) -> bool:
    """
    Flag a FastDL download record as upgraded with the Toolzu high-res
    version by setting metadata['upgraded'].

    Args:
        media_id: Instagram media ID

    Returns:
        True if successfully marked, False otherwise
    """
    with self.get_connection(for_write=True) as conn:
        cursor = conn.cursor()

        # Locate the FastDL record via the indexed media_id column
        # (platform='instagram' with method='fastdl')
        cursor.execute('''
            SELECT id, metadata FROM downloads
            WHERE platform = 'instagram'
            AND method = 'fastdl'
            AND media_id = ?
            LIMIT 1
        ''', (media_id,))

        row = cursor.fetchone()
        if row is None:
            return False

        record_id = row['id']
        try:
            meta = json.loads(row['metadata']) if row['metadata'] else {}
        except (ValueError, TypeError, json.JSONDecodeError) as e:
            logger.debug(f"Failed to parse metadata for record {record_id}: {e}")
            meta = {}

        meta['upgraded'] = True
        meta['upgraded_date'] = datetime.now().isoformat()

        cursor.execute('''
            UPDATE downloads
            SET metadata = ?
            WHERE id = ?
        ''', (json.dumps(meta), record_id))

        conn.commit()
        return True
|
|
|
|
def delete_downloads_by_date_range(self, platform: str, source: str = None,
                                   days_back: int = 7) -> Dict[str, Any]:
    """
    Delete downloads from the last N days for a specific platform/source.

    Note: only database rows are removed here; the caller is responsible
    for deleting the files listed in the returned 'file_paths'.

    Fix: the return annotation used the builtin `any` function instead
    of `typing.Any` (already imported at module level).

    Args:
        platform: Platform to delete from ('instagram', 'tiktok', 'forum')
        source: Optional source filter (username, forum name, etc.)
        days_back: Number of days back to delete (default: 7)

    Returns:
        Dict with 'deleted_count', 'file_paths', 'cutoff_date',
        'platform', and 'source'
    """
    cutoff_date = datetime.now() - timedelta(days=days_back)

    with self.get_connection(for_write=True) as conn:
        cursor = conn.cursor()

        # First, fetch the matching records so file paths can be returned
        # for the caller to delete from disk
        if source:
            cursor.execute('''
                SELECT id, file_path, filename, download_date, metadata
                FROM downloads
                WHERE platform = ?
                AND source = ?
                AND download_date >= ?
                ORDER BY download_date DESC
            ''', (platform, source, cutoff_date.isoformat()))
        else:
            cursor.execute('''
                SELECT id, file_path, filename, download_date, metadata
                FROM downloads
                WHERE platform = ?
                AND download_date >= ?
                ORDER BY download_date DESC
            ''', (platform, cutoff_date.isoformat()))

        records = cursor.fetchall()

        record_ids = [row['id'] for row in records]
        file_paths = [row['file_path'] for row in records if row['file_path']]

        # Delete the matching rows in one statement
        if record_ids:
            placeholders = ','.join('?' * len(record_ids))
            cursor.execute(f'''
                DELETE FROM downloads
                WHERE id IN ({placeholders})
            ''', record_ids)

            deleted_count = cursor.rowcount
        else:
            deleted_count = 0

        conn.commit()

    return {
        'deleted_count': deleted_count,
        'file_paths': file_paths,
        'cutoff_date': cutoff_date.isoformat(),
        'platform': platform,
        'source': source
    }
|
|
|
|
def record_download(self,
                    url: str,
                    platform: str,
                    source: str,
                    content_type: str = None,
                    filename: str = None,
                    file_path: str = None,
                    file_size: int = None,
                    file_hash: str = None,
                    post_date: datetime = None,
                    status: str = 'completed',
                    error_message: str = None,
                    metadata: Dict = None,
                    method: str = None,
                    max_retries: int = 3) -> bool:
    """
    Record a download in the unified database with retry logic.

    Inserts one row into `downloads`. A unique-constraint violation
    (duplicate url_hash) is treated as an expected duplicate, not an
    error. Lock/busy errors are retried with exponential backoff
    (0.5s, 1s, 2s, ... capped at 5s) up to max_retries attempts.

    Note: Duplicate file hash checking is handled by MoveManager before recording.

    Args:
        url: Source URL of the download (hashed for the unique url_hash column)
        platform: Platform name (e.g. 'instagram', 'tiktok')
        source: Source identifier (username, forum name, etc.)
        content_type: Optional content type label
        filename: Optional stored filename
        file_path: Optional path of the downloaded file
        file_size: Optional file size in bytes
        file_hash: Optional SHA256 of the file content
        post_date: Optional original post timestamp (stored as ISO string)
        status: Download status, default 'completed'
        error_message: Optional error text for failed downloads
        metadata: Optional dict stored as JSON; media_id/video_id/post_id
            is also copied into the indexed media_id column
        method: Optional download method (e.g. 'fastdl', 'toolzu')
        max_retries: Attempts before giving up on a locked database

    Returns:
        True if successfully recorded, False on duplicate or any failure
    """
    url_hash = self.get_url_hash(url)

    # Extract media_id from metadata for fast queries
    # (first of media_id / video_id / post_id that is present and truthy)
    media_id = None
    if metadata and isinstance(metadata, dict):
        media_id = metadata.get('media_id') or metadata.get('video_id') or metadata.get('post_id')

    # Use local time with T separator for download_date
    download_date = datetime.now().strftime('%Y-%m-%dT%H:%M:%S')

    for attempt in range(max_retries):
        try:
            with self.get_connection(for_write=True) as conn:
                cursor = conn.cursor()

                cursor.execute('''
                    INSERT INTO downloads (
                        url_hash, url, platform, source, content_type,
                        filename, file_path, file_size, file_hash,
                        post_date, download_date, status, error_message, metadata, media_id, method
                    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                ''', (
                    url_hash, url, platform, source, content_type,
                    filename, file_path, file_size, file_hash,
                    post_date.isoformat() if post_date else None,
                    download_date,
                    status, error_message,
                    json.dumps(metadata) if metadata else None,
                    media_id, method
                ))

                conn.commit()
                return True

        except sqlite3.IntegrityError:
            # Duplicate entry - this is expected, not an error
            return False
        except sqlite3.OperationalError as e:
            # Lock/busy errors are retryable; anything else is fatal
            if _is_lock_error(e):
                if attempt < max_retries - 1:
                    # Wait with exponential backoff (capped at 5 seconds)
                    wait_time = min(5, (2 ** attempt) * 0.5)
                    logger.warning(f"Database locked, retrying in {wait_time}s (attempt {attempt + 1}/{max_retries})")
                    time.sleep(wait_time)
                    continue
                else:
                    logger.error(f"Failed to record download after {max_retries} attempts: {e}")
                    return False
            else:
                logger.error(f"Database error: {e}")
                return False
        except Exception as e:
            logger.error(f"Failed to record download: {e}")
            return False

    return False
|
|
|
|
def get_platform_stats(self, platform: str = None) -> Dict:
    """
    Get download statistics by platform.

    Returns a single stats dict when `platform` is given ({} when the
    aggregate row is missing), otherwise a list of per-platform dicts.
    """
    with self.get_connection() as conn:
        cursor = conn.cursor()

        if platform:
            cursor.execute('''
                SELECT
                    COUNT(*) as total,
                    SUM(CASE WHEN status = 'completed' THEN 1 ELSE 0 END) as completed,
                    SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END) as failed,
                    SUM(file_size) as total_size,
                    MIN(download_date) as first_download,
                    MAX(download_date) as last_download
                FROM downloads
                WHERE platform = ?
            ''', (platform,))
            row = cursor.fetchone()
            return dict(row) if row else {}

        cursor.execute('''
            SELECT
                platform,
                COUNT(*) as total,
                SUM(CASE WHEN status = 'completed' THEN 1 ELSE 0 END) as completed,
                SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END) as failed,
                SUM(file_size) as total_size
            FROM downloads
            GROUP BY platform
        ''')
        return [dict(row) for row in cursor.fetchall()]
|
|
|
|
def update_file_location(self, url: str, platform: str, final_path: str,
                         final_hash: str = None, max_retries: int = 3) -> bool:
    """
    Update the file_path and file_hash for a download record after moving.

    Args:
        url: Original download URL
        platform: Platform name
        final_path: Final destination path after moving
        final_hash: Optional SHA256 hash of final file (will calculate if not provided)
        max_retries: Number of retries for locked database

    Returns:
        True if updated successfully (a row was changed), False otherwise
    """
    import os

    url_hash = self.get_url_hash(url)

    # Calculate hash if not provided. Best-effort: a missing hash should
    # not block the path update, but we log it instead of swallowing
    # the error silently (previous behavior).
    if not final_hash and os.path.exists(final_path):
        try:
            final_hash = self.get_file_hash(final_path)
        except Exception as e:
            logger.debug(f"Hash calculation failed for {final_path}: {e}")

    for attempt in range(max_retries):
        try:
            with self.get_connection(for_write=True) as conn:
                cursor = conn.cursor()

                cursor.execute('''
                    UPDATE downloads
                    SET file_path = ?, file_hash = ?
                    WHERE url_hash = ? AND platform = ?
                ''', (final_path, final_hash, url_hash, platform))

                conn.commit()
                return cursor.rowcount > 0

        except sqlite3.OperationalError as e:
            if _is_lock_error(e) and attempt < max_retries - 1:
                time.sleep(0.1 * (2 ** attempt))  # Exponential backoff
                continue
            # BUGFIX: previously failed silently with no diagnostics,
            # inconsistent with siblings like update_queue_status.
            logger.error(f"Failed to update file location for {url}: {e}")
            return False
        except Exception as e:
            logger.error(f"Failed to update file location for {url}: {e}")
            return False

    return False
|
|
|
|
def update_file_location_by_filename(self, filename: str, platform: str, source: str,
                                     final_path: str, final_hash: str = None,
                                     max_retries: int = 3) -> bool:
    """
    Update the file_path and file_hash for a download record by filename.

    Args:
        filename: Original filename in database
        platform: Platform name
        source: Source (username/forum name)
        final_path: Final destination path after moving
        final_hash: Optional SHA256 hash of final file (will calculate if not provided)
        max_retries: Number of retries for locked database

    Returns:
        True if updated successfully (a row was changed), False otherwise
    """
    import os

    # Calculate hash if not provided. Best-effort: log failures instead of
    # swallowing them silently (previous behavior).
    if not final_hash and os.path.exists(final_path):
        try:
            final_hash = self.get_file_hash(final_path)
        except Exception as e:
            logger.debug(f"Hash calculation failed for {final_path}: {e}")

    for attempt in range(max_retries):
        try:
            with self.get_connection(for_write=True) as conn:
                cursor = conn.cursor()

                cursor.execute('''
                    UPDATE downloads
                    SET file_path = ?, file_hash = ?
                    WHERE filename = ? AND platform = ? AND source = ?
                ''', (final_path, final_hash, filename, platform, source))

                conn.commit()
                return cursor.rowcount > 0

        except sqlite3.OperationalError as e:
            if _is_lock_error(e) and attempt < max_retries - 1:
                time.sleep(0.1 * (2 ** attempt))  # Exponential backoff
                continue
            # BUGFIX: previously failed silently with no diagnostics.
            logger.error(f"Failed to update file location for {filename}: {e}")
            return False
        except Exception as e:
            logger.error(f"Failed to update file location for {filename}: {e}")
            return False

    return False
|
|
|
|
def get_scheduler_state(self, task_id: str) -> Optional[Dict]:
    """Fetch the persisted scheduler state row for *task_id*, or None."""
    with self.get_connection() as conn:
        cursor = conn.cursor()
        cursor.execute('SELECT * FROM scheduler_state WHERE task_id = ?', (task_id,))
        state_row = cursor.fetchone()
    return dict(state_row) if state_row else None
|
|
|
|
def update_scheduler_state(self, task_id: str, last_run: datetime = None,
                           next_run: datetime = None, status: str = None,
                           error: str = None, metadata: Dict = None) -> bool:
    """Upsert the scheduler state row for *task_id*.

    An existing row receives a partial UPDATE built from whichever
    arguments were supplied; a call without *error* counts as a
    successful run (run_count + 1), while a call with *error* records
    last_error and bumps error_count. A missing row is inserted fresh.
    """
    with self.get_connection(for_write=True) as conn:
        cursor = conn.cursor()

        # Does a state row already exist for this task?
        cursor.execute('SELECT task_id FROM scheduler_state WHERE task_id = ?', (task_id,))
        if cursor.fetchone() is None:
            # First sighting of this task - create its row.
            cursor.execute('''
                INSERT INTO scheduler_state (task_id, last_run, next_run, status, metadata)
                VALUES (?, ?, ?, ?, ?)
            ''', (task_id, last_run, next_run, status or 'active',
                  json.dumps(metadata) if metadata else None))
        else:
            # Build the SET clause dynamically; placeholder order must
            # track the order clauses are appended.
            set_clauses, values = [], []

            if last_run:
                set_clauses.append('last_run = ?')
                values.append(last_run)
            if next_run:
                set_clauses.append('next_run = ?')
                values.append(next_run)
            if status:
                set_clauses.append('status = ?')
                values.append(status)
            if error:
                # Failure: record the message and count it.
                set_clauses.append('last_error = ?')
                set_clauses.append('error_count = error_count + 1')
                values.append(error)
            else:
                # Success: bump the run counter (no bound value needed).
                set_clauses.append('run_count = run_count + 1')
            if metadata:
                set_clauses.append('metadata = ?')
                values.append(json.dumps(metadata))

            values.append(task_id)
            cursor.execute(f'''
                UPDATE scheduler_state
                SET {', '.join(set_clauses)}
                WHERE task_id = ?
            ''', values)

        conn.commit()
        return True
|
|
|
|
def get_all_scheduler_states(self) -> List[Dict]:
    """Return every scheduler_state row as a dict, ordered by task_id."""
    with self.get_connection() as conn:
        cursor = conn.cursor()
        cursor.execute('SELECT * FROM scheduler_state ORDER BY task_id')
        rows = cursor.fetchall()
    return [dict(r) for r in rows]
|
|
|
|
def was_thread_checked_recently(self, thread_id: str, hours: int = 6) -> bool:
    """True if *thread_id* has a completed check within the last *hours* hours."""
    with self.get_connection() as conn:
        cursor = conn.cursor()
        # SQLite datetime modifier takes a signed offset string, e.g. '-6 hours'.
        cursor.execute('''
            SELECT COUNT(*) as count FROM thread_check_history
            WHERE thread_id = ?
            AND check_time > datetime('now', ? || ' hours')
            AND status = 'completed'
        ''', (thread_id, f'-{hours}'))
        hit = cursor.fetchone()
    return bool(hit) and hit['count'] > 0
|
|
|
|
def record_thread_check(self, thread_id: str, forum_name: str,
                        last_post_date: datetime = None,
                        new_posts: int = 0, images: int = 0,
                        status: str = 'completed') -> bool:
    """Append a thread_check_history row recording this check.

    Returns True on success, False (after logging) on any DB error.
    """
    with self.get_connection(for_write=True) as conn:
        cursor = conn.cursor()
        try:
            cursor.execute('''
                INSERT INTO thread_check_history
                (thread_id, forum_name, last_post_date, new_posts_found,
                 images_downloaded, status)
                VALUES (?, ?, ?, ?, ?, ?)
            ''', (thread_id, forum_name, last_post_date, new_posts, images, status))
            conn.commit()
        except Exception as e:
            logger.error(f"Error recording thread check: {e}", module="Forum")
            return False
    return True
|
|
|
|
def get_thread_last_check(self, thread_id: str) -> Optional[Dict]:
    """Return the most recent check-history row for *thread_id*, or None."""
    with self.get_connection() as conn:
        cursor = conn.cursor()
        cursor.execute('''
            SELECT * FROM thread_check_history
            WHERE thread_id = ?
            ORDER BY check_time DESC
            LIMIT 1
        ''', (thread_id,))
        latest = cursor.fetchone()
    return dict(latest) if latest else None
|
|
|
|
def add_to_queue(self,
                 url: str,
                 platform: str,
                 source: str = None,
                 referer: str = None,
                 save_path: str = None,
                 priority: int = 5,
                 metadata: Dict = None) -> bool:
    """Queue a URL for download, retrying up to 3 times on a locked DB.

    Returns True on insert; False when the item is already queued
    (IntegrityError) or on any other failure.
    """
    payload = (url, platform, source, referer, save_path,
               priority, json.dumps(metadata) if metadata else None)

    for attempt in range(3):
        try:
            with self.get_connection(for_write=True) as conn:
                conn.cursor().execute('''
                    INSERT INTO download_queue (
                        url, platform, source, referer, save_path,
                        priority, metadata
                    ) VALUES (?, ?, ?, ?, ?, ?, ?)
                ''', payload)
                conn.commit()
                return True

        except sqlite3.IntegrityError:
            # Already in queue - expected, not an error.
            return False
        except sqlite3.OperationalError as e:
            if _is_lock_error(e) and attempt < 2:
                time.sleep(0.5 * (2 ** attempt))
                continue
            return False
        except Exception:
            return False
    return False
|
|
|
|
def get_queue_items(self, platform: str = None, status: str = 'pending', limit: int = 100) -> List[Dict]:
    """Fetch queue rows by status (optionally platform-filtered),
    ordered highest priority first, then oldest first."""
    if platform:
        sql = '''
            SELECT * FROM download_queue
            WHERE platform = ? AND status = ?
            ORDER BY priority ASC, created_date ASC
            LIMIT ?
        '''
        params = (platform, status, limit)
    else:
        sql = '''
            SELECT * FROM download_queue
            WHERE status = ?
            ORDER BY priority ASC, created_date ASC
            LIMIT ?
        '''
        params = (status, limit)

    with self.get_connection() as conn:
        cursor = conn.cursor()
        cursor.execute(sql, params)
        return [dict(r) for r in cursor.fetchall()]
|
|
|
|
def update_queue_status(self, queue_id: int, status: str, error_message: str = None):
    """Update a queue row's status, retrying up to 3 times on lock errors.

    With an error message the attempt counter is bumped; without one the
    row is stamped with the completion time.
    """
    for attempt in range(3):
        try:
            with self.get_connection(for_write=True) as conn:
                cursor = conn.cursor()

                if error_message:
                    # Failure path: persist the error and count the attempt.
                    cursor.execute('''
                        UPDATE download_queue
                        SET status = ?, error_message = ?, attempts = attempts + 1
                        WHERE id = ?
                    ''', (status, error_message, queue_id))
                else:
                    # Success path: record when the download finished.
                    cursor.execute('''
                        UPDATE download_queue
                        SET status = ?, download_date = CURRENT_TIMESTAMP
                        WHERE id = ?
                    ''', (status, queue_id))

                conn.commit()
                return

        except sqlite3.OperationalError as e:
            if _is_lock_error(e) and attempt < 2:
                time.sleep(0.5 * (2 ** attempt))
                continue
            logger.error(f"Failed to update queue status: {e}")
            return
        except Exception as e:
            logger.error(f"Error updating queue status: {e}")
            return
|
|
|
|
def migrate_from_old_databases(self,
                               fastdl_db: str = None,
                               tiktok_db: str = None,
                               forum_dbs: List[str] = None,
                               verbose: bool = True) -> Dict[str, int]:
    """
    Migrate data from old separate databases to unified database.

    Fixes over the previous version:
    - Every legacy sqlite3 connection is closed in a ``finally`` block, so
      a failing query (e.g. a missing ``downloads`` table) can no longer
      leak the handle.
    - A failure while reading one legacy database is logged and counted in
      ``stats['errors']`` instead of aborting the whole migration.

    Args:
        fastdl_db: Path to FastDL database
        tiktok_db: Path to TikTok database
        forum_dbs: List of paths to forum databases
        verbose: Print progress messages

    Returns:
        Dictionary with migration statistics
    """
    stats = {
        'fastdl': 0,
        'tiktok': 0,
        'forum': 0,
        'errors': 0
    }

    # ---- Migrate FastDL (Instagram) ----
    if fastdl_db and Path(fastdl_db).exists():
        if verbose:
            print(f"Migrating FastDL database: {fastdl_db}")

        old_conn = sqlite3.connect(fastdl_db)
        old_conn.row_factory = sqlite3.Row
        try:
            cursor = old_conn.cursor()
            cursor.execute("SELECT * FROM downloads")
            for row in cursor.fetchall():
                try:
                    # Convert media_id to URL (best effort)
                    url = row['download_url'] if row['download_url'] else f"instagram://{row['media_id']}"

                    self.record_download(
                        url=url,
                        platform='instagram',
                        source=row['username'],
                        content_type=row['content_type'],
                        filename=row['filename'],
                        post_date=datetime.fromisoformat(row['post_date']) if row['post_date'] else None,
                        metadata={'media_id': row['media_id'], 'original_metadata': row['metadata']}
                    )
                    stats['fastdl'] += 1
                except Exception as e:
                    if verbose:
                        print(f" Error migrating FastDL record: {e}")
                    stats['errors'] += 1
        except Exception as e:
            # Reading the legacy DB itself failed (e.g. missing table).
            logger.warning(f"Failed to migrate FastDL database {fastdl_db}: {e}")
            stats['errors'] += 1
        finally:
            old_conn.close()

    # ---- Migrate TikTok ----
    if tiktok_db and Path(tiktok_db).exists():
        if verbose:
            print(f"Migrating TikTok database: {tiktok_db}")

        old_conn = sqlite3.connect(tiktok_db)
        old_conn.row_factory = sqlite3.Row
        try:
            cursor = old_conn.cursor()
            cursor.execute("SELECT * FROM downloads")
            for row in cursor.fetchall():
                try:
                    url = f"https://www.tiktok.com/@{row['username']}/video/{row['video_id']}"

                    self.record_download(
                        url=url,
                        platform='tiktok',
                        source=row['username'],
                        content_type='video',
                        filename=row['filename'],
                        post_date=datetime.fromisoformat(row['post_date']) if row['post_date'] else None,
                        metadata={'video_id': row['video_id'], 'original_metadata': row['metadata']}
                    )
                    stats['tiktok'] += 1
                except Exception as e:
                    if verbose:
                        print(f" Error migrating TikTok record: {e}")
                    stats['errors'] += 1
        except Exception as e:
            logger.warning(f"Failed to migrate TikTok database {tiktok_db}: {e}")
            stats['errors'] += 1
        finally:
            old_conn.close()

    # ---- Migrate Forum databases ----
    if forum_dbs:
        for forum_db in forum_dbs:
            if not Path(forum_db).exists():
                continue
            if verbose:
                print(f"Migrating Forum database: {forum_db}")

            old_conn = sqlite3.connect(forum_db)
            old_conn.row_factory = sqlite3.Row
            try:
                cursor = old_conn.cursor()

                # Extract forum name from filename (e.g. forum_PicturePub.db -> PicturePub)
                forum_name = Path(forum_db).stem.replace('forum_', '')

                # Migrate threads
                try:
                    cursor.execute("SELECT * FROM threads")
                    with self.get_connection(for_write=True) as conn:
                        new_cursor = conn.cursor()
                        for row in cursor.fetchall():
                            try:
                                new_cursor.execute('''
                                    INSERT OR IGNORE INTO forum_threads (
                                        thread_id, forum_name, thread_url, thread_title,
                                        author, created_date, last_checked, last_post_date,
                                        post_count, status, monitor_until, metadata
                                    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                                ''', (
                                    row['thread_id'], forum_name, row['thread_url'],
                                    row['thread_title'], row['author'], row['created_date'],
                                    row['last_checked'], row['last_post_date'],
                                    row['post_count'], row['status'],
                                    row['monitor_until'], row['metadata']
                                ))
                            except Exception as e:
                                if verbose:
                                    print(f" Error migrating thread: {e}")
                                stats['errors'] += 1
                        conn.commit()
                except Exception as e:
                    if verbose:
                        print(f" Error migrating threads: {e}")
                    logger.warning(f"Failed to migrate threads from {forum_db}: {e}")

                # Migrate completed download-queue items as downloads
                try:
                    cursor.execute("SELECT * FROM download_queue WHERE status = 'completed'")
                    for row in cursor.fetchall():
                        try:
                            self.record_download(
                                url=row['url'],
                                platform='forums',
                                source=forum_name,
                                content_type='image',
                                file_path=row['save_path'],
                                metadata={
                                    'thread_id': row['thread_id'],
                                    'post_id': row['post_id'],
                                    'original_metadata': row['metadata']
                                }
                            )
                            stats['forum'] += 1
                        except Exception as e:
                            if verbose:
                                print(f" Error migrating forum download: {e}")
                            stats['errors'] += 1
                except Exception as e:
                    if verbose:
                        print(f" Error migrating forum downloads: {e}")
                    logger.warning(f"Failed to migrate forum downloads from {forum_db}: {e}")
            finally:
                old_conn.close()

    if verbose:
        print(f"\nMigration complete:")
        print(f" FastDL records: {stats['fastdl']}")
        print(f" TikTok records: {stats['tiktok']}")
        print(f" Forum records: {stats['forum']}")
        print(f" Errors: {stats['errors']}")
        print(f" Total migrated: {stats['fastdl'] + stats['tiktok'] + stats['forum']}")

    return stats
|
|
|
|
def log_face_recognition_scan(self, file_path: str, has_match: bool,
                              matched_person: str = None, confidence: float = None,
                              face_count: int = 0, scan_type: str = 'auto') -> bool:
    """
    Log a face recognition scan result.

    Args:
        file_path: Path to the scanned file
        has_match: Whether a face was matched
        matched_person: Name of matched person (if any)
        confidence: Match confidence (0-1)
        face_count: Number of faces detected
        scan_type: Type of scan ('auto', 'manual', 'retroactive')

    Returns:
        True if logged successfully
    """
    try:
        with self.get_connection(for_write=True) as conn:
            cursor = conn.cursor()

            # Link the scan to a downloads row when the path is known.
            cursor.execute('SELECT id FROM downloads WHERE file_path = ? LIMIT 1', (file_path,))
            link_row = cursor.fetchone()
            download_id = link_row['id'] if link_row else None

            # Coerce numpy floats to plain Python floats so SQLite stores
            # a numeric value rather than a blob.
            numeric_confidence = None if confidence is None else float(confidence)

            cursor.execute('''
                INSERT INTO face_recognition_scans
                (download_id, file_path, has_match, matched_person, confidence, face_count, scan_type)
                VALUES (?, ?, ?, ?, ?, ?, ?)
            ''', (download_id, file_path, has_match, matched_person,
                  numeric_confidence, face_count, scan_type))

            conn.commit()
            return True
    except Exception as e:
        logger.error(f"Failed to log face recognition scan: {e}")
        return False
|
|
|
|
@staticmethod
|
|
def _convert_face_scan_row(row) -> Dict:
|
|
"""Convert a face_recognition_scans row, handling bytes→proper types."""
|
|
import struct
|
|
result = dict(row)
|
|
for key, value in result.items():
|
|
if isinstance(value, bytes):
|
|
if key == 'confidence':
|
|
try:
|
|
result[key] = struct.unpack('d', value)[0]
|
|
except (struct.error, ValueError):
|
|
result[key] = None
|
|
else:
|
|
try:
|
|
result[key] = value.decode('utf-8')
|
|
except (UnicodeDecodeError, AttributeError):
|
|
result[key] = None
|
|
return result
|
|
|
|
def get_face_recognition_result(self, file_path: str) -> Optional[Dict]:
    """
    Get the most recent face recognition result for a file.

    Tries an exact path match first; if none is found (the file may have
    been moved), falls back to matching on the basename.

    Args:
        file_path: Path to the file

    Returns:
        Dictionary with scan result or None
    """
    try:
        with self.get_connection() as conn:
            cursor = conn.cursor()

            # Try exact path match first
            cursor.execute('''
                SELECT has_match, matched_person, confidence, face_count, scan_date, scan_type
                FROM face_recognition_scans
                WHERE file_path = ?
                ORDER BY scan_date DESC
                LIMIT 1
            ''', (file_path,))

            row = cursor.fetchone()
            if row:
                return self._convert_face_scan_row(row)

            # If no exact match, try matching by filename (in case file was moved).
            # BUGFIX: the LIKE pattern previously bound the literal string
            # '%(unknown)' and never used the computed filename, so this
            # fallback could never find the moved file.
            import os
            filename = os.path.basename(file_path)
            cursor.execute('''
                SELECT has_match, matched_person, confidence, face_count, scan_date, scan_type
                FROM face_recognition_scans
                WHERE file_path LIKE ?
                ORDER BY scan_date DESC
                LIMIT 1
            ''', (f'%{filename}',))

            row = cursor.fetchone()
            if row:
                return self._convert_face_scan_row(row)
            return None
    except Exception as e:
        logger.error(f"Failed to get face recognition result: {e}")
        return None
|
|
|
|
def get_face_recognition_results_batch(self, file_paths: List[str]) -> Dict[str, Dict]:
    """
    Get face recognition results for multiple files efficiently.

    Queries in chunks of 900 paths so the IN-clause never exceeds
    SQLite's default 999 bound-parameter limit (the previous single-query
    version raised "too many SQL variables" on large batches).

    Args:
        file_paths: List of file paths to look up

    Returns:
        Dictionary mapping file_path -> face recognition result dict
    """
    if not file_paths:
        return {}

    CHUNK_SIZE = 900  # stay under SQLITE_MAX_VARIABLE_NUMBER (default 999)
    results: Dict[str, Dict] = {}
    try:
        with self.get_connection() as conn:
            cursor = conn.cursor()

            for start in range(0, len(file_paths), CHUNK_SIZE):
                chunk = file_paths[start:start + CHUNK_SIZE]
                placeholders = ','.join(['?' for _ in chunk])
                cursor.execute(f'''
                    SELECT file_path, has_match, matched_person, confidence, face_count, scan_date, scan_type
                    FROM face_recognition_scans
                    WHERE file_path IN ({placeholders})
                ''', chunk)

                for row in cursor.fetchall():
                    results[row['file_path']] = {
                        'has_match': row['has_match'],
                        'matched_person': row['matched_person'],
                        'confidence': row['confidence'],
                        'face_count': row['face_count'],
                        'scan_date': row['scan_date'],
                        'scan_type': row['scan_type']
                    }

        return results
    except Exception as e:
        logger.error(f"Failed to batch get face recognition results: {e}")
        return {}
|
|
|
|
def _get_dimensions_batch(self, file_paths: List[str]) -> Dict[str, tuple]:
|
|
"""
|
|
Batch lookup dimensions from media_metadata.db - avoids N+1 queries.
|
|
|
|
Args:
|
|
file_paths: List of file paths to look up
|
|
|
|
Returns:
|
|
Dictionary mapping file_path -> (width, height) tuple
|
|
"""
|
|
import hashlib
|
|
import sqlite3
|
|
|
|
if not file_paths:
|
|
return {}
|
|
|
|
try:
|
|
metadata_db_path = self.db_path.parent / 'media_metadata.db'
|
|
|
|
# Build hash -> path mapping
|
|
hash_to_path = {}
|
|
for fp in file_paths:
|
|
file_hash = hashlib.sha256(fp.encode()).hexdigest()
|
|
hash_to_path[file_hash] = fp
|
|
|
|
# Query all at once
|
|
conn = sqlite3.connect(str(metadata_db_path))
|
|
placeholders = ','.join('?' * len(hash_to_path))
|
|
cursor = conn.execute(
|
|
f"SELECT file_hash, width, height FROM media_metadata WHERE file_hash IN ({placeholders})",
|
|
list(hash_to_path.keys())
|
|
)
|
|
|
|
# Build result mapping by file_path
|
|
result = {}
|
|
for row in cursor.fetchall():
|
|
file_hash, width, height = row
|
|
if file_hash in hash_to_path:
|
|
result[hash_to_path[file_hash]] = (width, height)
|
|
|
|
conn.close()
|
|
return result
|
|
|
|
except Exception as e:
|
|
logger.error(f"Failed to batch get dimensions: {e}")
|
|
return {}
|
|
|
|
# ==================== Recycle Bin Methods ====================
|
|
|
|
def move_to_recycle_bin(self, file_path: str, deleted_from: str, deleted_by: str = None, metadata: dict = None) -> Optional[str]:
    """
    Move a file to the recycle bin (soft delete).

    Side effects, in order: metadata is gathered from file_inventory and
    downloads, the file's hash is checked against existing recycle-bin
    entries (duplicates are unlinked instead of re-recycled), dependent
    tables' paths are re-pointed at the recycle location, the file is
    physically moved, and finally a recycle_bin row is inserted and the
    downloads rows are removed.

    Args:
        file_path: Original file path to delete
        deleted_from: Where file was deleted from ('downloads', 'media', 'review')
        deleted_by: Username of person who deleted
        metadata: Additional metadata (platform, source, content_type, etc.)

    Returns:
        UUID of recycle bin entry, or None if failed
    """
    import uuid
    import shutil
    import os

    try:
        source_path = Path(file_path)
        if not source_path.exists():
            logger.error(f"File not found for recycle: {file_path}")
            return None

        # Generate UUID for unique filename
        recycle_id = str(uuid.uuid4())
        file_extension = source_path.suffix
        original_filename = source_path.name
        file_size = source_path.stat().st_size
        original_mtime = source_path.stat().st_mtime

        # Create recycle directory if needed
        recycle_dir = Path("/opt/immich/recycle")
        recycle_dir.mkdir(parents=True, exist_ok=True)

        # Build recycle path with UUID
        recycle_path = recycle_dir / f"{recycle_id}{file_extension}"

        # Record in database (do this BEFORE moving file)
        with self.get_connection(for_write=True) as conn:
            cursor = conn.cursor()

            # Get metadata from file_inventory to preserve it for restoration
            cursor.execute('''
                SELECT platform, source, content_type, width, height, created_date FROM file_inventory
                WHERE file_path = ?
            ''', (file_path,))
            inventory_row = cursor.fetchone()

            # Get dates from downloads table to preserve for restoration
            cursor.execute('''
                SELECT post_date, download_date FROM downloads
                WHERE file_path = ?
                ORDER BY download_date DESC LIMIT 1
            ''', (file_path,))
            downloads_row = cursor.fetchone()

            # Merge file_inventory metadata with passed metadata.
            # Caller-supplied values win; DB values only fill the gaps.
            full_metadata = metadata.copy() if metadata else {}
            if inventory_row:
                full_metadata.setdefault('platform', inventory_row['platform'])
                full_metadata.setdefault('source', inventory_row['source'])
                full_metadata.setdefault('content_type', inventory_row['content_type'])
                if inventory_row['width']:
                    full_metadata.setdefault('width', inventory_row['width'])
                if inventory_row['height']:
                    full_metadata.setdefault('height', inventory_row['height'])
            if downloads_row:
                full_metadata.setdefault('post_date', downloads_row['post_date'])
                full_metadata.setdefault('download_date', downloads_row['download_date'])

            # Fallback: Use file_inventory.created_date as download_date if not from downloads
            if 'download_date' not in full_metadata and inventory_row and inventory_row['created_date']:
                full_metadata['download_date'] = inventory_row['created_date']

            # Fallback: Extract post_date from filename if not available (format: YYYYMMDD_HHMMSS_...)
            if 'post_date' not in full_metadata:
                import re
                date_match = re.match(r'^[^_]*_(\d{4})(\d{2})(\d{2})_(\d{2})(\d{2})(\d{2})_', original_filename)
                if date_match:
                    full_metadata['post_date'] = f"{date_match.group(1)}-{date_match.group(2)}-{date_match.group(3)}T{date_match.group(4)}:{date_match.group(5)}:{date_match.group(6)}"

            # CRITICAL: Force content_type to be 'image' or 'video' based on file extension
            ext = source_path.suffix.lower()
            image_exts = {'.jpg', '.jpeg', '.png', '.gif', '.heic', '.heif', '.webp', '.bmp', '.tiff'}
            full_metadata['content_type'] = 'image' if ext in image_exts else 'video'

            # Calculate file hash BEFORE moving file (for duplicate detection)
            file_hash = None
            try:
                file_hash = UnifiedDatabase.get_file_hash(str(source_path))
            except Exception as e:
                logger.warning(f"Failed to calculate hash for recycle bin: {e}")

            # Check if this hash already exists in recycle bin (prevent internal duplicates)
            if file_hash:
                cursor.execute("SELECT id, original_filename FROM recycle_bin WHERE file_hash = ? LIMIT 1", (file_hash,))
                existing = cursor.fetchone()
                if existing:
                    logger.info(f"File hash already in recycle bin (existing: {existing['original_filename']}), skipping duplicate: {original_filename}")

                    # Delete the source file since we already have this in recycle bin
                    try:
                        source_path.unlink()
                        logger.debug(f"Deleted duplicate file: {source_path}")
                    except Exception as e:
                        logger.warning(f"Failed to delete duplicate file {source_path}: {e}")

                    # Still need to clean up the database records (downloads, file_inventory)
                    cursor.execute('DELETE FROM file_inventory WHERE file_path = ?', (file_path,))

                    # Delete from downloads table - prefer deleting by URL so
                    # carousel siblings sharing the URL are also removed.
                    cursor.execute('SELECT url FROM downloads WHERE file_path = ? LIMIT 1', (file_path,))
                    url_row = cursor.fetchone()
                    if url_row and url_row['url']:
                        cursor.execute('DELETE FROM downloads WHERE url = ?', (url_row['url'],))
                    else:
                        cursor.execute('DELETE FROM downloads WHERE file_path = ?', (file_path,))

                    return existing['id']  # Return existing recycle bin ID

            # Update perceptual hash path BEFORE moving file (so duplicate detection still works)
            cursor.execute('''
                UPDATE instagram_perceptual_hashes SET file_path = ? WHERE file_path = ?
            ''', (str(recycle_path), file_path))
            if cursor.rowcount > 0:
                logger.debug(f"Updated perceptual hash path to recycle: {original_filename}")

            # Update face recognition scan path
            cursor.execute('''
                UPDATE face_recognition_scans SET file_path = ? WHERE file_path = ?
            ''', (str(recycle_path), file_path))

            # Update semantic embeddings path
            try:
                cursor.execute('''
                    UPDATE semantic_embeddings SET file_path = ? WHERE file_path = ?
                ''', (str(recycle_path), file_path))
            except sqlite3.OperationalError:
                pass  # Table may not exist

            # Move file to recycle bin (preserves mtime)
            shutil.move(str(source_path), str(recycle_path))

            # Restore original mtime in case shutil.move changed it
            os.utime(str(recycle_path), (original_mtime, original_mtime))

            # Insert into recycle_bin
            cursor.execute('''
                INSERT INTO recycle_bin
                (id, original_path, original_filename, recycle_path, file_extension,
                 file_size, original_mtime, deleted_from, deleted_by, metadata, restore_count, file_hash)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 0, ?)
            ''', (
                recycle_id,
                str(source_path),
                original_filename,
                str(recycle_path),
                file_extension,
                file_size,
                original_mtime,
                deleted_from,
                deleted_by,
                json.dumps(full_metadata, default=str),
                file_hash
            ))

            # Update file_inventory to 'recycle' location (preserves tags and collections)
            # Don't delete - just update location and path so tags/collections are preserved for restore
            cursor.execute('''
                UPDATE file_inventory
                SET location = 'recycle', file_path = ?, last_verified = CURRENT_TIMESTAMP
                WHERE file_path = ?
            ''', (str(recycle_path), file_path))
            if cursor.rowcount > 0:
                logger.debug(f"Updated file_inventory location to recycle: {original_filename}")

            # Delete content embeddings when moving to recycle bin
            # (will be regenerated if file is restored to final)
            cursor.execute('''
                DELETE FROM content_embeddings
                WHERE file_id IN (SELECT id FROM file_inventory WHERE file_path = ?)
            ''', (str(recycle_path),))
            if cursor.rowcount > 0:
                logger.debug(f"Deleted content embedding for recycled file: {original_filename}")

            # Delete from downloads table - for carousel posts, delete ALL entries with same URL
            # First get the URL for this file
            cursor.execute('SELECT url FROM downloads WHERE file_path = ? LIMIT 1', (file_path,))
            url_row = cursor.fetchone()

            if url_row and url_row['url']:
                # Delete all downloads with this URL (handles carousel posts with multiple files)
                cursor.execute('DELETE FROM downloads WHERE url = ?', (url_row['url'],))
                logger.info(f"Deleted {cursor.rowcount} downloads records for URL: {url_row['url']}")
            else:
                # Fallback to just deleting by file_path
                cursor.execute('DELETE FROM downloads WHERE file_path = ?', (file_path,))
                logger.info(f"Deleted {cursor.rowcount} downloads records")

            logger.info(f"Moved to recycle bin: {original_filename} (ID: {recycle_id})")
            return recycle_id

    except Exception as e:
        logger.error(f"Failed to move file to recycle bin: {e}")
        return None
|
|
|
|
def restore_from_recycle_bin(self, recycle_id: str) -> bool:
|
|
"""
|
|
Restore a file from recycle bin to its original location
|
|
|
|
Args:
|
|
recycle_id: UUID of recycle bin entry
|
|
|
|
Returns:
|
|
True if restored successfully
|
|
"""
|
|
import shutil
|
|
import os
|
|
|
|
try:
|
|
with self.get_connection(for_write=True) as conn:
|
|
cursor = conn.cursor()
|
|
|
|
# Get recycle bin record
|
|
cursor.execute('''
|
|
SELECT original_path, recycle_path, original_mtime, restore_count, original_filename,
|
|
deleted_from, file_size, metadata
|
|
FROM recycle_bin WHERE id = ?
|
|
''', (recycle_id,))
|
|
row = cursor.fetchone()
|
|
|
|
if not row:
|
|
logger.error(f"Recycle bin entry not found: {recycle_id}")
|
|
return False
|
|
|
|
original_path = Path(row['original_path'])
|
|
recycle_path = Path(row['recycle_path'])
|
|
original_mtime = row['original_mtime']
|
|
restore_count = row['restore_count']
|
|
original_filename = row['original_filename']
|
|
deleted_from = row['deleted_from']
|
|
file_size = row['file_size']
|
|
metadata = json.loads(row['metadata']) if row['metadata'] else {}
|
|
|
|
if not recycle_path.exists():
|
|
logger.error(f"Recycle file not found: {recycle_path}")
|
|
return False
|
|
|
|
# Extract metadata
|
|
platform = metadata.get('platform', 'unknown')
|
|
# Normalize platform name (forum -> forums for consistency)
|
|
if platform == 'forum':
|
|
platform = 'forums'
|
|
source = metadata.get('source', 'unknown')
|
|
|
|
# Infer platform/source from original path if not in metadata
|
|
original_path_str_lower = str(original_path).lower()
|
|
if platform == 'unknown':
|
|
if '/instagram/' in original_path_str_lower:
|
|
platform = 'instagram'
|
|
elif '/tiktok/' in original_path_str_lower:
|
|
platform = 'tiktok'
|
|
elif '/snapchat/' in original_path_str_lower:
|
|
platform = 'snapchat'
|
|
elif '/reddit/' in original_path_str_lower:
|
|
platform = 'reddit'
|
|
if source == 'unknown' and original_filename:
|
|
import re
|
|
# Extract username from filename pattern: username_YYYYMMDD_...
|
|
src_match = re.match(r'^(.+?)[-_](\d{8})[-_]', original_filename)
|
|
if src_match:
|
|
source = src_match.group(1)
|
|
|
|
# CRITICAL: Determine content_type from file extension only (must be 'image' or 'video')
|
|
ext = original_path.suffix.lower()
|
|
image_exts = {'.jpg', '.jpeg', '.png', '.gif', '.heic', '.heif', '.webp', '.bmp', '.tiff'}
|
|
content_type = 'image' if ext in image_exts else 'video'
|
|
|
|
# Check if original_path is a temp folder - if so, redirect to proper destination
|
|
original_path_str = str(original_path)
|
|
if '/temp/' in original_path_str or original_path_str.startswith('/opt/media-downloader/temp/'):
|
|
# File was never moved from temp - determine correct destination
|
|
logger.warning(f"Original path is temp folder, redirecting to proper destination: {original_path}")
|
|
|
|
# Determine destination based on platform and content type from path
|
|
if '/instagram/' in original_path_str or platform == 'instagram':
|
|
if '/stories/' in original_path_str:
|
|
dest_base = Path('/opt/immich/md/social media/instagram/stories')
|
|
elif '/tagged/' in original_path_str:
|
|
dest_base = Path('/opt/immich/review/social media/instagram/tagged')
|
|
else:
|
|
dest_base = Path('/opt/immich/md/social media/instagram/posts')
|
|
elif '/tiktok/' in original_path_str or platform == 'tiktok':
|
|
dest_base = Path('/opt/immich/md/social media/tiktok/reels')
|
|
elif '/snapchat/' in original_path_str or platform == 'snapchat':
|
|
dest_base = Path('/opt/immich/md/social media/snapchat')
|
|
else:
|
|
# Default to review folder
|
|
dest_base = Path('/opt/immich/review')
|
|
|
|
# For Instagram/TikTok, add username subdirectory
|
|
if platform in ('instagram', 'tiktok') or '/instagram/' in original_path_str or '/tiktok/' in original_path_str:
|
|
import re
|
|
username_match = re.match(r'^(.+?)_(\d{8})_', original_filename)
|
|
# For tagged content, extract username from filename (poster's username)
|
|
# because source in database is the monitored account, not the poster
|
|
if '/tagged/' in original_path_str and username_match:
|
|
dest_base = dest_base / username_match.group(1)
|
|
elif source and source != 'unknown':
|
|
dest_base = dest_base / source
|
|
elif username_match:
|
|
# Fallback: extract username from filename
|
|
dest_base = dest_base / username_match.group(1)
|
|
|
|
final_path = dest_base / original_filename
|
|
logger.info(f"Redirected restore path from temp to: {final_path}")
|
|
else:
|
|
# Normal case - restore to original path
|
|
# But check if Instagram/TikTok file needs username subdirectory added
|
|
final_path = original_path
|
|
|
|
if platform in ('instagram', 'tiktok') or '/instagram/' in original_path_str or '/tiktok/' in original_path_str:
|
|
# Check if path is missing username subdirectory
|
|
# Pattern: .../instagram/stories/filename (missing username)
|
|
# vs: .../instagram/stories/username/filename (has username)
|
|
import re
|
|
path_parts = original_path_str.split('/')
|
|
# Find the content type folder (stories, posts, reels, tagged)
|
|
content_folders = ['stories', 'posts', 'reels', 'tagged']
|
|
for i, part in enumerate(path_parts):
|
|
if part in content_folders and i + 1 < len(path_parts):
|
|
# Check if next part is the filename (no username subdirectory)
|
|
next_part = path_parts[i + 1]
|
|
if next_part == original_filename:
|
|
# Missing username subdirectory - add it
|
|
# For tagged content, extract username from filename (poster's username)
|
|
# because source in database is the monitored account, not the poster
|
|
username_match = re.match(r'^(.+?)_(\d{8})_', original_filename)
|
|
if part == 'tagged' and username_match:
|
|
# Tagged content - use poster's username from filename
|
|
username = username_match.group(1)
|
|
elif source and source != 'unknown':
|
|
username = source
|
|
elif username_match:
|
|
# Fallback: extract from filename
|
|
username = username_match.group(1)
|
|
else:
|
|
username = None
|
|
|
|
if username:
|
|
# Insert username subdirectory
|
|
new_parts = path_parts[:i+1] + [username] + path_parts[i+1:]
|
|
final_path = Path('/'.join(new_parts))
|
|
logger.info(f"Added username subdirectory to restore path: {final_path}")
|
|
break
|
|
|
|
# Check if final path already exists
|
|
if final_path.exists():
|
|
logger.warning(f"File already exists at destination: {final_path}")
|
|
return False
|
|
|
|
# Ensure parent directory exists
|
|
final_path.parent.mkdir(parents=True, exist_ok=True)
|
|
|
|
# Move file from recycle bin to final location
|
|
shutil.move(str(recycle_path), str(final_path))
|
|
|
|
try:
|
|
# Restore original mtime
|
|
os.utime(str(final_path), (original_mtime, original_mtime))
|
|
|
|
# Update file_inventory with restored path (preserves tags and collections)
|
|
# Use T separator for ISO 8601 format (consistent timezone handling in frontend)
|
|
from datetime import datetime
|
|
created_date = datetime.fromtimestamp(original_mtime).strftime("%Y-%m-%dT%H:%M:%S")
|
|
|
|
# Determine location based on where it was deleted from
|
|
location = 'review' if deleted_from == 'review' else 'final'
|
|
|
|
# First try to update existing record (file_inventory row kept during recycle)
|
|
# Also restore platform/source from metadata in case they were lost
|
|
cursor.execute('''
|
|
UPDATE file_inventory
|
|
SET file_path = ?, location = ?, last_verified = CURRENT_TIMESTAMP,
|
|
platform = CASE WHEN platform = 'unknown' OR platform IS NULL THEN ? ELSE platform END,
|
|
source = CASE WHEN source = 'unknown' OR source IS NULL THEN ? ELSE source END
|
|
WHERE file_path = ?
|
|
''', (str(final_path), location, platform, source, str(recycle_path)))
|
|
|
|
if cursor.rowcount == 0:
|
|
# No existing record - insert new one (for legacy entries before this fix)
|
|
cursor.execute('''
|
|
INSERT INTO file_inventory (
|
|
file_path, filename, platform, source, content_type,
|
|
file_size, location, created_date, last_verified
|
|
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
|
|
ON CONFLICT(file_path) DO UPDATE SET
|
|
filename = excluded.filename,
|
|
platform = excluded.platform,
|
|
source = excluded.source,
|
|
content_type = excluded.content_type,
|
|
file_size = excluded.file_size,
|
|
location = excluded.location,
|
|
created_date = excluded.created_date,
|
|
last_verified = CURRENT_TIMESTAMP
|
|
''', (str(final_path), original_filename, platform, source, content_type,
|
|
file_size, location, created_date))
|
|
else:
|
|
logger.debug(f"Updated existing file_inventory record for restored file: {original_filename}")
|
|
|
|
# Re-add to downloads table so file appears in UI
|
|
media_id = metadata.get('media_id', original_filename.split('_')[-1].split('.')[0])
|
|
url = metadata.get('url', '')
|
|
|
|
# Use original dates from metadata if available, otherwise use created_date
|
|
post_date = metadata.get('post_date') or created_date
|
|
download_date = metadata.get('download_date') or created_date
|
|
|
|
# Generate url_hash for the unique constraint
|
|
import hashlib
|
|
url_hash = hashlib.sha256(url.encode()).hexdigest() if url else hashlib.sha256(original_filename.encode()).hexdigest()
|
|
|
|
cursor.execute('''
|
|
INSERT INTO downloads (
|
|
url_hash, url, media_id, platform, source, filename, file_path, file_size,
|
|
post_date, download_date, status
|
|
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'completed')
|
|
ON CONFLICT(url_hash) DO UPDATE SET
|
|
filename = excluded.filename,
|
|
file_path = excluded.file_path,
|
|
file_size = excluded.file_size,
|
|
post_date = excluded.post_date,
|
|
download_date = excluded.download_date,
|
|
status = 'completed'
|
|
''', (url_hash, url, media_id, platform, source, original_filename, str(final_path),
|
|
file_size, post_date, download_date))
|
|
|
|
# Update perceptual hash path (from recycle to restored location)
|
|
cursor.execute('''
|
|
UPDATE instagram_perceptual_hashes SET file_path = ? WHERE file_path = ?
|
|
''', (str(final_path), str(recycle_path)))
|
|
if cursor.rowcount > 0:
|
|
logger.debug(f"Updated perceptual hash path from recycle: {original_filename}")
|
|
|
|
# Update face recognition scan path
|
|
cursor.execute('''
|
|
UPDATE face_recognition_scans SET file_path = ? WHERE file_path = ?
|
|
''', (str(final_path), str(recycle_path)))
|
|
|
|
# Update semantic embeddings path
|
|
try:
|
|
cursor.execute('''
|
|
UPDATE semantic_embeddings SET file_path = ? WHERE file_path = ?
|
|
''', (str(final_path), str(recycle_path)))
|
|
except sqlite3.OperationalError:
|
|
pass # Table may not exist
|
|
|
|
# Queue for discovery scan if restoring to 'final' (media library)
|
|
if location == 'final':
|
|
cursor.execute('SELECT id FROM file_inventory WHERE file_path = ?', (str(final_path),))
|
|
inv_row = cursor.fetchone()
|
|
if inv_row:
|
|
file_id = inv_row['id']
|
|
cursor.execute('''
|
|
INSERT INTO discovery_scan_queue (file_id, file_path, scan_type, priority, status)
|
|
VALUES (?, ?, 'embedding', 8, 'pending')
|
|
ON CONFLICT(file_id, scan_type) DO NOTHING
|
|
''', (file_id, str(final_path)))
|
|
if cursor.rowcount > 0:
|
|
logger.debug(f"Queued for discovery scan after restore: {original_filename}")
|
|
|
|
# Delete from recycle_bin table (do this last so if file_inventory insert fails, transaction rolls back)
|
|
cursor.execute('DELETE FROM recycle_bin WHERE id = ?', (recycle_id,))
|
|
|
|
logger.info(f"Restored from recycle bin: {original_filename} (restored {restore_count + 1} times)")
|
|
return True
|
|
|
|
except Exception as db_error:
|
|
# If database operations fail, move file back to recycle bin
|
|
logger.error(f"Database operation failed during restore, moving file back to recycle bin: {db_error}")
|
|
try:
|
|
shutil.move(str(final_path), str(recycle_path))
|
|
except Exception as move_error:
|
|
logger.error(f"CRITICAL: Failed to move file back to recycle bin: {move_error}")
|
|
raise
|
|
|
|
except Exception as e:
|
|
logger.error(f"Failed to restore from recycle bin: {e}")
|
|
return False
|
|
|
|
def permanently_delete_from_recycle_bin(self, recycle_id: str) -> bool:
    """
    Permanently delete a file from recycle bin.

    Removes both the physical file (if it still exists on disk) and the
    corresponding database record.

    Args:
        recycle_id: UUID of recycle bin entry

    Returns:
        True if deleted successfully
    """
    try:
        with self.get_connection(for_write=True) as conn:
            cursor = conn.cursor()

            # Look up the entry so we know which physical file to remove
            cursor.execute('SELECT recycle_path, original_filename FROM recycle_bin WHERE id = ?', (recycle_id,))
            entry = cursor.fetchone()
            if entry is None:
                logger.error(f"Recycle bin entry not found: {recycle_id}")
                return False

            file_on_disk = Path(entry['recycle_path'])
            display_name = entry['original_filename']

            # Remove the physical file first; a missing file is not an error
            # (it may have been cleaned up externally)
            if file_on_disk.exists():
                file_on_disk.unlink()

            # Drop the database record
            cursor.execute('DELETE FROM recycle_bin WHERE id = ?', (recycle_id,))

            logger.info(f"Permanently deleted: {display_name}")
            return True

    except Exception as e:
        logger.error(f"Failed to permanently delete: {e}")
        return False
def list_recycle_bin(self, deleted_from: str = None, platform: str = None, source: str = None,
                     search: str = None, media_type: str = None, date_from: str = None,
                     date_to: str = None, size_min: int = None, size_max: int = None,
                     sort_by: str = 'deleted_at', sort_order: str = 'desc',
                     limit: int = 100, offset: int = 0) -> Dict:
    """
    List files in recycle bin, with filtering, sorting and pagination.

    Results are enriched from each row's metadata JSON (dimensions, platform,
    source, dates) and joined with face-recognition scan results when present.

    Args:
        deleted_from: Filter by source ('downloads', 'media', 'review'), or None for all
        platform: Filter by platform (instagram, tiktok, etc.)
        source: Filter by source/username
        search: Search in filename (substring match)
        media_type: Filter by type ('image', 'video', or None for all)
        date_from: Filter by deletion date (YYYY-MM-DD)
        date_to: Filter by deletion date (YYYY-MM-DD, inclusive of the whole day)
        size_min: Minimum file size in bytes
        size_max: Maximum file size in bytes
        sort_by: Column to sort by ('deleted_at', 'file_size', 'filename', 'deleted_from',
                 'download_date', 'post_date', 'confidence')
        sort_order: Sort direction ('asc' or 'desc')
        limit: Maximum number of results
        offset: Offset for pagination

    Returns:
        Dict with 'items' (list of recycle bin entries) and 'total' (total count for filter)
    """
    try:
        with self.get_connection() as conn:
            cursor = conn.cursor()

            # Build WHERE clause with table alias 'r.' from the start
            # to avoid fragile chained .replace() calls
            conditions = []
            params = []

            if deleted_from:
                conditions.append('r.deleted_from = ?')
                params.append(deleted_from)

            if platform:
                # Platform is stored in metadata JSON
                conditions.append("json_extract(r.metadata, '$.platform') = ?")
                params.append(platform)

            if source:
                # Source is stored in metadata JSON
                conditions.append("json_extract(r.metadata, '$.source') = ?")
                params.append(source)

            if search:
                conditions.append('r.original_filename LIKE ?')
                params.append(f'%{search}%')

            # Media type is inferred from the stored file extension
            if media_type == 'image':
                conditions.append("LOWER(r.file_extension) IN ('.jpg', '.jpeg', '.png', '.heic', '.heif', '.webp', '.bmp', '.avif', '.tiff', '.tif', '.gif')")
            elif media_type == 'video':
                conditions.append("LOWER(r.file_extension) IN ('.mp4', '.mov', '.avi', '.mkv', '.webm', '.flv', '.m4v')")

            if date_from:
                conditions.append('r.deleted_at >= ?')
                params.append(date_from)

            if date_to:
                # Append end-of-day time so the upper bound is inclusive
                conditions.append('r.deleted_at <= ?')
                params.append(date_to + ' 23:59:59')

            if size_min is not None:
                conditions.append('r.file_size >= ?')
                params.append(size_min)

            if size_max is not None:
                conditions.append('r.file_size <= ?')
                params.append(size_max)

            where_clause = ' AND '.join(conditions) if conditions else '1=1'

            # Validate and build ORDER BY clause.
            # Whitelist of SQL expressions prevents injection via sort_by.
            valid_sort_columns = {
                'deleted_at': 'r.deleted_at',
                'file_size': 'r.file_size',
                'filename': 'r.original_filename',
                'deleted_from': 'r.deleted_from',
                'download_date': "COALESCE(json_extract(r.metadata, '$.download_date'), CAST(r.deleted_at AS TEXT))",
                'post_date': "COALESCE(json_extract(r.metadata, '$.post_date'), json_extract(r.metadata, '$.download_date'), CAST(r.deleted_at AS TEXT))",
                'confidence': "COALESCE(fr.confidence, 0)"
            }
            # Unknown sort keys fall back to download_date (then deleted_at)
            sort_column = valid_sort_columns.get(sort_by, "COALESCE(json_extract(r.metadata, '$.download_date'), CAST(r.deleted_at AS TEXT))")
            sort_dir = 'ASC' if sort_order == 'asc' else 'DESC'

            # Get total count for the filter
            cursor.execute(f'SELECT COUNT(*) as count FROM recycle_bin r WHERE {where_clause}', params)
            total_count = cursor.fetchone()['count']

            # Get paginated items with face recognition data
            # (LEFT JOIN: rows without a face scan still appear)
            cursor.execute(f'''
                SELECT r.id, r.original_path, r.original_filename, r.file_extension, r.file_size,
                       r.original_mtime, r.deleted_from, r.deleted_at, r.deleted_by, r.metadata, r.restore_count, r.recycle_path,
                       fr.has_match as face_has_match, fr.matched_person as face_matched_person,
                       fr.confidence as face_confidence, fr.face_count
                FROM recycle_bin r
                LEFT JOIN face_recognition_scans fr ON r.recycle_path = fr.file_path
                WHERE {where_clause}
                ORDER BY {sort_column} {sort_dir}
                LIMIT ? OFFSET ?
            ''', params + [limit, offset])

            rows = cursor.fetchall()

            # Batch lookup dimensions from media_metadata.db for items missing width/height
            dimensions_cache = {}
            paths_needing_dimensions = []
            for row in rows:
                meta = {}
                if row['metadata']:
                    try:
                        meta = json.loads(row['metadata'])
                    except (json.JSONDecodeError, TypeError):
                        pass
                if not meta.get('width') or not meta.get('height'):
                    # Use original_path for dimension lookup
                    if row['original_path']:
                        paths_needing_dimensions.append(row['original_path'])

            if paths_needing_dimensions:
                dimensions_cache = self._get_dimensions_batch(paths_needing_dimensions)

            items = []
            for row in rows:
                item = dict(row)
                width, height = None, None
                # NOTE: these rebindings shadow the filter parameters of the
                # same names for the remainder of the loop body (intentional)
                platform, source = None, None
                download_date, post_date = None, None

                # Try to get metadata from JSON first
                video_id = None
                if item.get('metadata'):
                    try:
                        meta = json.loads(item['metadata'])
                        width = meta.get('width')
                        height = meta.get('height')
                        platform = meta.get('platform')
                        source = meta.get('source')
                        download_date = meta.get('download_date')
                        post_date = meta.get('post_date')
                        video_id = meta.get('video_id')
                    except (json.JSONDecodeError, TypeError):
                        pass

                # If dimensions not in metadata, check batch cache
                if (not width or not height) and item.get('original_path'):
                    cached_dims = dimensions_cache.get(item['original_path'])
                    if cached_dims:
                        width, height = cached_dims

                # Try to extract platform/source from filename if not in metadata
                if not platform or not source:
                    filename = item.get('original_filename', '')
                    if filename and '_' in filename:
                        # Pattern: username_date_id... -> source is first part
                        parts = filename.split('_')
                        if len(parts) >= 2:
                            potential_source = parts[0]
                            # Validate it looks like a username (not a random prefix)
                            if len(potential_source) >= 3 and potential_source.replace('.', '').isalnum():
                                if not source:
                                    source = potential_source
                                if not platform:
                                    platform = 'instagram'  # Default assumption for this filename pattern

                # Only set keys that have truthy values
                if width:
                    item['width'] = width
                if height:
                    item['height'] = height
                if platform:
                    item['platform'] = platform
                if source:
                    item['source'] = source
                if download_date:
                    item['download_date'] = download_date
                if post_date:
                    item['post_date'] = post_date
                if video_id:
                    item['video_id'] = video_id

                # Add face recognition data if available
                if item.get('face_has_match') is not None or item.get('face_confidence') is not None:
                    item['face_recognition'] = {
                        'scanned': True,
                        'matched': bool(item.get('face_has_match')),
                        'matched_person': item.get('face_matched_person'),
                        'confidence': item.get('face_confidence'),
                        'face_count': item.get('face_count')
                    }
                    # Clean up raw face fields
                    for key in ['face_has_match', 'face_matched_person', 'face_confidence', 'face_count']:
                        item.pop(key, None)

                items.append(item)
            return {
                'items': items,
                'total': total_count
            }

    except Exception as e:
        logger.error(f"Failed to list recycle bin: {e}")
        return {'items': [], 'total': 0}
def get_recycle_bin_filters(self, platform: str = None) -> Dict:
    """
    Get available filter options for recycle bin.

    Args:
        platform: If provided, only return sources for this platform

    Returns:
        Dict with 'platforms' and 'sources' lists
    """
    # Valid platform names (exclude download methods like 'fastdl', 'imginn', etc.)
    valid_platforms = {
        'instagram', 'tiktok', 'twitter', 'youtube', 'reddit',
        'forums', 'erome', 'fapello', 'bunkr', 'coomer', 'kemono',
        'onlyfans', 'fansly', 'patreon', 'pornhub', 'xvideos',
        'redgifs', 'imgur', 'gfycat', 'streamable', 'vimeo',
        'bilibili', 'coppermine', 'snapchat', 'facebook', 'pinterest',
        'tumblr', 'flickr', 'dailymotion', 'twitch'
    }

    try:
        with self.get_connection() as conn:
            cur = conn.cursor()

            # Distinct platforms recorded in metadata, restricted to the whitelist
            cur.execute('''
                SELECT DISTINCT json_extract(metadata, '$.platform') as platform
                FROM recycle_bin
                WHERE json_extract(metadata, '$.platform') IS NOT NULL
                ORDER BY platform
            ''')
            platforms = []
            for record in cur.fetchall():
                name = record['platform']
                if name and name.lower() in valid_platforms:
                    platforms.append(name)

            # Distinct sources, optionally narrowed to one platform
            if platform:
                cur.execute('''
                    SELECT DISTINCT json_extract(metadata, '$.source') as source
                    FROM recycle_bin
                    WHERE json_extract(metadata, '$.platform') = ?
                    AND json_extract(metadata, '$.source') IS NOT NULL
                    ORDER BY source
                ''', (platform,))
            else:
                cur.execute('''
                    SELECT DISTINCT json_extract(metadata, '$.source') as source
                    FROM recycle_bin
                    WHERE json_extract(metadata, '$.source') IS NOT NULL
                    ORDER BY source
                ''')
            sources = [record['source'] for record in cur.fetchall() if record['source']]

            return {
                'platforms': platforms,
                'sources': sources
            }

    except Exception as e:
        logger.error(f"Failed to get recycle bin filters: {e}")
        return {'platforms': [], 'sources': []}
def get_recycle_bin_stats(self) -> Dict:
    """
    Get recycle bin statistics.

    Returns:
        Dict with stats: total_count, total_size, by_source (per-origin count/size)
    """
    try:
        with self.get_connection() as conn:
            cur = conn.cursor()

            # Overall totals (size coalesced to 0 for an empty bin)
            cur.execute('SELECT COUNT(*), COALESCE(SUM(file_size), 0) FROM recycle_bin')
            totals = cur.fetchone()

            # Per-origin breakdown keyed by deleted_from
            cur.execute('''
                SELECT deleted_from, COUNT(*), COALESCE(SUM(file_size), 0)
                FROM recycle_bin
                GROUP BY deleted_from
            ''')
            by_source = {
                record[0]: {'count': record[1], 'size': record[2]}
                for record in cur.fetchall()
            }

            return {
                'total_count': totals[0],
                'total_size': totals[1],
                'by_source': by_source
            }

    except Exception as e:
        logger.error(f"Failed to get recycle bin stats: {e}")
        return {'total_count': 0, 'total_size': 0, 'by_source': {}}
def empty_recycle_bin(self, older_than_days: int = None) -> int:
    """
    Empty recycle bin (delete all or files older than X days).

    Args:
        older_than_days: Only delete files older than this many days, or None for all

    Returns:
        Number of files deleted
    """
    try:
        with self.get_connection(for_write=True) as conn:
            cursor = conn.cursor()

            # Select candidates first so we can remove both the physical file
            # and its database record for each entry
            if older_than_days:
                cursor.execute('''
                    SELECT id, recycle_path FROM recycle_bin
                    WHERE deleted_at < datetime('now', ? || ' days')
                ''', (f'-{older_than_days}',))
            else:
                cursor.execute('SELECT id, recycle_path FROM recycle_bin')

            deleted_count = 0
            for entry in cursor.fetchall():
                path_on_disk = Path(entry['recycle_path'])

                # Remove the physical file if it is still present
                if path_on_disk.exists():
                    path_on_disk.unlink()

                # Drop the database record
                cursor.execute('DELETE FROM recycle_bin WHERE id = ?', (entry['id'],))
                deleted_count += 1

            logger.info(f"Emptied recycle bin: {deleted_count} files deleted")
            return deleted_count

    except Exception as e:
        logger.error(f"Failed to empty recycle bin: {e}")
        return 0
# ==================== File Inventory Methods ====================
|
|
|
|
def upsert_file_inventory(self, file_path: str, filename: str, platform: str,
                          source: str = None, content_type: str = None,
                          file_size: int = None, file_hash: str = None,
                          width: int = None, height: int = None,
                          location: str = 'final', metadata: dict = None,
                          created_date: str = None, method: str = None,
                          video_id: str = None,
                          max_retries: int = DB_MAX_RETRIES) -> bool:
    """
    Insert or update file in inventory.

    Files landing in the 'final' location are also auto-queued for a
    background discovery (embedding) scan.

    Args:
        file_path: Absolute path to file
        filename: Filename for display
        platform: Platform name (instagram, tiktok, snapchat, forum, coppermine)
        source: Username, forum name, etc.
        content_type: 'image' or 'video'
        file_size: File size in bytes
        file_hash: SHA256 hash for deduplication
        width: Image/video width
        height: Image/video height
        location: 'final', 'review', or 'recycle'
        metadata: Additional metadata as dict (will be JSON encoded)
        created_date: Optional creation date (ISO format string or timestamp)
        method: Download method (fastdl, imginn, toolzu, instaloader) for Instagram
        video_id: Video ID for YouTube/other platforms (for thumbnail lookup)
        max_retries: Maximum retry attempts for database lock errors

    Returns:
        bool: True if successful
    """
    for attempt in range(max_retries):
        try:
            with self.get_connection(for_write=True) as conn:
                cursor = conn.cursor()

                # Convert metadata dict to JSON string
                metadata_json = json.dumps(metadata) if metadata else None

                # Single parameterized upsert (previously two near-identical
                # ~35-line statements). When no created_date is supplied we
                # insert CURRENT_TIMESTAMP and — on conflict — leave any
                # existing created_date untouched.
                if created_date:
                    created_insert_sql = '?'
                    created_update_sql = 'created_date = excluded.created_date,'
                    created_params = (created_date,)
                else:
                    created_insert_sql = 'CURRENT_TIMESTAMP'
                    created_update_sql = ''
                    created_params = ()

                cursor.execute(f'''
                    INSERT INTO file_inventory (
                        file_path, filename, platform, source, content_type,
                        file_size, file_hash, width, height, location, metadata,
                        created_date, last_verified, method, video_id
                    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, {created_insert_sql}, CURRENT_TIMESTAMP, ?, ?)
                    ON CONFLICT(file_path) DO UPDATE SET
                        filename = excluded.filename,
                        platform = excluded.platform,
                        source = excluded.source,
                        content_type = excluded.content_type,
                        file_size = excluded.file_size,
                        file_hash = excluded.file_hash,
                        width = excluded.width,
                        height = excluded.height,
                        location = excluded.location,
                        metadata = excluded.metadata,
                        {created_update_sql}
                        last_verified = CURRENT_TIMESTAMP,
                        method = excluded.method,
                        video_id = COALESCE(excluded.video_id, file_inventory.video_id)
                ''', (file_path, filename, platform, source, content_type,
                      file_size, file_hash, width, height, location, metadata_json)
                     + created_params + (method, video_id))

                # Auto-queue for discovery scan if location is 'final' (media library)
                if location == 'final':
                    # Get the file_id for queueing
                    cursor.execute('SELECT id FROM file_inventory WHERE file_path = ?', (file_path,))
                    row = cursor.fetchone()
                    if row:
                        file_id = row['id']
                        # Queue for embedding generation (low priority since it's background)
                        cursor.execute('''
                            INSERT INTO discovery_scan_queue (file_id, file_path, scan_type, priority, status)
                            VALUES (?, ?, 'embedding', 8, 'pending')
                            ON CONFLICT(file_id, scan_type) DO NOTHING
                        ''', (file_id, file_path))
                        if cursor.rowcount > 0:
                            # Fixed: previously logged the literal "(unknown)"
                            # instead of the actual filename
                            logger.debug(f"Queued for discovery scan: {filename}")

                return True

        except sqlite3.OperationalError as e:
            if _is_lock_error(e):
                if attempt < max_retries - 1:
                    # Exponential backoff, capped at DB_MAX_DELAY
                    delay = min(DB_MAX_DELAY, DB_BASE_DELAY * (2 ** attempt))
                    logger.warning(f"Database locked during file inventory upsert, retrying in {delay:.1f}s (attempt {attempt + 1}/{max_retries})")
                    time.sleep(delay)
                    continue
                else:
                    logger.error(f"Failed to upsert file inventory after {max_retries} attempts: {e}")
                    return False
            else:
                logger.error(f"Database error during file inventory upsert: {e}")
                return False
        except Exception as e:
            logger.error(f"Failed to upsert file inventory for {file_path}: {e}")
            return False

    return False
def delete_file_inventory(self, file_path: str, max_retries: int = DB_MAX_RETRIES) -> bool:
    """
    Remove file from inventory (when permanently deleted).

    Args:
        file_path: Absolute path to file
        max_retries: Maximum retry attempts for database lock errors

    Returns:
        bool: True if deleted
    """
    attempt = 0
    while attempt < max_retries:
        try:
            with self.get_connection(for_write=True) as conn:
                db_cursor = conn.cursor()
                db_cursor.execute('DELETE FROM file_inventory WHERE file_path = ?', (file_path,))
                # rowcount > 0 means a record was actually removed
                return db_cursor.rowcount > 0

        except sqlite3.OperationalError as e:
            if not _is_lock_error(e):
                logger.error(f"Database error during file inventory delete: {e}")
                return False
            if attempt >= max_retries - 1:
                logger.error(f"Failed to delete file inventory after {max_retries} attempts: {e}")
                return False
            # Lock contention: back off exponentially, capped at DB_MAX_DELAY
            delay = min(DB_MAX_DELAY, DB_BASE_DELAY * (2 ** attempt))
            logger.warning(f"Database locked during file inventory delete, retrying in {delay:.1f}s (attempt {attempt + 1}/{max_retries})")
            time.sleep(delay)
        except Exception as e:
            logger.error(f"Failed to delete file inventory for {file_path}: {e}")
            return False

        attempt += 1

    return False
def query_file_inventory(self, location: str = None, platform: str = None,
                         source: str = None, content_type: str = None,
                         limit: int = 50, offset: int = 0) -> List[Dict]:
    """
    Query file inventory with filters and pagination.

    Args:
        location: Filter by location ('final', 'review', 'recycle')
        platform: Filter by platform
        source: Filter by source (username, forum name, etc.)
        content_type: Filter by content type ('image', 'video')
        limit: Maximum number of results
        offset: Offset for pagination

    Returns:
        List of file records as dictionaries (newest created_date first)
    """
    try:
        with self.get_connection() as conn:
            cursor = conn.cursor()

            # Assemble WHERE fragments from the optional filters
            clauses = []
            params = []
            for column, value in (('location', location), ('platform', platform),
                                  ('source', source), ('content_type', content_type)):
                if value:
                    clauses.append(f' AND {column} = ?')
                    params.append(value)

            sql = ('SELECT * FROM file_inventory WHERE 1=1'
                   + ''.join(clauses)
                   + ' ORDER BY created_date DESC LIMIT ? OFFSET ?')
            cursor.execute(sql, params + [limit, offset])
            return [dict(record) for record in cursor.fetchall()]

    except Exception as e:
        logger.error(f"Failed to query file inventory: {e}")
        return []
def update_file_inventory_location(self, file_path: str, new_location: str, new_file_path: str = None,
                                   max_retries: int = DB_MAX_RETRIES) -> bool:
    """
    Update file location and optionally file path (e.g., final → review → recycle).

    Side effects on a successful update:
    - review → final sets moved_from_review = 1
    - final → review sets moved_from_media = 1
    - final → review/recycle deletes the file's content embedding
    - any move TO 'final' queues the file for a background discovery scan

    Args:
        file_path: Current absolute path to file
        new_location: New location ('final', 'review', 'recycle')
        new_file_path: Optional new file path (if file was moved)
        max_retries: Maximum retry attempts for database lock errors

    Returns:
        bool: True if updated
    """
    for attempt in range(max_retries):
        try:
            with self.get_connection(for_write=True) as conn:
                cursor = conn.cursor()

                # Get file_id and current location before update
                cursor.execute('SELECT id, location FROM file_inventory WHERE file_path = ?', (file_path,))
                row = cursor.fetchone()
                file_id = row['id'] if row else None
                old_location = row['location'] if row else None

                # Build the SET clause dynamically (previously six near-duplicate
                # UPDATE statements covering every flag/path combination)
                set_parts = ['location = ?']
                params = [new_location]
                if new_file_path:
                    set_parts.append('file_path = ?')
                    params.append(new_file_path)
                set_parts.append('last_verified = CURRENT_TIMESTAMP')
                if old_location == 'review' and new_location == 'final':
                    set_parts.append('moved_from_review = 1')
                elif old_location == 'final' and new_location == 'review':
                    set_parts.append('moved_from_media = 1')
                params.append(file_path)

                cursor.execute(
                    f'UPDATE file_inventory SET {", ".join(set_parts)} WHERE file_path = ?',
                    params
                )
                updated = cursor.rowcount > 0

                if updated and file_id:
                    # Delete embeddings when moving AWAY from 'final' (to review or recycle)
                    if old_location == 'final' and new_location in ('review', 'recycle'):
                        cursor.execute('DELETE FROM content_embeddings WHERE file_id = ?', (file_id,))
                        if cursor.rowcount > 0:
                            logger.debug(f"Deleted embedding for file moved to {new_location}: {Path(file_path).name}")

                    # Auto-queue for discovery scan if moving TO 'final' (media library)
                    elif new_location == 'final':
                        actual_path = new_file_path or file_path
                        cursor.execute('''
                            INSERT INTO discovery_scan_queue (file_id, file_path, scan_type, priority, status)
                            VALUES (?, ?, 'embedding', 8, 'pending')
                            ON CONFLICT(file_id, scan_type) DO NOTHING
                        ''', (file_id, actual_path))
                        if cursor.rowcount > 0:
                            logger.debug(f"Queued for discovery scan after move to final: {Path(actual_path).name}")

                return updated

        except sqlite3.OperationalError as e:
            if _is_lock_error(e):
                if attempt < max_retries - 1:
                    # Exponential backoff, capped at DB_MAX_DELAY
                    delay = min(DB_MAX_DELAY, DB_BASE_DELAY * (2 ** attempt))
                    logger.warning(f"Database locked during location update, retrying in {delay:.1f}s (attempt {attempt + 1}/{max_retries})")
                    time.sleep(delay)
                    continue
                else:
                    logger.error(f"Failed to update file inventory location after {max_retries} attempts: {e}")
                    return False
            else:
                logger.error(f"Database error during location update: {e}")
                return False
        except Exception as e:
            logger.error(f"Failed to update file inventory location for {file_path}: {e}")
            return False

    return False
def get_file_inventory_count(self, location: str = None, platform: str = None) -> int:
|
|
"""
|
|
Get count of files in inventory
|
|
|
|
Args:
|
|
location: Filter by location
|
|
platform: Filter by platform
|
|
|
|
Returns:
|
|
int: Count of files
|
|
"""
|
|
try:
|
|
with self.get_connection() as conn:
|
|
cursor = conn.cursor()
|
|
|
|
query = 'SELECT COUNT(*) FROM file_inventory WHERE 1=1'
|
|
params = []
|
|
|
|
if location:
|
|
query += ' AND location = ?'
|
|
params.append(location)
|
|
|
|
if platform:
|
|
query += ' AND platform = ?'
|
|
params.append(platform)
|
|
|
|
cursor.execute(query, params)
|
|
return cursor.fetchone()[0]
|
|
|
|
except Exception as e:
|
|
logger.error(f"Failed to get file inventory count: {e}")
|
|
return 0
|
|
|
|
# ==================== Discovery Scan Queue Methods ====================
|
|
|
|
def queue_discovery_scan(self, file_id: int, file_path: str, scan_type: str = 'embedding', priority: int = 5) -> bool:
|
|
"""
|
|
Add a file to the discovery scan queue for background processing.
|
|
|
|
Args:
|
|
file_id: ID from file_inventory table
|
|
file_path: Path to the file
|
|
scan_type: Type of scan ('embedding', 'perceptual_hash', 'face_recognition')
|
|
priority: 1-10, lower = higher priority
|
|
|
|
Returns:
|
|
bool: True if queued successfully
|
|
"""
|
|
try:
|
|
with self.get_connection(for_write=True) as conn:
|
|
cursor = conn.cursor()
|
|
cursor.execute('''
|
|
INSERT INTO discovery_scan_queue (file_id, file_path, scan_type, priority, status)
|
|
VALUES (?, ?, ?, ?, 'pending')
|
|
ON CONFLICT(file_id, scan_type) DO UPDATE SET
|
|
status = 'pending',
|
|
priority = MIN(excluded.priority, discovery_scan_queue.priority),
|
|
attempts = 0,
|
|
error_message = NULL
|
|
''', (file_id, file_path, scan_type, priority))
|
|
return True
|
|
except Exception as e:
|
|
logger.error(f"Failed to queue discovery scan for file_id {file_id}: {e}")
|
|
return False
|
|
|
|
def queue_file_for_discovery(self, file_path: str, scan_types: List[str] = None, priority: int = 5) -> bool:
|
|
"""
|
|
Queue a file for all discovery scans (convenience method).
|
|
|
|
Args:
|
|
file_path: Path to the file
|
|
scan_types: List of scan types (default: ['embedding'])
|
|
priority: 1-10, lower = higher priority
|
|
|
|
Returns:
|
|
bool: True if queued successfully
|
|
"""
|
|
if scan_types is None:
|
|
scan_types = ['embedding']
|
|
|
|
try:
|
|
with self.get_connection() as conn:
|
|
cursor = conn.cursor()
|
|
cursor.execute('SELECT id FROM file_inventory WHERE file_path = ?', (file_path,))
|
|
row = cursor.fetchone()
|
|
if not row:
|
|
logger.debug(f"File not in inventory, cannot queue for discovery: {file_path}")
|
|
return False
|
|
file_id = row['id']
|
|
|
|
success = True
|
|
for scan_type in scan_types:
|
|
if not self.queue_discovery_scan(file_id, file_path, scan_type, priority):
|
|
success = False
|
|
return success
|
|
|
|
except Exception as e:
|
|
logger.error(f"Failed to queue file for discovery: {e}")
|
|
return False
|
|
|
|
def get_pending_discovery_scans(self, limit: int = 50, scan_type: str = None) -> List[Dict]:
|
|
"""
|
|
Get pending discovery scans from queue.
|
|
|
|
Args:
|
|
limit: Maximum number of items to return
|
|
scan_type: Filter by scan type (optional)
|
|
|
|
Returns:
|
|
List of queue items with file_id, file_path, scan_type
|
|
"""
|
|
try:
|
|
with self.get_connection() as conn:
|
|
cursor = conn.cursor()
|
|
|
|
if scan_type:
|
|
cursor.execute('''
|
|
SELECT id, file_id, file_path, scan_type, priority, attempts
|
|
FROM discovery_scan_queue
|
|
WHERE status = 'pending' AND scan_type = ?
|
|
ORDER BY priority ASC, created_date ASC
|
|
LIMIT ?
|
|
''', (scan_type, limit))
|
|
else:
|
|
cursor.execute('''
|
|
SELECT id, file_id, file_path, scan_type, priority, attempts
|
|
FROM discovery_scan_queue
|
|
WHERE status = 'pending'
|
|
ORDER BY priority ASC, created_date ASC
|
|
LIMIT ?
|
|
''', (limit,))
|
|
|
|
return [dict(row) for row in cursor.fetchall()]
|
|
|
|
except Exception as e:
|
|
logger.error(f"Failed to get pending discovery scans: {e}")
|
|
return []
|
|
|
|
def mark_discovery_scan_started(self, queue_id: int) -> bool:
|
|
"""Mark a queue item as started (in progress)."""
|
|
try:
|
|
with self.get_connection(for_write=True) as conn:
|
|
cursor = conn.cursor()
|
|
cursor.execute('''
|
|
UPDATE discovery_scan_queue
|
|
SET status = 'processing', started_date = CURRENT_TIMESTAMP, attempts = attempts + 1
|
|
WHERE id = ?
|
|
''', (queue_id,))
|
|
return cursor.rowcount > 0
|
|
except Exception as e:
|
|
logger.error(f"Failed to mark discovery scan started: {e}")
|
|
return False
|
|
|
|
def mark_discovery_scan_completed(self, queue_id: int) -> bool:
|
|
"""Mark a queue item as completed (remove from queue)."""
|
|
try:
|
|
with self.get_connection(for_write=True) as conn:
|
|
cursor = conn.cursor()
|
|
cursor.execute('DELETE FROM discovery_scan_queue WHERE id = ?', (queue_id,))
|
|
return cursor.rowcount > 0
|
|
except Exception as e:
|
|
logger.error(f"Failed to mark discovery scan completed: {e}")
|
|
return False
|
|
|
|
def mark_discovery_scan_failed(self, queue_id: int, error_message: str, max_attempts: int = 3) -> bool:
|
|
"""Mark a queue item as failed, or remove if max attempts reached."""
|
|
try:
|
|
with self.get_connection(for_write=True) as conn:
|
|
cursor = conn.cursor()
|
|
|
|
# Check current attempts
|
|
cursor.execute('SELECT attempts FROM discovery_scan_queue WHERE id = ?', (queue_id,))
|
|
row = cursor.fetchone()
|
|
if not row:
|
|
return False
|
|
|
|
if row['attempts'] >= max_attempts:
|
|
# Max attempts reached, mark as permanently failed
|
|
cursor.execute('''
|
|
UPDATE discovery_scan_queue
|
|
SET status = 'failed', error_message = ?, completed_date = CURRENT_TIMESTAMP
|
|
WHERE id = ?
|
|
''', (error_message, queue_id))
|
|
else:
|
|
# Reset to pending for retry
|
|
cursor.execute('''
|
|
UPDATE discovery_scan_queue
|
|
SET status = 'pending', error_message = ?
|
|
WHERE id = ?
|
|
''', (error_message, queue_id))
|
|
|
|
return True
|
|
except Exception as e:
|
|
logger.error(f"Failed to mark discovery scan failed: {e}")
|
|
return False
|
|
|
|
def get_discovery_queue_stats(self) -> Dict[str, Any]:
|
|
"""Get statistics about the discovery scan queue."""
|
|
try:
|
|
with self.get_connection() as conn:
|
|
cursor = conn.cursor()
|
|
|
|
cursor.execute('''
|
|
SELECT
|
|
status,
|
|
scan_type,
|
|
COUNT(*) as count
|
|
FROM discovery_scan_queue
|
|
GROUP BY status, scan_type
|
|
''')
|
|
|
|
stats = {
|
|
'pending': 0,
|
|
'processing': 0,
|
|
'failed': 0,
|
|
'by_type': {}
|
|
}
|
|
|
|
for row in cursor.fetchall():
|
|
status = row['status']
|
|
scan_type = row['scan_type']
|
|
count = row['count']
|
|
|
|
if status in stats:
|
|
stats[status] += count
|
|
|
|
if scan_type not in stats['by_type']:
|
|
stats['by_type'][scan_type] = {'pending': 0, 'processing': 0, 'failed': 0}
|
|
if status in stats['by_type'][scan_type]:
|
|
stats['by_type'][scan_type][status] = count
|
|
|
|
return stats
|
|
|
|
except Exception as e:
|
|
logger.error(f"Failed to get discovery queue stats: {e}")
|
|
return {'pending': 0, 'processing': 0, 'failed': 0, 'by_type': {}}
|
|
|
|
def clear_discovery_queue(self, status: str = None, scan_type: str = None) -> int:
|
|
"""
|
|
Clear items from the discovery queue.
|
|
|
|
Args:
|
|
status: Only clear items with this status (optional)
|
|
scan_type: Only clear items with this scan type (optional)
|
|
|
|
Returns:
|
|
int: Number of items cleared
|
|
"""
|
|
try:
|
|
with self.get_connection(for_write=True) as conn:
|
|
cursor = conn.cursor()
|
|
|
|
query = 'DELETE FROM discovery_scan_queue WHERE 1=1'
|
|
params = []
|
|
|
|
if status:
|
|
query += ' AND status = ?'
|
|
params.append(status)
|
|
if scan_type:
|
|
query += ' AND scan_type = ?'
|
|
params.append(scan_type)
|
|
|
|
cursor.execute(query, params)
|
|
return cursor.rowcount
|
|
|
|
except Exception as e:
|
|
logger.error(f"Failed to clear discovery queue: {e}")
|
|
return 0
|
|
|
|
# ==================== End Discovery Scan Queue Methods ====================
|
|
|
|
    def validate_database_sync(self, fix_issues: bool = False) -> Dict[str, Any]:
        """Validate database synchronization and optionally fix issues

        Cross-checks the downloads table against recycle_bin and
        file_inventory for four classes of drift:
          1. recycled_out_of_sync        - recycled files still marked 'completed'
          2. temp_files_orphaned         - 'completed' rows pointing at temp paths
                                           with no 'final' inventory entry
          3. review_files_moved          - 'completed' rows with review paths whose
                                           files are already in 'final'
          4. completed_without_inventory - 'completed' rows with a file_path but no
                                           file_inventory row at all

        Each check counts the affected rows; when fix_issues is True the
        corresponding UPDATE is applied immediately after its count, and all
        fixes are committed together at the end.

        NOTE(review): this method acquires its connection via self.pool
        directly (other methods in this class use self.get_connection) --
        it presumably requires pooled mode; confirm non-pool callers.

        Args:
            fix_issues: If True, automatically fix detected sync issues

        Returns:
            Dict containing validation results and issues found
        """
        results = {
            'recycled_out_of_sync': 0,
            'temp_files_orphaned': 0,
            'review_files_moved': 0,
            'completed_without_inventory': 0,
            'issues_fixed': 0
        }

        try:
            # Write access is only needed when fixes will be applied
            with self.pool.get_connection(for_write=fix_issues) as conn:
                cursor = conn.cursor()

                # Check 1: Files in recycle_bin but downloads table shows completed
                cursor.execute('''
                    SELECT COUNT(*) FROM downloads d
                    INNER JOIN recycle_bin rb ON d.filename = rb.original_filename
                    WHERE d.status = 'completed'
                ''')
                results['recycled_out_of_sync'] = cursor.fetchone()[0]

                if fix_issues and results['recycled_out_of_sync'] > 0:
                    # Point the download at its recycle-bin path and flag it recycled
                    cursor.execute('''
                        UPDATE downloads
                        SET status = 'recycled',
                            file_path = (SELECT recycle_path FROM recycle_bin WHERE recycle_bin.original_filename = downloads.filename)
                        WHERE filename IN (SELECT original_filename FROM recycle_bin)
                        AND status = 'completed'
                    ''')
                    results['issues_fixed'] += cursor.rowcount

                # Check 2: Completed downloads with temp paths (orphaned)
                cursor.execute('''
                    SELECT COUNT(*) FROM downloads
                    WHERE status = 'completed'
                    AND file_path LIKE '%/temp/%'
                    AND filename NOT IN (SELECT filename FROM file_inventory WHERE location = 'final')
                ''')
                results['temp_files_orphaned'] = cursor.fetchone()[0]

                if fix_issues and results['temp_files_orphaned'] > 0:
                    # Demote to 'failed' and record why in the JSON metadata blob
                    cursor.execute('''
                        UPDATE downloads
                        SET status = 'failed',
                            metadata = json_set(COALESCE(metadata, '{}'), '$.failure_reason', 'Orphaned temp file')
                        WHERE status = 'completed'
                        AND file_path LIKE '%/temp/%'
                        AND filename NOT IN (SELECT filename FROM file_inventory WHERE location = 'final')
                    ''')
                    results['issues_fixed'] += cursor.rowcount

                # Check 3: Files with review paths but actually in final location
                cursor.execute('''
                    SELECT COUNT(*) FROM downloads d
                    WHERE d.file_path LIKE '%/review/%'
                    AND d.status = 'completed'
                    AND EXISTS (SELECT 1 FROM file_inventory fi WHERE fi.filename = d.filename AND fi.location = 'final')
                ''')
                results['review_files_moved'] = cursor.fetchone()[0]

                if fix_issues and results['review_files_moved'] > 0:
                    # Repoint the download at the inventory's 'final' path
                    cursor.execute('''
                        UPDATE downloads
                        SET file_path = (
                            SELECT fi.file_path FROM file_inventory fi
                            WHERE fi.filename = downloads.filename AND fi.location = 'final' LIMIT 1
                        )
                        WHERE file_path LIKE '%/review/%'
                        AND status = 'completed'
                        AND filename IN (SELECT filename FROM file_inventory WHERE location = 'final')
                    ''')
                    results['issues_fixed'] += cursor.rowcount

                # Check 4: Completed downloads without file_inventory entry
                cursor.execute('''
                    SELECT COUNT(*) FROM downloads d
                    WHERE d.status = 'completed'
                    AND d.file_path IS NOT NULL AND d.file_path <> ''
                    AND NOT EXISTS (SELECT 1 FROM file_inventory fi WHERE fi.filename = d.filename)
                ''')
                results['completed_without_inventory'] = cursor.fetchone()[0]

                if fix_issues and results['completed_without_inventory'] > 0:
                    cursor.execute('''
                        UPDATE downloads
                        SET status = 'failed',
                            metadata = json_set(COALESCE(metadata, '{}'), '$.failure_reason', 'File not in inventory')
                        WHERE status = 'completed'
                        AND file_path IS NOT NULL AND file_path <> ''
                        AND NOT EXISTS (SELECT 1 FROM file_inventory fi WHERE fi.filename = downloads.filename)
                    ''')
                    results['issues_fixed'] += cursor.rowcount

                # All fixes committed atomically
                if fix_issues:
                    conn.commit()

            logger.info(f"Database validation complete: {results}")
            return results

        except Exception as e:
            # On failure, return whatever counts were gathered before the error
            logger.error(f"Database validation failed: {e}")
            return results
|
|
|
|
# ==================== Error Monitoring Methods ====================
|
|
|
|
    def scan_logs_for_errors(self, since: datetime = None, max_hours: int = 24) -> List[Dict]:
        """
        Scan log files for ERROR level entries

        Reads log files from /opt/media-downloader/logs. Date-prefixed files
        are scanned only for today and yesterday; files without a date prefix
        (e.g. service.log) are scanned if their mtime is newer than `since`.
        Files over 50MB are skipped to avoid excessive memory/CPU use.

        NOTE(review): only today's and yesterday's date-prefixed files are
        considered regardless of max_hours, so values above ~48 cannot reach
        older files -- confirm this is intended.

        Args:
            since: Only look for errors after this timestamp (default: 24 hours ago)
            max_hours: Maximum hours to look back (default: 24)

        Returns:
            List of error dictionaries with module, message, timestamp, context
        """
        import re
        from pathlib import Path

        if since is None:
            since = datetime.now() - timedelta(hours=max_hours)

        log_dir = Path('/opt/media-downloader/logs')
        errors = []

        # Log line pattern: 2025-12-03 06:30:01 [MediaDownloader.Module] [Module] [ERROR] Message
        error_pattern = re.compile(
            r'^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) '
            r'\[MediaDownloader\.(\w+)\] '
            r'\[(\w+)\] '
            r'\[ERROR\] '
            r'(.+)$'
        )

        # Get log files from last 24-48 hours
        today = datetime.now()
        dates_to_check = [
            today.strftime('%Y%m%d'),
            (today - timedelta(days=1)).strftime('%Y%m%d')
        ]

        # Max file size to scan (50MB) - prevents OOM from huge log files
        MAX_LOG_SIZE = 50 * 1024 * 1024

        for log_file in log_dir.glob('*.log'):
            # Skip if not from relevant dates
            # (date prefix is assumed before the first underscore, e.g. 20251203_xxx.log)
            file_date = log_file.stem.split('_')[0] if '_' in log_file.stem else None
            if file_date and file_date not in dates_to_check:
                continue
            # For files without date prefix (e.g. service.log), check modification time
            if not file_date:
                try:
                    mtime = datetime.fromtimestamp(log_file.stat().st_mtime)
                    if mtime < since:
                        continue
                except Exception:
                    # Unreadable stat (e.g. file vanished mid-scan): skip it
                    continue

            # Skip files larger than MAX_LOG_SIZE to prevent OOM
            try:
                file_size = log_file.stat().st_size
                if file_size > MAX_LOG_SIZE:
                    logger.debug(f"Skipping large log file {log_file.name} ({file_size / 1024 / 1024:.0f}MB > {MAX_LOG_SIZE / 1024 / 1024:.0f}MB limit)")
                    continue
            except Exception:
                continue

            try:
                # Read line-by-line with a sliding context window to avoid loading entire file
                context_window = []  # Rolling window of recent lines for context
                CONTEXT_SIZE = 5

                with open(log_file, 'r', errors='replace') as f:
                    for i, line in enumerate(f):
                        line = line.rstrip('\n')
                        # Window holds the current line plus up to
                        # CONTEXT_SIZE*2 preceding lines (never lines after
                        # the error, since we match before reading further).
                        context_window.append(line)
                        if len(context_window) > CONTEXT_SIZE * 2 + 1:
                            context_window.pop(0)

                        match = error_pattern.match(line)
                        if match:
                            # module_full (the MediaDownloader.<X> suffix) is
                            # captured but unused; module comes from the
                            # second bracketed group.
                            timestamp_str, module_full, module, message = match.groups()
                            try:
                                timestamp = datetime.strptime(timestamp_str, '%Y-%m-%d %H:%M:%S')
                            except ValueError:
                                continue

                            # Ignore entries older than the cutoff
                            if timestamp < since:
                                continue

                            # Use the sliding context window (lines before the error)
                            errors.append({
                                'timestamp': timestamp,
                                'module': module,
                                'message': message,
                                'log_file': log_file.name,
                                'line_number': i + 1,  # enumerate is 0-based
                                'context': list(context_window)
                            })
            except Exception as e:
                # A single unreadable file must not abort the whole scan
                logger.error(f"Error reading log file {log_file}: {e}")
                continue

        return errors
|
|
|
|
def process_and_store_errors(self, since: datetime = None) -> int:
|
|
"""
|
|
Scan logs for errors and store/update them in the database
|
|
|
|
Args:
|
|
since: Only process errors after this timestamp
|
|
|
|
Returns:
|
|
Number of new/updated errors
|
|
"""
|
|
import hashlib
|
|
import json
|
|
import re
|
|
|
|
errors = self.scan_logs_for_errors(since=since)
|
|
|
|
if not errors:
|
|
return 0
|
|
|
|
# Normalize error messages for deduplication
|
|
def normalize_message(msg: str) -> str:
|
|
"""Remove variable parts from error messages for grouping"""
|
|
# Replace file paths
|
|
msg = re.sub(r'/[\w/\-\.]+\.(jpg|png|mp4|webp|gif)', '{file}', msg)
|
|
# Replace URLs
|
|
msg = re.sub(r'https?://[^\s]+', '{url}', msg)
|
|
# Replace numbers
|
|
msg = re.sub(r'\b\d+\b', '{n}', msg)
|
|
# Replace UUIDs
|
|
msg = re.sub(r'[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}', '{uuid}', msg)
|
|
return msg
|
|
|
|
processed = 0
|
|
|
|
try:
|
|
with self.get_connection(for_write=True) as conn:
|
|
cursor = conn.cursor()
|
|
|
|
for error in errors:
|
|
normalized = normalize_message(error['message'])
|
|
error_hash = hashlib.sha256(f"{error['module']}:{normalized}".encode()).hexdigest()
|
|
|
|
# Check if this error already exists (including dismissed ones)
|
|
cursor.execute('''
|
|
SELECT id, occurrence_count, first_seen, dismissed_at
|
|
FROM error_log
|
|
WHERE error_hash = ?
|
|
''', (error_hash,))
|
|
|
|
existing = cursor.fetchone()
|
|
|
|
if existing:
|
|
# Update existing error - only un-dismiss if this is a NEW occurrence
|
|
# (i.e., error timestamp is after the last_seen time)
|
|
cursor.execute('SELECT last_seen FROM error_log WHERE id = ?', (existing['id'],))
|
|
last_seen_row = cursor.fetchone()
|
|
last_seen = last_seen_row['last_seen'] if last_seen_row else None
|
|
|
|
# Only update if this error occurrence is newer than what we've seen
|
|
if last_seen and error['timestamp'] <= last_seen:
|
|
# This is an old error we've already processed, skip it
|
|
continue
|
|
|
|
# This is a new occurrence - update and un-dismiss
|
|
cursor.execute('''
|
|
UPDATE error_log
|
|
SET last_seen = ?,
|
|
occurrence_count = occurrence_count + 1,
|
|
message = ?,
|
|
log_file = ?,
|
|
line_context = ?,
|
|
dismissed_at = NULL,
|
|
viewed_at = NULL
|
|
WHERE id = ?
|
|
''', (
|
|
error['timestamp'],
|
|
error['message'],
|
|
error['log_file'],
|
|
json.dumps(error['context']),
|
|
existing['id']
|
|
))
|
|
else:
|
|
# Insert new error
|
|
cursor.execute('''
|
|
INSERT INTO error_log
|
|
(error_hash, module, message, first_seen, last_seen,
|
|
occurrence_count, log_file, line_context)
|
|
VALUES (?, ?, ?, ?, ?, 1, ?, ?)
|
|
''', (
|
|
error_hash,
|
|
error['module'],
|
|
error['message'],
|
|
error['timestamp'],
|
|
error['timestamp'],
|
|
error['log_file'],
|
|
json.dumps(error['context'])
|
|
))
|
|
|
|
processed += 1
|
|
|
|
conn.commit()
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error storing errors: {e}")
|
|
|
|
return processed
|
|
|
|
def get_recent_errors(self, since: datetime = None, include_dismissed: bool = False, limit: int = None) -> List[Dict]:
|
|
"""
|
|
Get recent errors from the database
|
|
|
|
Args:
|
|
since: Only get errors after this timestamp
|
|
include_dismissed: Include dismissed errors
|
|
limit: Maximum number of errors to return (None for no limit)
|
|
|
|
Returns:
|
|
List of error dictionaries
|
|
"""
|
|
try:
|
|
with self.get_connection() as conn:
|
|
cursor = conn.cursor()
|
|
|
|
query = '''
|
|
SELECT id, error_hash, module, message, first_seen, last_seen,
|
|
occurrence_count, log_file, line_context, dismissed_at, viewed_at
|
|
FROM error_log
|
|
WHERE 1=1
|
|
'''
|
|
params = []
|
|
|
|
if since:
|
|
# Use datetime() to normalize date format (handles both 'T' and space separators)
|
|
query += ' AND datetime(last_seen) >= datetime(?)'
|
|
params.append(since.isoformat())
|
|
|
|
if not include_dismissed:
|
|
query += ' AND dismissed_at IS NULL'
|
|
|
|
# Only show unviewed errors (to match the count shown in banner)
|
|
query += ' AND viewed_at IS NULL'
|
|
|
|
query += ' ORDER BY last_seen DESC'
|
|
|
|
if limit:
|
|
query += ' LIMIT ?'
|
|
params.append(limit)
|
|
|
|
cursor.execute(query, params)
|
|
|
|
errors = []
|
|
for row in cursor.fetchall():
|
|
errors.append({
|
|
'id': row['id'],
|
|
'error_hash': row['error_hash'],
|
|
'module': row['module'],
|
|
'message': row['message'],
|
|
'first_seen': row['first_seen'],
|
|
'last_seen': row['last_seen'],
|
|
'occurrence_count': row['occurrence_count'],
|
|
'log_file': row['log_file'],
|
|
'line_context': json.loads(row['line_context']) if row['line_context'] else [],
|
|
'dismissed_at': row['dismissed_at'],
|
|
'viewed_at': row['viewed_at']
|
|
})
|
|
|
|
return errors
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error getting recent errors: {e}")
|
|
return []
|
|
|
|
def get_unviewed_error_count(self, since: datetime = None) -> int:
|
|
"""Get count of unviewed, undismissed errors since timestamp"""
|
|
try:
|
|
with self.get_connection() as conn:
|
|
cursor = conn.cursor()
|
|
|
|
query = '''
|
|
SELECT COUNT(*) as count
|
|
FROM error_log
|
|
WHERE dismissed_at IS NULL
|
|
AND viewed_at IS NULL
|
|
'''
|
|
params = []
|
|
|
|
if since:
|
|
# Use datetime() to normalize date format (handles both 'T' and space separators)
|
|
query += ' AND datetime(last_seen) >= datetime(?)'
|
|
params.append(since.isoformat())
|
|
|
|
cursor.execute(query, params)
|
|
result = cursor.fetchone()
|
|
return result['count'] if result else 0
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error getting unviewed error count: {e}")
|
|
return 0
|
|
|
|
def dismiss_errors(self, error_ids: List[int] = None, dismiss_all: bool = False) -> int:
|
|
"""
|
|
Dismiss errors by ID or all unviewed errors
|
|
|
|
Args:
|
|
error_ids: List of error IDs to dismiss
|
|
dismiss_all: If True, dismiss all undismissed errors
|
|
|
|
Returns:
|
|
Number of errors dismissed
|
|
"""
|
|
try:
|
|
with self.get_connection(for_write=True) as conn:
|
|
cursor = conn.cursor()
|
|
|
|
now = datetime.now().isoformat()
|
|
|
|
if dismiss_all:
|
|
cursor.execute('''
|
|
UPDATE error_log
|
|
SET dismissed_at = ?
|
|
WHERE dismissed_at IS NULL
|
|
''', (now,))
|
|
elif error_ids:
|
|
placeholders = ','.join('?' * len(error_ids))
|
|
cursor.execute(f'''
|
|
UPDATE error_log
|
|
SET dismissed_at = ?
|
|
WHERE id IN ({placeholders}) AND dismissed_at IS NULL
|
|
''', [now] + error_ids)
|
|
else:
|
|
return 0
|
|
|
|
dismissed = cursor.rowcount
|
|
conn.commit()
|
|
return dismissed
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error dismissing errors: {e}")
|
|
return 0
|
|
|
|
def mark_errors_viewed(self, error_ids: List[int] = None, mark_all: bool = False) -> int:
|
|
"""Mark errors as viewed"""
|
|
try:
|
|
with self.get_connection(for_write=True) as conn:
|
|
cursor = conn.cursor()
|
|
|
|
now = datetime.now().isoformat()
|
|
|
|
if mark_all:
|
|
cursor.execute('''
|
|
UPDATE error_log
|
|
SET viewed_at = ?
|
|
WHERE viewed_at IS NULL AND dismissed_at IS NULL
|
|
''', (now,))
|
|
elif error_ids:
|
|
placeholders = ','.join('?' * len(error_ids))
|
|
cursor.execute(f'''
|
|
UPDATE error_log
|
|
SET viewed_at = ?
|
|
WHERE id IN ({placeholders}) AND viewed_at IS NULL
|
|
''', [now] + error_ids)
|
|
else:
|
|
return 0
|
|
|
|
marked = cursor.rowcount
|
|
conn.commit()
|
|
return marked
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error marking errors as viewed: {e}")
|
|
return 0
|
|
|
|
def get_last_dashboard_visit(self, user_id: str = 'default') -> Optional[datetime]:
|
|
"""Get the last time user visited the dashboard"""
|
|
try:
|
|
with self.get_connection() as conn:
|
|
cursor = conn.cursor()
|
|
cursor.execute('''
|
|
SELECT last_dashboard_visit
|
|
FROM error_tracking
|
|
WHERE user_id = ?
|
|
''', (user_id,))
|
|
|
|
result = cursor.fetchone()
|
|
if result and result['last_dashboard_visit']:
|
|
val = result['last_dashboard_visit']
|
|
if isinstance(val, datetime):
|
|
return val
|
|
return datetime.fromisoformat(val)
|
|
return None
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error getting last dashboard visit: {e}")
|
|
return None
|
|
|
|
def update_dashboard_visit(self, user_id: str = 'default') -> bool:
|
|
"""Update the last dashboard visit timestamp"""
|
|
try:
|
|
with self.get_connection(for_write=True) as conn:
|
|
cursor = conn.cursor()
|
|
|
|
now = datetime.now().isoformat()
|
|
|
|
cursor.execute('''
|
|
INSERT INTO error_tracking (user_id, last_dashboard_visit, updated_at)
|
|
VALUES (?, ?, ?)
|
|
ON CONFLICT(user_id) DO UPDATE SET
|
|
last_dashboard_visit = excluded.last_dashboard_visit,
|
|
updated_at = excluded.updated_at
|
|
''', (user_id, now, now))
|
|
|
|
conn.commit()
|
|
return True
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error updating dashboard visit: {e}")
|
|
return False
|
|
|
|
def get_errors_needing_push_alert(self, delay_hours: int = 24) -> List[Dict]:
|
|
"""
|
|
Get errors that are older than delay_hours, unviewed, and haven't had a push alert recently
|
|
|
|
Args:
|
|
delay_hours: Number of hours errors must be unviewed before sending push alert (default 24)
|
|
|
|
Returns:
|
|
List of error dictionaries that need push alerts
|
|
"""
|
|
try:
|
|
with self.get_connection() as conn:
|
|
cursor = conn.cursor()
|
|
|
|
# Use 'localtime' since timestamps are stored in local time
|
|
delay_modifier = f'-{delay_hours} hours'
|
|
cursor.execute(f'''
|
|
SELECT id, module, message, occurrence_count, first_seen, last_seen
|
|
FROM error_log
|
|
WHERE dismissed_at IS NULL
|
|
AND viewed_at IS NULL
|
|
AND first_seen < datetime('now', 'localtime', '{delay_modifier}')
|
|
AND (push_alert_sent_at IS NULL OR push_alert_sent_at < datetime('now', 'localtime', '{delay_modifier}'))
|
|
''')
|
|
|
|
return [dict(row) for row in cursor.fetchall()]
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error getting errors needing push alert: {e}")
|
|
return []
|
|
|
|
def mark_push_alert_sent(self, error_ids: List[int]) -> bool:
|
|
"""Mark that a push alert was sent for these errors"""
|
|
try:
|
|
with self.get_connection(for_write=True) as conn:
|
|
cursor = conn.cursor()
|
|
|
|
now = datetime.now().isoformat()
|
|
placeholders = ','.join('?' * len(error_ids))
|
|
|
|
cursor.execute(f'''
|
|
UPDATE error_log
|
|
SET push_alert_sent_at = ?
|
|
WHERE id IN ({placeholders})
|
|
''', [now] + error_ids)
|
|
|
|
conn.commit()
|
|
return True
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error marking push alert sent: {e}")
|
|
return False
|
|
|
|
def cleanup_old_errors(self, days: int = 7) -> int:
|
|
"""
|
|
Delete error records older than specified days
|
|
|
|
Args:
|
|
days: Delete errors older than this many days
|
|
|
|
Returns:
|
|
Number of errors deleted
|
|
"""
|
|
try:
|
|
with self.get_connection(for_write=True) as conn:
|
|
cursor = conn.cursor()
|
|
|
|
cursor.execute('''
|
|
DELETE FROM error_log
|
|
WHERE last_seen < datetime('now', ? || ' days')
|
|
''', (f'-{days}',))
|
|
|
|
deleted = cursor.rowcount
|
|
conn.commit()
|
|
|
|
if deleted > 0:
|
|
logger.info(f"Cleaned up {deleted} old error records")
|
|
|
|
return deleted
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error cleaning up old errors: {e}")
|
|
return 0
|
|
|
|
def close(self):
|
|
"""Close all database connections.
|
|
|
|
When use_pool=True, closes all pooled connections.
|
|
When use_pool=False, connections are ephemeral (created/closed per-use
|
|
via get_connection context manager), so no cleanup is needed.
|
|
"""
|
|
if self.pool:
|
|
self.pool.close_all()
|
|
|
|
def checkpoint(self):
|
|
"""Run a WAL checkpoint to merge WAL file into main database
|
|
|
|
Should be called periodically (e.g., every 5 minutes) to prevent
|
|
WAL file from growing too large and to ensure data durability.
|
|
"""
|
|
if self.pool:
|
|
return self.pool.checkpoint()
|
|
# For non-pool mode, run checkpoint via a temporary connection
|
|
try:
|
|
conn = sqlite3.connect(str(self.db_path), timeout=10.0)
|
|
conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")
|
|
conn.close()
|
|
except Exception:
|
|
pass
|
|
return None
|
|
|
|
|
|
# Adapter classes for backward compatibility
|
|
class FastDLDatabaseAdapter:
    """Adapter to make unified database compatible with FastDL module"""

    def __init__(self, unified_db: UnifiedDatabase):
        self.db = unified_db
        self.platform = 'instagram'  # Normalized to instagram (was 'fastdl')
        self.method = 'fastdl'  # Download method for tracking
        self.unified_db = unified_db  # For compatibility with modules expecting this attribute

    def get_file_hash(self, file_path: str) -> Optional[str]:
        """Calculate SHA256 hash of a file (delegates to UnifiedDatabase)"""
        return UnifiedDatabase.get_file_hash(file_path)

    def get_download_by_file_hash(self, file_hash: str) -> Optional[Dict]:
        """Get download record by file hash (delegates to UnifiedDatabase)"""
        return self.db.get_download_by_file_hash(file_hash)

    def is_already_downloaded(self, media_id: str) -> bool:
        """Check whether this media_id already has a download record."""
        # Check by media_id for Instagram platform (all methods now use platform='instagram')
        with self.db.get_connection() as conn:
            cursor = conn.cursor()
            cursor.execute('''
                SELECT 1 FROM downloads
                WHERE platform = 'instagram'
                AND media_id = ?
                LIMIT 1
            ''', (media_id,))
            return cursor.fetchone() is not None

    def record_download(self, media_id: str, username: str, content_type: str,
                        filename: str, download_url: str = None,
                        post_date: datetime = None, metadata: Dict = None, file_path: str = None):
        """Record a completed download through the unified database.

        Args:
            media_id: Platform media identifier (also stored in metadata)
            username: Source account name
            content_type: Content category (e.g. image/video)
            filename: Final filename
            download_url: Original URL; synthesized from media_id if absent
            post_date: Original post timestamp
            metadata: Extra metadata merged over the media_id entry
            file_path: On-disk path; enables file-hash calculation when given

        Returns:
            Result of UnifiedDatabase.record_download (falsy on duplicate/failure).
        """
        url = download_url if download_url else f"instagram://{media_id}"
        full_metadata = {'media_id': media_id}
        if metadata:
            full_metadata.update(metadata)

        # Calculate file hash if file_path provided
        file_hash = None
        if file_path:
            try:
                file_hash = UnifiedDatabase.get_file_hash(file_path)
                # get_file_hash may return None (e.g. unreadable file);
                # slicing None previously raised TypeError inside this
                # log statement and was silently swallowed below.
                if file_hash:
                    logger.debug(f"[FastDLAdapter] Calculated hash for (unknown): {file_hash[:16]}...")
            except Exception as e:
                logger.debug(f"[FastDLAdapter] Failed to calculate hash for (unknown): {e}")

        logger.debug(f"[FastDLAdapter] Recording download: filename=(unknown), platform={self.platform}, method={self.method}, source={username}, file_path={file_path}")

        result = self.db.record_download(
            url=url,
            platform=self.platform,
            source=username,
            content_type=content_type,
            filename=filename,
            file_path=file_path,
            file_hash=file_hash,
            post_date=post_date,
            metadata=full_metadata,
            method=self.method
        )

        if result:
            logger.debug(f"[FastDLAdapter] Successfully recorded download for (unknown)")
        else:
            logger.debug(f"[FastDLAdapter] Failed to record download for (unknown) (possibly duplicate)")

        return result
|
|
|
|
|
|
class ToolzuDatabaseAdapter:
    """Adapter to make unified database compatible with Toolzu module"""

    def __init__(self, unified_db: UnifiedDatabase):
        self.db = unified_db
        self.unified_db = unified_db  # For compatibility
        self.platform = 'instagram'  # Toolzu downloads Instagram content
        self.method = 'toolzu'  # Download method for tracking

    def get_connection(self, for_write=False):
        """Get database connection (delegates to UnifiedDatabase)"""
        return self.db.get_connection(for_write)

    def get_download_by_media_id(self, media_id: str, platform: str = None, method: str = None) -> Optional[Dict]:
        """Get download record by media_id (delegates to UnifiedDatabase)"""
        effective_platform = platform or self.platform
        effective_method = method or self.method
        return self.db.get_download_by_media_id(media_id, effective_platform, effective_method)

    def get_file_hash(self, file_path: str) -> Optional[str]:
        """Calculate SHA256 hash of a file (delegates to UnifiedDatabase)"""
        return UnifiedDatabase.get_file_hash(file_path)

    def get_download_by_file_hash(self, file_hash: str) -> Optional[Dict]:
        """Get download record by file hash (delegates to UnifiedDatabase)"""
        return self.db.get_download_by_file_hash(file_hash)

    def is_already_downloaded(self, media_id: str) -> bool:
        """Check if content is already downloaded by media_id (all methods now use platform='instagram')"""
        with self.db.get_connection() as conn:
            cur = conn.cursor()
            cur.execute('''
                SELECT 1 FROM downloads
                WHERE platform = 'instagram'
                AND media_id = ?
                LIMIT 1
            ''', (media_id,))
            row = cur.fetchone()
            return row is not None

    def record_download(self, media_id: str, username: str, content_type: str,
                        filename: str, download_url: str = None,
                        post_date: datetime = None, metadata: Dict = None, file_path: str = None):
        """Record a download in the database"""
        url = download_url if download_url else f"instagram://{media_id}"

        # Base metadata with caller-supplied entries layered on top
        full_metadata = {
            'media_id': media_id,
            'source': 'toolzu',
            'resolution': '1920x1440',
            **(metadata or {}),
        }

        # Calculate file hash if file_path provided
        file_hash = None
        if file_path:
            try:
                file_hash = UnifiedDatabase.get_file_hash(file_path)
            except Exception:
                pass  # If hash fails, continue without it

        return self.db.record_download(
            url=url,
            platform=self.platform,
            source=username,
            content_type=content_type,
            filename=filename,
            file_path=file_path,
            file_hash=file_hash,
            post_date=post_date,
            metadata=full_metadata,
            method=self.method
        )
|
|
|
|
|
|
class SnapchatDatabaseAdapter:
    """Adapter to make unified database compatible with Snapchat module.

    All records are stored under platform='snapchat' with
    content_type='story'.
    """

    def __init__(self, unified_db: UnifiedDatabase):
        self.db = unified_db
        self.platform = 'snapchat'
        self.unified_db = unified_db  # For compatibility

    def get_connection(self, for_write=False):
        """Get database connection (delegates to UnifiedDatabase)"""
        return self.db.get_connection(for_write)

    def get_file_hash(self, file_path: str) -> Optional[str]:
        """Calculate SHA256 hash of a file (delegates to UnifiedDatabase)"""
        return UnifiedDatabase.get_file_hash(file_path)

    def get_download_by_file_hash(self, file_hash: str) -> Optional[Dict]:
        """Get download record by file hash (delegates to UnifiedDatabase)"""
        return self.db.get_download_by_file_hash(file_hash)

    def is_downloaded(self, username: str, url: str, post_date: datetime = None) -> bool:
        """Check if content is already downloaded.

        Only the URL is consulted; username and post_date are accepted for
        interface compatibility with the Snapchat module but are unused here.
        """
        # Check by URL
        return self.db.is_downloaded(url, self.platform)

    def mark_downloaded(self, username: str, url: str, filename: str,
                        post_date: datetime = None, metadata: dict = None, file_path: str = None) -> bool:
        """Mark content as downloaded in database.

        Args:
            username: Snapchat account the story belongs to (stored as source).
            url: Source URL of the content.
            filename: Local filename of the saved media.
            post_date: Original post timestamp, if known.
            metadata: Extra metadata; copied so the caller's dict is not mutated.
            file_path: Full local path; used to compute a SHA256 dedup hash.

        Returns:
            The result of UnifiedDatabase.record_download.
        """
        # Copy so downstream code never mutates the caller's dict.
        meta = metadata.copy() if metadata else {}

        # Calculate file hash if file_path provided.  Hashing is best-effort:
        # a failure is logged but must not block recording the download.
        file_hash = None
        if file_path:
            try:
                file_hash = UnifiedDatabase.get_file_hash(file_path)
            except Exception as e:
                logger.debug(f"[SnapchatAdapter] Could not hash {file_path}: {e}")

        return self.db.record_download(
            url=url,
            platform=self.platform,
            source=username,
            content_type='story',
            filename=filename,
            file_path=file_path,
            file_hash=file_hash,
            post_date=post_date,
            metadata=meta
        )
|
|
|
|
|
|
class CoppermineDatabaseAdapter:
    """Adapter to make unified database compatible with Coppermine module"""

    def __init__(self, unified_db: UnifiedDatabase):
        self.db = unified_db
        self.platform = 'coppermine'
        self.unified_db = unified_db  # For compatibility

    def get_file_hash(self, file_path: str) -> Optional[str]:
        """Calculate SHA256 hash of a file (delegates to UnifiedDatabase)"""
        return UnifiedDatabase.get_file_hash(file_path)

    def get_download_by_file_hash(self, file_hash: str) -> Optional[Dict]:
        """Get download record by file hash (delegates to UnifiedDatabase)"""
        return self.db.get_download_by_file_hash(file_hash)

    def is_downloaded(self, url: str, platform: str = None) -> bool:
        """Check if content is already downloaded.

        Falls back to this adapter's platform ('coppermine') when no
        platform is given.
        """
        effective_platform = platform or self.platform
        return self.db.is_downloaded(url, effective_platform)

    def add_download(self, url: str, platform: str, source: str, content_type: str,
                     filename: str, file_path: str = None, file_size: int = None,
                     file_hash: str = None, post_date: datetime = None,
                     metadata: dict = None) -> bool:
        """Record a download in the database (delegates to UnifiedDatabase)."""
        record = dict(
            url=url,
            platform=platform,
            source=source,
            content_type=content_type,
            filename=filename,
            file_path=file_path,
            file_size=file_size,
            file_hash=file_hash,
            post_date=post_date,
            metadata=metadata,
        )
        return self.db.record_download(**record)
|
|
|
|
|
|
if __name__ == "__main__":
    # Test and migration script: optionally migrate legacy per-platform
    # databases into the unified database, and/or print per-platform stats.
    import argparse

    # Default to the standard database location (Path is imported at module top)
    DEFAULT_DB_PATH = str(Path(__file__).parent.parent / 'database' / 'media_downloader.db')

    parser = argparse.ArgumentParser(description="Unified Database Manager")
    parser.add_argument("--migrate", action="store_true", help="Migrate from old databases")
    parser.add_argument("--fastdl-db", help="Path to FastDL database")
    parser.add_argument("--tiktok-db", help="Path to TikTok database")
    parser.add_argument("--forum-dbs", nargs="+", help="Paths to forum databases")
    parser.add_argument("--stats", action="store_true", help="Show database statistics")
    parser.add_argument("--db-path", default=DEFAULT_DB_PATH, help="Path to unified database")

    args = parser.parse_args()

    # Create unified database
    db = UnifiedDatabase(args.db_path)
    try:
        if args.migrate:
            print("Starting database migration...")
            db.migrate_from_old_databases(
                fastdl_db=args.fastdl_db,
                tiktok_db=args.tiktok_db,
                forum_dbs=args.forum_dbs,
                verbose=True
            )

        if args.stats:
            print("\nDatabase Statistics:")
            print("-" * 40)
            stats = db.get_platform_stats()
            for platform_stats in stats:
                print(f"Platform: {platform_stats['platform']}")
                print(f"  Total: {platform_stats['total']}")
                print(f"  Completed: {platform_stats['completed']}")
                print(f"  Failed: {platform_stats['failed']}")
                print(f"  Total Size: {platform_stats['total_size'] / (1024**3):.2f} GB")
                print()
    finally:
        # Always release the database, even if migration or stats fail.
        db.close()