""" Celebrity Discovery Router Handles celebrity content discovery features: - Celebrity profiles CRUD - Search presets management - YouTube/RSS search execution - Discovered videos management - Integration with video downloader """ import asyncio import json import re import subprocess from datetime import datetime from typing import Dict, List, Optional import feedparser import httpx from fastapi import APIRouter, BackgroundTasks, Body, Depends, Query, Request from fastapi.responses import Response from pydantic import BaseModel from slowapi import Limiter from slowapi.util import get_remote_address from ..core.dependencies import get_current_user, get_app_state from ..core.exceptions import handle_exceptions, RecordNotFoundError, ValidationError from modules.universal_logger import get_logger logger = get_logger('API') router = APIRouter(prefix="/api/celebrity", tags=["Celebrity Discovery"]) limiter = Limiter(key_func=get_remote_address) # ============================================================================ # HELPER FUNCTIONS # ============================================================================ async def cache_thumbnail_async(video_id: str, thumbnail_url: str, db) -> None: """ Pre-cache thumbnail by fetching from URL and storing in database. This speeds up Internet Discovery page loading. """ try: async with httpx.AsyncClient(timeout=10.0) as client: response = await client.get(thumbnail_url, headers={ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36' }) if response.status_code == 200 and response.content: with db.get_connection(for_write=True) as conn: cursor = conn.cursor() cursor.execute(''' UPDATE celebrity_discovered_videos SET thumbnail_data = ? WHERE video_id = ? 
''', (response.content, video_id)) conn.commit() logger.debug(f"Cached thumbnail for {video_id}") except Exception as e: logger.debug(f"Failed to cache thumbnail for {video_id}: {e}") # ============================================================================ # PYDANTIC MODELS # ============================================================================ class CelebrityCreate(BaseModel): name: str image_url: Optional[str] = None notes: Optional[str] = None class CelebrityUpdate(BaseModel): name: Optional[str] = None image_url: Optional[str] = None notes: Optional[str] = None enabled: Optional[bool] = None class SearchPresetCreate(BaseModel): celebrity_id: int name: str source_type: str # 'youtube_channel', 'youtube_search', 'youtube_rss' source_value: str # channel_id, search query, or RSS URL keywords: Optional[List[str]] = None content_type: Optional[str] = 'all' # 'interview', 'red_carpet', 'photoshoot', 'bts', 'premiere', 'all' category: Optional[str] = 'other' platform: Optional[str] = 'youtube' # 'youtube', 'dailymotion' class SearchPresetUpdate(BaseModel): name: Optional[str] = None source_type: Optional[str] = None source_value: Optional[str] = None keywords: Optional[List[str]] = None content_type: Optional[str] = None platform: Optional[str] = None category: Optional[str] = None enabled: Optional[bool] = None check_frequency_hours: Optional[int] = None class VideoStatusUpdate(BaseModel): status: str # 'new', 'queued', 'downloaded', 'ignored', 'watched' class BulkVideoStatusUpdate(BaseModel): video_ids: List[int] status: str # ============================================================================ # HELPER FUNCTIONS # ============================================================================ def slugify(text: str) -> str: """Convert text to URL-safe slug.""" text = text.lower().strip() text = re.sub(r'[^\w\s-]', '', text) text = re.sub(r'[\s_-]+', '-', text) return text def detect_content_type(title: str, description: str = '') -> str: """Detect 
content type from video title and description.""" text = f"{title} {description}".lower() if any(kw in text for kw in ['interview', 'talks', 'discusses', 'sits down', 'chats']): return 'interview' elif any(kw in text for kw in ['red carpet', 'arrives', 'arrival', 'gala', 'awards']): return 'red_carpet' elif any(kw in text for kw in ['premiere', 'screening', 'opening night']): return 'premiere' elif any(kw in text for kw in ['behind the scenes', 'bts', 'making of', 'on set']): return 'bts' elif any(kw in text for kw in ['photoshoot', 'photo shoot', 'magazine', 'cover shoot']): return 'photoshoot' else: return 'other' async def fetch_youtube_rss(channel_id: str) -> List[Dict]: """Fetch videos from YouTube RSS feed.""" rss_url = f"https://www.youtube.com/feeds/videos.xml?channel_id={channel_id}" async with httpx.AsyncClient(timeout=30.0) as client: try: response = await client.get(rss_url) if response.status_code != 200: logger.warning(f"RSS fetch failed for {channel_id}: {response.status_code}", module="Celebrity") return [] feed = feedparser.parse(response.text) videos = [] for entry in feed.entries: video_id = entry.get('yt_videoid', '') if not video_id and 'link' in entry: # Extract from URL match = re.search(r'v=([^&]+)', entry.link) if match: video_id = match.group(1) videos.append({ 'video_id': video_id, 'platform': 'youtube', 'url': f"https://www.youtube.com/watch?v={video_id}", 'title': entry.get('title', ''), 'channel_name': feed.feed.get('title', ''), 'channel_id': channel_id, 'thumbnail': entry.get('media_thumbnail', [{}])[0].get('url', '') if entry.get('media_thumbnail') else f"https://i.ytimg.com/vi/{video_id}/hqdefault.jpg", 'upload_date': entry.get('published', ''), 'description': entry.get('summary', ''), 'view_count': int(entry.get('media_statistics', {}).get('views', 0)) if entry.get('media_statistics') else 0 }) return videos except Exception as e: logger.error(f"RSS fetch error for {channel_id}: {e}", module="Celebrity") return [] async def 
search_youtube_ytdlp(query: str, max_results: int = 20) -> List[Dict]: """Search YouTube using yt-dlp.""" try: cmd = [ '/opt/media-downloader/venv/bin/yt-dlp', f'ytsearch{max_results}:{query}', '--dump-json', '--flat-playlist', '--no-warnings', '--ignore-errors' ] process = await asyncio.create_subprocess_exec( *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE ) stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=60) videos = [] for line in stdout.decode().strip().split('\n'): if not line: continue try: data = json.loads(line) video_id = data.get('id', '') # Generate YouTube thumbnail URL from video ID (flat-playlist doesn't include it) thumbnail = data.get('thumbnail', '') if not thumbnail and video_id: thumbnail = f"https://i.ytimg.com/vi/{video_id}/hqdefault.jpg" videos.append({ 'video_id': video_id, 'platform': 'youtube', 'url': data.get('url', f"https://www.youtube.com/watch?v={video_id}"), 'title': data.get('title', ''), 'channel_name': data.get('channel', data.get('uploader', '')), 'channel_id': data.get('channel_id', ''), 'thumbnail': thumbnail, 'duration': data.get('duration', 0), 'upload_date': data.get('upload_date', ''), 'view_count': data.get('view_count', 0), 'description': data.get('description', '')[:500] if data.get('description') else '' }) except json.JSONDecodeError: continue return videos except asyncio.TimeoutError: logger.warning(f"YouTube search timeout for: {query}", module="Celebrity") return [] except Exception as e: logger.error(f"YouTube search error: {e}", module="Celebrity") return [] async def fetch_video_metadata(video_id: str) -> Dict: """Fetch full metadata for a single video including upload date and resolution. Returns: Dict with video metadata, or empty dict if unavailable. 
Special keys: - '_error': Error type if fetch failed ('age_restricted', 'unavailable', 'private', 'removed', 'unknown') - '_error_message': Full error message from yt-dlp """ try: cmd = [ '/opt/media-downloader/venv/bin/yt-dlp', f'https://www.youtube.com/watch?v={video_id}', '--dump-json', '--no-download', '--no-warnings' ] process = await asyncio.create_subprocess_exec( *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE ) stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=30) if stdout: data = json.loads(stdout.decode().strip()) # Extract max resolution and width from formats max_resolution = 0 max_width = 0 formats = data.get('formats', []) for fmt in formats: height = fmt.get('height') if height and isinstance(height, int) and height > max_resolution: # Only count video formats (not audio-only) if fmt.get('vcodec', 'none') != 'none': max_resolution = height width = fmt.get('width') if width and isinstance(width, int): max_width = width return { 'video_id': video_id, 'upload_date': data.get('upload_date', ''), 'view_count': data.get('view_count', 0), 'duration': data.get('duration', 0), 'description': data.get('description', '')[:500] if data.get('description') else '', 'max_resolution': max_resolution if max_resolution > 0 else None, 'max_width': max_width if max_width > 0 else None } # No stdout - check stderr for error type error_msg = stderr.decode().lower() if stderr else '' # Detect specific error types if 'age' in error_msg or 'sign in to confirm' in error_msg or 'age-restricted' in error_msg: return {'_error': 'age_restricted', '_error_message': error_msg[:200]} elif 'private' in error_msg: return {'_error': 'private', '_error_message': error_msg[:200]} elif 'unavailable' in error_msg or 'not available' in error_msg: return {'_error': 'unavailable', '_error_message': error_msg[:200]} elif 'removed' in error_msg or 'deleted' in error_msg or 'terminated' in error_msg: return {'_error': 'removed', '_error_message': 
error_msg[:200]} elif error_msg: return {'_error': 'unknown', '_error_message': error_msg[:200]} except Exception as e: logger.warning(f"Failed to fetch metadata for {video_id}: {e}", module="Celebrity") return {} async def enrich_videos_with_resolution(video_ids: List[str] = None, limit: int = 50, delete_unavailable: bool = True): """Background task to fetch resolution for videos that don't have it. Args: video_ids: Optional list of specific video IDs to enrich. If None, fetches oldest videos without resolution. limit: Maximum number of videos to process in one batch. delete_unavailable: If True, delete videos that appear to be unavailable (no metadata returned). """ app_state = get_app_state() try: with app_state.db.get_connection() as conn: cursor = conn.cursor() if video_ids: # Fetch specific videos placeholders = ','.join('?' * len(video_ids)) cursor.execute(f''' SELECT id, video_id, title FROM celebrity_discovered_videos WHERE video_id IN ({placeholders}) AND max_resolution IS NULL LIMIT ? ''', (*video_ids, limit)) else: # Fetch oldest videos without resolution cursor.execute(''' SELECT id, video_id, title FROM celebrity_discovered_videos WHERE max_resolution IS NULL ORDER BY discovered_at ASC LIMIT ? ''', (limit,)) videos = [dict(row) for row in cursor.fetchall()] if not videos: return logger.info(f"Enriching {len(videos)} videos with resolution data", module="Celebrity") updated = 0 deleted = 0 age_restricted = 0 for video in videos: try: metadata = await fetch_video_metadata(video['video_id']) if metadata.get('max_resolution'): with app_state.db.get_connection(for_write=True) as conn: cursor = conn.cursor() cursor.execute(''' UPDATE celebrity_discovered_videos SET max_resolution = ?, max_width = ? WHERE id = ? 
''', (metadata['max_resolution'], metadata.get('max_width'), video['id'])) conn.commit() updated += 1 elif metadata.get('_error') == 'age_restricted': # Age-restricted video - keep it, just log age_restricted += 1 logger.debug(f"Age-restricted video (keeping): {video['title'][:50]}... ({video['video_id']})", module="Celebrity") elif metadata.get('_error') in ('unavailable', 'private', 'removed') and delete_unavailable: # Truly unavailable - delete it with app_state.db.get_connection(for_write=True) as conn: cursor = conn.cursor() cursor.execute('DELETE FROM celebrity_discovered_videos WHERE id = ?', (video['id'],)) conn.commit() deleted += 1 logger.info(f"Deleted {metadata.get('_error')} video: {video['title'][:50]}... ({video['video_id']})", module="Celebrity") elif not metadata and delete_unavailable: # Empty response with no error info - likely unavailable with app_state.db.get_connection(for_write=True) as conn: cursor = conn.cursor() cursor.execute('DELETE FROM celebrity_discovered_videos WHERE id = ?', (video['id'],)) conn.commit() deleted += 1 logger.info(f"Deleted unavailable video: {video['title'][:50]}... 
({video['video_id']})", module="Celebrity") # Rate limit to avoid API issues await asyncio.sleep(0.5) except Exception as e: logger.warning(f"Failed to enrich video {video['video_id']}: {e}", module="Celebrity") continue log_msg = f"Enriched {updated}/{len(videos)} videos with resolution" if deleted > 0: log_msg += f", deleted {deleted} unavailable" if age_restricted > 0: log_msg += f", {age_restricted} age-restricted (kept)" logger.info(log_msg, module="Celebrity") except Exception as e: logger.error(f"Resolution enrichment failed: {e}", module="Celebrity") async def get_channel_videos_ytdlp(channel_id: str, keyword_filter: str = None, max_results: int = 50) -> List[Dict]: """Get videos from a YouTube channel using yt-dlp.""" try: channel_url = f"https://www.youtube.com/channel/{channel_id}/videos" cmd = [ '/opt/media-downloader/venv/bin/yt-dlp', channel_url, '--dump-json', '--flat-playlist', '--no-warnings', '--ignore-errors', '--playlist-end', str(max_results) ] if keyword_filter: cmd.extend(['--match-title', keyword_filter]) process = await asyncio.create_subprocess_exec( *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE ) stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=120) videos = [] for line in stdout.decode().strip().split('\n'): if not line: continue try: data = json.loads(line) videos.append({ 'video_id': data.get('id', ''), 'platform': 'youtube', 'url': data.get('url', f"https://www.youtube.com/watch?v={data.get('id', '')}"), 'title': data.get('title', ''), 'channel_name': data.get('channel', data.get('uploader', '')), 'channel_id': channel_id, 'thumbnail': data.get('thumbnail', f"https://i.ytimg.com/vi/{data.get('id', '')}/hqdefault.jpg"), 'duration': data.get('duration', 0), 'upload_date': data.get('upload_date', ''), 'view_count': data.get('view_count', 0), 'description': data.get('description', '')[:500] if data.get('description') else '' }) except json.JSONDecodeError: continue return videos except 
asyncio.TimeoutError: logger.warning(f"Channel fetch timeout for: {channel_id}", module="Celebrity") return [] except Exception as e: logger.error(f"Channel fetch error: {e}", module="Celebrity") return [] # ============================================================================ # CELEBRITY PROFILE ENDPOINTS # ============================================================================ @router.get("/profiles") @limiter.limit("60/minute") @handle_exceptions async def get_celebrity_profiles( request: Request, enabled_only: bool = Query(False), tracked_only: bool = Query(False), current_user: Dict = Depends(get_current_user) ): """Get all celebrity profiles.""" app_state = get_app_state() with app_state.db.get_connection() as conn: cursor = conn.cursor() # For tracked_only, return simplified list for appearances config if tracked_only: cursor.execute(''' SELECT id, name, tmdb_person_id, enabled, tmdb_last_sync FROM celebrity_profiles WHERE enabled = 1 ORDER BY name ''') profiles = [] for row in cursor.fetchall(): profiles.append({ 'id': row[0], 'name': row[1], 'tmdb_person_id': row[2], 'enabled': bool(row[3]), 'tmdb_last_sync': row[4] }) return profiles # Normal profiles list if enabled_only: cursor.execute(''' SELECT cp.*, (SELECT COUNT(*) FROM celebrity_search_presets WHERE celebrity_id = cp.id) as preset_count, (SELECT COUNT(*) FROM celebrity_discovered_videos WHERE celebrity_id = cp.id AND status = 'new') as new_videos FROM celebrity_profiles cp WHERE cp.enabled = 1 ORDER BY cp.name ''') else: cursor.execute(''' SELECT cp.*, (SELECT COUNT(*) FROM celebrity_search_presets WHERE celebrity_id = cp.id) as preset_count, (SELECT COUNT(*) FROM celebrity_discovered_videos WHERE celebrity_id = cp.id AND status = 'new') as new_videos FROM celebrity_profiles cp ORDER BY cp.name ''') profiles = [] for row in cursor.fetchall(): profiles.append({ 'id': row['id'], 'name': row['name'], 'slug': row['slug'], 'image_url': row['image_url'], 'notes': row['notes'], 'enabled': 
bool(row['enabled']), 'preset_count': row['preset_count'], 'new_videos': row['new_videos'], 'created_at': row['created_at'], 'updated_at': row['updated_at'] }) return {"success": True, "profiles": profiles} @router.get("/profiles/{profile_id}") @limiter.limit("60/minute") @handle_exceptions async def get_celebrity_profile( request: Request, profile_id: int, current_user: Dict = Depends(get_current_user) ): """Get a single celebrity profile with presets.""" app_state = get_app_state() with app_state.db.get_connection() as conn: cursor = conn.cursor() cursor.execute('SELECT * FROM celebrity_profiles WHERE id = ?', (profile_id,)) row = cursor.fetchone() if not row: raise RecordNotFoundError("Celebrity profile not found") # Get presets cursor.execute(''' SELECT * FROM celebrity_search_presets WHERE celebrity_id = ? ORDER BY name ''', (profile_id,)) presets = [] for preset_row in cursor.fetchall(): presets.append({ 'id': preset_row['id'], 'name': preset_row['name'], 'source_type': preset_row['source_type'], 'source_value': preset_row['source_value'], 'keywords': json.loads(preset_row['keywords']) if preset_row['keywords'] else [], 'content_type': preset_row['content_type'], 'enabled': bool(preset_row['enabled']), 'last_checked': preset_row['last_checked'], 'results_count': preset_row['results_count'], 'created_at': preset_row['created_at'] }) return { "success": True, "profile": { 'id': row['id'], 'name': row['name'], 'slug': row['slug'], 'image_url': row['image_url'], 'notes': row['notes'], 'enabled': bool(row['enabled']), 'created_at': row['created_at'], 'updated_at': row['updated_at'] }, "presets": presets } @router.post("/profiles") @limiter.limit("20/minute") @handle_exceptions async def create_celebrity_profile( request: Request, body: CelebrityCreate, current_user: Dict = Depends(get_current_user) ): """Create a new celebrity profile.""" app_state = get_app_state() slug = slugify(body.name) with app_state.db.get_connection(for_write=True) as conn: cursor = 
conn.cursor() # Check for duplicate slug cursor.execute('SELECT id FROM celebrity_profiles WHERE slug = ?', (slug,)) if cursor.fetchone(): raise ValidationError("A celebrity with this name already exists") cursor.execute(''' INSERT INTO celebrity_profiles (name, slug, image_url, notes) VALUES (?, ?, ?, ?) ''', (body.name, slug, body.image_url, body.notes)) profile_id = cursor.lastrowid conn.commit() logger.info(f"Created celebrity profile: {body.name}", module="Celebrity") return { "success": True, "profile_id": profile_id, "slug": slug } @router.put("/profiles/{profile_id}") @limiter.limit("30/minute") @handle_exceptions async def update_celebrity_profile( request: Request, profile_id: int, body: CelebrityUpdate, current_user: Dict = Depends(get_current_user) ): """Update a celebrity profile.""" app_state = get_app_state() with app_state.db.get_connection(for_write=True) as conn: cursor = conn.cursor() cursor.execute('SELECT * FROM celebrity_profiles WHERE id = ?', (profile_id,)) if not cursor.fetchone(): raise RecordNotFoundError("Celebrity profile not found") updates = [] params = [] if body.name is not None: updates.append("name = ?") params.append(body.name) updates.append("slug = ?") params.append(slugify(body.name)) if body.image_url is not None: updates.append("image_url = ?") params.append(body.image_url) if body.notes is not None: updates.append("notes = ?") params.append(body.notes) if body.enabled is not None: updates.append("enabled = ?") params.append(1 if body.enabled else 0) if updates: updates.append("updated_at = CURRENT_TIMESTAMP") params.append(profile_id) cursor.execute(f''' UPDATE celebrity_profiles SET {", ".join(updates)} WHERE id = ? 
''', params) conn.commit() return {"success": True} @router.delete("/profiles/{profile_id}") @limiter.limit("10/minute") @handle_exceptions async def delete_celebrity_profile( request: Request, profile_id: int, current_user: Dict = Depends(get_current_user) ): """Delete a celebrity profile and all associated data.""" app_state = get_app_state() with app_state.db.get_connection(for_write=True) as conn: cursor = conn.cursor() cursor.execute('SELECT name FROM celebrity_profiles WHERE id = ?', (profile_id,)) row = cursor.fetchone() if not row: raise RecordNotFoundError("Celebrity profile not found") # Cascade deletes will handle presets and discovered videos cursor.execute('DELETE FROM celebrity_profiles WHERE id = ?', (profile_id,)) conn.commit() logger.info(f"Deleted celebrity profile: {row['name']}", module="Celebrity") return {"success": True} @router.post("/profiles/{profile_id}/toggle") @limiter.limit("30/minute") @handle_exceptions async def toggle_celebrity_tracking( request: Request, profile_id: int, body: Dict = Body(...), current_user: Dict = Depends(get_current_user) ): """Toggle celebrity tracking on/off for appearances monitoring.""" app_state = get_app_state() enabled = body.get('enabled', True) with app_state.db.get_connection(for_write=True) as conn: cursor = conn.cursor() cursor.execute('SELECT name FROM celebrity_profiles WHERE id = ?', (profile_id,)) row = cursor.fetchone() if not row: raise RecordNotFoundError("Celebrity profile not found") cursor.execute(''' UPDATE celebrity_profiles SET enabled = ?, updated_at = CURRENT_TIMESTAMP WHERE id = ? 
''', (1 if enabled else 0, profile_id)) conn.commit() logger.info( f"{'Enabled' if enabled else 'Disabled'} tracking for celebrity: {row['name']}", "SUCCESS", module="Celebrity" ) return {"success": True, "enabled": enabled} # ============================================================================ # SEARCH PRESET ENDPOINTS # ============================================================================ @router.get("/presets/categories") @limiter.limit("60/minute") @handle_exceptions async def get_preset_categories( request: Request, current_user: Dict = Depends(get_current_user) ): """Get all preset categories with counts.""" app_state = get_app_state() with app_state.db.get_connection() as conn: cursor = conn.cursor() cursor.execute(''' SELECT COALESCE(category, 'other') as category, COUNT(*) as preset_count, SUM(CASE WHEN enabled = 1 THEN 1 ELSE 0 END) as enabled_count, SUM(results_count) as total_results FROM celebrity_search_presets GROUP BY COALESCE(category, 'other') ORDER BY category ''') categories = [] for row in cursor.fetchall(): categories.append({ 'name': row['category'], 'preset_count': row['preset_count'], 'enabled_count': row['enabled_count'], 'total_results': row['total_results'] or 0 }) return {"success": True, "categories": categories} @router.get("/presets") @limiter.limit("60/minute") @handle_exceptions async def get_search_presets( request: Request, celebrity_id: Optional[int] = None, category: Optional[str] = None, platform: Optional[str] = None, # Filter by platform exclude_source_type: Optional[str] = None, # Exclude specific source type (e.g. 
youtube_monitor) search: Optional[str] = None, # Search in name, celebrity_name, source_value enabled_only: bool = Query(False), current_user: Dict = Depends(get_current_user) ): """Get search presets, optionally filtered by celebrity, category, platform, or search query.""" app_state = get_app_state() with app_state.db.get_connection() as conn: cursor = conn.cursor() query = ''' SELECT sp.*, cp.name as celebrity_name FROM celebrity_search_presets sp JOIN celebrity_profiles cp ON sp.celebrity_id = cp.id WHERE 1=1 ''' params = [] if celebrity_id: query += ' AND sp.celebrity_id = ?' params.append(celebrity_id) if category: query += ' AND sp.category = ?' params.append(category) if platform: query += ' AND sp.platform = ?' params.append(platform) if exclude_source_type: query += ' AND sp.source_type != ?' params.append(exclude_source_type) if search: search_term = f'%{search}%' query += ' AND (sp.name LIKE ? OR cp.name LIKE ? OR sp.source_value LIKE ?)' params.extend([search_term, search_term, search_term]) if enabled_only: query += ' AND sp.enabled = 1' query += ' ORDER BY cp.name, sp.name' cursor.execute(query, params) presets = [] for row in cursor.fetchall(): presets.append({ 'id': row['id'], 'celebrity_id': row['celebrity_id'], 'celebrity_name': row['celebrity_name'], 'name': row['name'], 'source_type': row['source_type'], 'source_value': row['source_value'], 'keywords': json.loads(row['keywords']) if row['keywords'] else [], 'content_type': row['content_type'], 'category': row['category'] if 'category' in row.keys() else 'other', 'platform': row['platform'] if 'platform' in row.keys() else 'youtube', 'enabled': bool(row['enabled']), 'last_checked': row['last_checked'], 'check_frequency_hours': row['check_frequency_hours'], 'results_count': row['results_count'], 'created_at': row['created_at'] }) return {"success": True, "presets": presets} @router.post("/presets") @limiter.limit("30/minute") @handle_exceptions async def create_search_preset( request: Request, 
body: SearchPresetCreate, current_user: Dict = Depends(get_current_user) ): """Create a new search preset.""" app_state = get_app_state() valid_source_types = ['youtube_channel', 'youtube_search', 'youtube_rss', 'dailymotion_channel'] if body.source_type not in valid_source_types: raise ValidationError(f"Invalid source_type. Must be one of: {', '.join(valid_source_types)}") with app_state.db.get_connection(for_write=True) as conn: cursor = conn.cursor() # Verify celebrity exists cursor.execute('SELECT id FROM celebrity_profiles WHERE id = ?', (body.celebrity_id,)) if not cursor.fetchone(): raise RecordNotFoundError("Celebrity profile not found") cursor.execute(''' INSERT INTO celebrity_search_presets (celebrity_id, name, source_type, source_value, keywords, content_type, category, platform) VALUES (?, ?, ?, ?, ?, ?, ?, ?) ''', ( body.celebrity_id, body.name, body.source_type, body.source_value, json.dumps(body.keywords) if body.keywords else None, body.content_type, body.category or 'other', body.platform or 'youtube' )) preset_id = cursor.lastrowid conn.commit() logger.info(f"Created search preset: {body.name}", module="Celebrity") return {"success": True, "preset_id": preset_id} @router.post("/presets/bulk") @limiter.limit("10/minute") @handle_exceptions async def create_bulk_presets( request: Request, presets: List[SearchPresetCreate] = Body(...), current_user: Dict = Depends(get_current_user) ): """Create multiple search presets at once.""" app_state = get_app_state() created_ids = [] with app_state.db.get_connection(for_write=True) as conn: cursor = conn.cursor() for preset in presets: if preset.source_type not in ['youtube_channel', 'youtube_search', 'youtube_rss']: continue cursor.execute(''' INSERT OR IGNORE INTO celebrity_search_presets (celebrity_id, name, source_type, source_value, keywords, content_type) VALUES (?, ?, ?, ?, ?, ?) 
''', ( preset.celebrity_id, preset.name, preset.source_type, preset.source_value, json.dumps(preset.keywords) if preset.keywords else None, preset.content_type )) if cursor.lastrowid: created_ids.append(cursor.lastrowid) conn.commit() logger.info(f"Created {len(created_ids)} bulk presets", module="Celebrity") return {"success": True, "created_count": len(created_ids), "preset_ids": created_ids} @router.put("/presets/{preset_id}") @limiter.limit("30/minute") @handle_exceptions async def update_search_preset( request: Request, preset_id: int, body: SearchPresetUpdate, current_user: Dict = Depends(get_current_user) ): """Update a search preset.""" app_state = get_app_state() with app_state.db.get_connection(for_write=True) as conn: cursor = conn.cursor() cursor.execute('SELECT * FROM celebrity_search_presets WHERE id = ?', (preset_id,)) if not cursor.fetchone(): raise RecordNotFoundError("Search preset not found") updates = [] params = [] if body.name is not None: updates.append("name = ?") params.append(body.name) if body.source_type is not None: updates.append("source_type = ?") params.append(body.source_type) if body.source_value is not None: updates.append("source_value = ?") params.append(body.source_value) if body.keywords is not None: updates.append("keywords = ?") params.append(json.dumps(body.keywords)) if body.content_type is not None: updates.append("content_type = ?") params.append(body.content_type) if body.category is not None: updates.append("category = ?") params.append(body.category) if body.enabled is not None: updates.append("enabled = ?") params.append(1 if body.enabled else 0) if body.check_frequency_hours is not None: updates.append("check_frequency_hours = ?") params.append(body.check_frequency_hours) if body.platform is not None: updates.append("platform = ?") params.append(body.platform) if updates: updates.append("updated_at = CURRENT_TIMESTAMP") params.append(preset_id) cursor.execute(f''' UPDATE celebrity_search_presets SET {", 
".join(updates)} WHERE id = ? ''', params) conn.commit() return {"success": True} @router.delete("/presets/{preset_id}") @limiter.limit("20/minute") @handle_exceptions async def delete_search_preset( request: Request, preset_id: int, current_user: Dict = Depends(get_current_user) ): """Delete a search preset.""" app_state = get_app_state() with app_state.db.get_connection(for_write=True) as conn: cursor = conn.cursor() cursor.execute('DELETE FROM celebrity_search_presets WHERE id = ?', (preset_id,)) if cursor.rowcount == 0: raise RecordNotFoundError("Search preset not found") conn.commit() return {"success": True} # ============================================================================ # SEARCH EXECUTION ENDPOINTS # ============================================================================ @router.post("/presets/{preset_id}/search") @limiter.limit("10/minute") @handle_exceptions async def execute_preset_search( request: Request, preset_id: int, background_tasks: BackgroundTasks, current_user: Dict = Depends(get_current_user) ): """Execute a search preset and discover videos.""" app_state = get_app_state() with app_state.db.get_connection() as conn: cursor = conn.cursor() cursor.execute(''' SELECT sp.*, cp.name as celebrity_name, cp.id as celebrity_id FROM celebrity_search_presets sp JOIN celebrity_profiles cp ON sp.celebrity_id = cp.id WHERE sp.id = ? 
''', (preset_id,)) preset = cursor.fetchone() if not preset: raise RecordNotFoundError("Search preset not found") # Execute search based on source type videos = [] if preset['source_type'] == 'youtube_rss': videos = await fetch_youtube_rss(preset['source_value']) elif preset['source_type'] == 'youtube_search': videos = await search_youtube_ytdlp(preset['source_value'], max_results=30) elif preset['source_type'] == 'youtube_channel': # Build keyword filter from preset keywords keyword_filter = None if preset['keywords']: keywords = json.loads(preset['keywords']) if keywords: keyword_filter = '|'.join(keywords) videos = await get_channel_videos_ytdlp(preset['source_value'], keyword_filter, max_results=50) # Filter by keywords if present if preset['keywords']: keywords = json.loads(preset['keywords']) if keywords: filtered = [] for video in videos: title_lower = video.get('title', '').lower() desc_lower = video.get('description', '').lower() if any(kw.lower() in title_lower or kw.lower() in desc_lower for kw in keywords): filtered.append(video) videos = filtered # Filter to only include videos with celebrity name in the title celebrity_name = preset['celebrity_name'].lower() name_parts = celebrity_name.split() filtered_videos = [] for video in videos: title_lower = video.get('title', '').lower() # Stricter matching: require BOTH first AND last name, or the full name # This prevents "Eva Mendes" or "Eva Green" from matching "Eva Longoria" if len(name_parts) >= 2: first_name = name_parts[0] last_name = name_parts[-1] # Must have full name OR (first name AND last name separately) if celebrity_name in title_lower or (first_name in title_lower and last_name in title_lower): filtered_videos.append(video) else: # Single name - require exact match if celebrity_name in title_lower: filtered_videos.append(video) videos = filtered_videos # Filter out blocked YouTube channels celebrity_discovery_settings = app_state.settings.get('celebrity_discovery') or {} blocked_channels_str = 
celebrity_discovery_settings.get('blocked_youtube_channels', '') if blocked_channels_str: blocked_channels = set( name.strip().lower() for name in blocked_channels_str.split('\n') if name.strip() ) if blocked_channels: videos = [ v for v in videos if (v.get('channel_name') or '').lower() not in blocked_channels ] # Store discovered videos new_count = 0 with app_state.db.get_connection(for_write=True) as conn: cursor = conn.cursor() for video in videos: if not video.get('video_id'): continue # Detect content type content_type = detect_content_type( video.get('title', ''), video.get('description', '') ) # Parse upload date upload_date = None if video.get('upload_date'): try: if len(video['upload_date']) == 8: # YYYYMMDD format upload_date = datetime.strptime(video['upload_date'], '%Y%m%d').isoformat() else: upload_date = video['upload_date'] except (ValueError, TypeError) as e: logger.debug(f"Failed to parse upload_date '{video['upload_date']}': {e}") try: cursor.execute(''' INSERT OR IGNORE INTO celebrity_discovered_videos (preset_id, celebrity_id, video_id, platform, url, title, channel_name, channel_id, thumbnail, duration, upload_date, view_count, description, content_type) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
''', (
                        preset_id,
                        preset['celebrity_id'],
                        video['video_id'],
                        video.get('platform', 'youtube'),
                        video.get('url', ''),
                        video.get('title', ''),
                        video.get('channel_name', ''),
                        video.get('channel_id', ''),
                        video.get('thumbnail', ''),
                        video.get('duration', 0),
                        upload_date,
                        video.get('view_count', 0),
                        video.get('description', ''),
                        content_type
                    ))
                    # INSERT OR IGNORE: rowcount > 0 only when the video was
                    # actually new (not a duplicate video_id).
                    if cursor.rowcount > 0:
                        new_count += 1
                        # Pre-cache thumbnail for faster page loading
                        thumbnail_url = video.get('thumbnail', '')
                        if thumbnail_url:
                            await cache_thumbnail_async(video['video_id'], thumbnail_url, app_state.db)
                except Exception as e:
                    logger.warning(f"Error storing video: {e}", module="Celebrity")

        # Update preset stats
        cursor.execute('''
            UPDATE celebrity_search_presets
            SET last_checked = CURRENT_TIMESTAMP,
                results_count = (SELECT COUNT(*) FROM celebrity_discovered_videos WHERE preset_id = ?),
                updated_at = CURRENT_TIMESTAMP
            WHERE id = ?
        ''', (preset_id, preset_id))
        conn.commit()

    logger.info(f"Preset search '{preset['name']}' found {len(videos)} videos, {new_count} new", "SUCCESS", module="Celebrity")

    return {
        "success": True,
        "videos_found": len(videos),
        "new_videos": new_count,
        "preset_name": preset['name']
    }


# In-memory storage for search job progress.
# NOTE(review): entries persist until DELETE /search-jobs/{job_id} is called,
# so this dict grows without bound if clients never delete jobs — confirm a
# cleanup policy exists elsewhere.
_search_jobs: Dict[str, Dict] = {}


@router.post("/search-all")
@limiter.limit("5/minute")
@handle_exceptions
async def search_all_presets(
    request: Request,
    celebrity_id: Optional[int] = Query(None),
    background: bool = Query(False),
    background_tasks: BackgroundTasks = None,  # NOTE(review): accepted but unused; background work uses asyncio.create_task below
    current_user: Dict = Depends(get_current_user)
):
    """Execute all enabled search presets (or for a specific celebrity).

    If background=True, starts the search in background and returns
    a job_id for tracking.
    """
    app_state = get_app_state()

    # Collect the presets to run (all enabled, optionally scoped to one celebrity).
    with app_state.db.get_connection() as conn:
        cursor = conn.cursor()
        if celebrity_id:
            cursor.execute('''
                SELECT id, name FROM celebrity_search_presets
                WHERE celebrity_id = ?
                AND enabled = 1
            ''', (celebrity_id,))
        else:
            cursor.execute('''
                SELECT id, name FROM celebrity_search_presets
                WHERE enabled = 1
            ''')
        presets = [{'id': row['id'], 'name': row['name']} for row in cursor.fetchall()]

    if not background:
        # Synchronous execution (original behavior): run presets one by one,
        # collecting per-preset results; individual failures do not abort the run.
        results = []
        for preset in presets:
            try:
                result = await execute_preset_search(request, preset['id'], BackgroundTasks(), current_user)
                results.append(result)
            except Exception as e:
                logger.warning(f"Preset {preset['id']} search failed: {e}", module="Celebrity")
                results.append({"preset_id": preset['id'], "error": str(e)})

        total_new = sum(r.get('new_videos', 0) for r in results if 'new_videos' in r)

        # Enrich newly discovered videos with resolution data in background
        if total_new > 0:
            asyncio.create_task(enrich_videos_with_resolution(limit=min(total_new, 100)))

        return {
            "success": True,
            "presets_searched": len(presets),
            "total_new_videos": total_new,
            "results": results
        }
    else:
        # Background execution with progress tracking
        import uuid
        job_id = str(uuid.uuid4())[:8]

        # Initialize job progress
        _search_jobs[job_id] = {
            'status': 'running',
            'started_at': datetime.now().isoformat(),
            'total_presets': len(presets),
            'completed_presets': 0,
            'current_preset': presets[0]['name'] if presets else '',
            'total_new_videos': 0,
            'results': [],
            'errors': []
        }

        # Start background task
        async def run_search():
            job = _search_jobs[job_id]
            for i, preset in enumerate(presets):
                try:
                    job['current_preset'] = preset['name']
                    result = await execute_preset_search(request, preset['id'], BackgroundTasks(), current_user)
                    job['results'].append(result)
                    job['total_new_videos'] += result.get('new_videos', 0)
                except Exception as e:
                    logger.warning(f"Preset {preset['id']} search failed: {e}", module="Celebrity")
                    job['errors'].append({'preset_id': preset['id'], 'preset_name': preset['name'], 'error': str(e)})
                # Progress counter advances even on failure.
                job['completed_presets'] = i + 1

            job['status'] = 'completed'
            job['completed_at'] = datetime.now().isoformat()
            job['current_preset'] = ''

            # Enrich newly discovered videos with resolution data in background
            if job['total_new_videos'] > 0:
                asyncio.create_task(enrich_videos_with_resolution(limit=min(job['total_new_videos'], 100)))

        # Schedule background task
        asyncio.create_task(run_search())

        return {
            "success": True,
            "job_id": job_id,
            "total_presets": len(presets),
            "message": "Search started in background"
        }


@router.get("/search-jobs/{job_id}")
@limiter.limit("60/minute")
@handle_exceptions
async def get_search_job_status(
    request: Request,
    job_id: str,
    current_user: Dict = Depends(get_current_user)
):
    """Get the status of a background search job."""
    if job_id not in _search_jobs:
        raise RecordNotFoundError("Search job not found")

    return {
        "success": True,
        "job": _search_jobs[job_id]
    }


@router.delete("/search-jobs/{job_id}")
@limiter.limit("30/minute")
@handle_exceptions
async def delete_search_job(
    request: Request,
    job_id: str,
    current_user: Dict = Depends(get_current_user)
):
    """Delete a completed search job from memory."""
    if job_id not in _search_jobs:
        raise RecordNotFoundError("Search job not found")

    del _search_jobs[job_id]
    return {"success": True}


@router.post("/presets/all/discover-enrich")
@limiter.limit("1/minute")
@handle_exceptions
async def run_discover_enrich_all(
    request: Request,
    background_tasks: BackgroundTasks,
    current_user: Dict = Depends(get_current_user)
):
    """Run discovery search and enrichment for ALL enabled presets."""
    import subprocess
    app_state = get_app_state()

    # Count enabled presets
    with app_state.db.get_connection() as conn:
        cursor = conn.cursor()
        cursor.execute('SELECT COUNT(*) as count FROM celebrity_search_presets WHERE enabled = 1')
        result = cursor.fetchone()
        if not result or result['count'] == 0:
            raise RecordNotFoundError("No enabled presets found")
        preset_count = result['count']

    # Run the script in background (no args = all enabled presets)
    def run_script():
        try:
            result = subprocess.run(
                ['/opt/media-downloader/venv/bin/python3',
'/opt/media-downloader/scripts/discover_and_enrich.py'], capture_output=True, text=True, timeout=7200 # 2 hour timeout for full scan ) logger.info(f"Full Discover & Enrich completed: {result.stdout[-500:] if result.stdout else 'No output'}", module="Celebrity") except subprocess.TimeoutExpired: logger.warning("Full Discover & Enrich timed out", module="Celebrity") except Exception as e: logger.error(f"Full Discover & Enrich failed: {e}", module="Celebrity") background_tasks.add_task(run_script) logger.info(f"Started Full Discover & Enrich ({preset_count} presets)", module="Celebrity") return { "success": True, "message": f"Full discovery and enrichment started ({preset_count} presets)", "preset_count": preset_count } class BatchPresetRequest(BaseModel): preset_ids: List[int] @router.post("/presets/batch/discover-enrich") @limiter.limit("2/minute") @handle_exceptions async def run_discover_enrich_batch( request: Request, body: BatchPresetRequest, background_tasks: BackgroundTasks, current_user: Dict = Depends(get_current_user) ): """Run discovery search and enrichment for a batch of selected presets.""" import subprocess preset_ids = body.preset_ids if not preset_ids: raise ValueError("No preset IDs provided") app_state = get_app_state() # Verify presets exist with app_state.db.get_connection() as conn: cursor = conn.cursor() placeholders = ','.join('?' 
* len(preset_ids)) cursor.execute(f'SELECT id, name FROM celebrity_search_presets WHERE id IN ({placeholders})', preset_ids) found_presets = cursor.fetchall() if len(found_presets) != len(preset_ids): raise RecordNotFoundError(f"Some presets not found") # Run the script in background with specific preset IDs def run_script(): try: result = subprocess.run( ['/opt/media-downloader/venv/bin/python3', '/opt/media-downloader/scripts/discover_and_enrich.py', '--preset-ids', ','.join(str(p) for p in preset_ids)], capture_output=True, text=True, timeout=3600 # 1 hour timeout for batch ) logger.info(f"Batch Discover & Enrich completed ({len(preset_ids)} presets): {result.stdout[-500:] if result.stdout else 'No output'}", module="Celebrity") except subprocess.TimeoutExpired: logger.warning(f"Batch Discover & Enrich timed out ({len(preset_ids)} presets)", module="Celebrity") except Exception as e: logger.error(f"Batch Discover & Enrich failed: {e}", module="Celebrity") background_tasks.add_task(run_script) logger.info(f"Started Batch Discover & Enrich ({len(preset_ids)} presets)", module="Celebrity") return { "success": True, "message": f"Batch discovery started ({len(preset_ids)} presets)", "preset_count": len(preset_ids) } @router.post("/presets/{preset_id}/discover-enrich") @limiter.limit("5/minute") @handle_exceptions async def run_discover_and_enrich( request: Request, preset_id: int, background_tasks: BackgroundTasks, current_user: Dict = Depends(get_current_user) ): """Run discovery search and enrichment for a specific preset using the discover_and_enrich.py script.""" import subprocess import uuid import re as regex app_state = get_app_state() # Verify preset exists with app_state.db.get_connection() as conn: cursor = conn.cursor() cursor.execute('SELECT id, name FROM celebrity_search_presets WHERE id = ?', (preset_id,)) preset = cursor.fetchone() if not preset: raise RecordNotFoundError("Search preset not found") # Create task ID task_id = str(uuid.uuid4())[:8] # 
Create task record with app_state.db.get_connection(for_write=True) as conn: cursor = conn.cursor() cursor.execute(''' INSERT INTO background_tasks (id, task_type, status, metadata) VALUES (?, 'discover_enrich', 'running', ?) ''', (task_id, json.dumps({'preset_id': preset_id, 'preset_name': preset['name']}))) conn.commit() # Run the script in background def run_script(): try: result = subprocess.run( ['/opt/media-downloader/venv/bin/python3', '/opt/media-downloader/scripts/discover_and_enrich.py', '--preset', str(preset_id)], capture_output=True, text=True, timeout=600 # 10 minute timeout ) # Parse results from output output = result.stdout or '' new_count = 0 match_count = 0 results_count = 0 # Look for "RESULTS: X results, Y match, Z new" format results_match = regex.search(r'RESULTS:\s*(\d+)\s+results,\s*(\d+)\s+match,\s*(\d+)\s+new', output) if results_match: results_count = int(results_match.group(1)) match_count = int(results_match.group(2)) new_count = int(results_match.group(3)) else: # Fallback to old patterns new_match = regex.search(r'(\d+)\s+new', output) if new_match: new_count = int(new_match.group(1)) match_match = regex.search(r'(\d+)\s+match', output) if match_match: match_count = int(match_match.group(1)) # Update task as completed with app_state.db.get_connection(for_write=True) as conn: cursor = conn.cursor() cursor.execute(''' UPDATE background_tasks SET status = 'completed', completed_at = CURRENT_TIMESTAMP, result = ? WHERE id = ? 
''', (json.dumps({'results_count': results_count, 'new_count': new_count, 'match_count': match_count, 'output': output[-500:]}), task_id))
                conn.commit()
            logger.info(f"Discover & Enrich completed for preset {preset_id}: {results_count} results, {match_count} match, {new_count} new", module="Celebrity")
        except subprocess.TimeoutExpired:
            # Timeout: mark the task failed with an explanatory result payload.
            with app_state.db.get_connection(for_write=True) as conn:
                cursor = conn.cursor()
                cursor.execute('''
                    UPDATE background_tasks
                    SET status = 'failed', completed_at = CURRENT_TIMESTAMP, result = ?
                    WHERE id = ?
                ''', (json.dumps({'error': 'Timeout after 10 minutes'}), task_id))
                conn.commit()
            logger.warning(f"Discover & Enrich timed out for preset {preset_id}", module="Celebrity")
        except Exception as e:
            # Any other failure: persist the error so /tasks/{task_id} can report it.
            with app_state.db.get_connection(for_write=True) as conn:
                cursor = conn.cursor()
                cursor.execute('''
                    UPDATE background_tasks
                    SET status = 'failed', completed_at = CURRENT_TIMESTAMP, result = ?
                    WHERE id = ?
                ''', (json.dumps({'error': str(e)}), task_id))
                conn.commit()
            logger.error(f"Discover & Enrich failed for preset {preset_id}: {e}", module="Celebrity")

    background_tasks.add_task(run_script)
    logger.info(f"Started Discover & Enrich for preset: {preset['name']}", module="Celebrity")

    return {
        "success": True,
        "message": f"Discovery and enrichment started for preset '{preset['name']}'",
        "preset_id": preset_id,
        "task_id": task_id
    }


@router.get("/tasks/{task_id}")
@handle_exceptions
async def get_task_status(
    request: Request,
    task_id: str,
    current_user: Dict = Depends(get_current_user)
):
    """Get background task status."""
    app_state = get_app_state()

    with app_state.db.get_connection() as conn:
        cursor = conn.cursor()
        cursor.execute('SELECT * FROM background_tasks WHERE id = ?', (task_id,))
        task = cursor.fetchone()

        if not task:
            raise RecordNotFoundError("Task not found")

        return {
            "success": True,
            "task": {
                "id": task['id'],
                "task_type": task['task_type'],
                "status": task['status'],
                "started_at": task['started_at'],
                "completed_at": task['completed_at'],
                # result/metadata are stored as JSON strings; decode for the client.
                "result": json.loads(task['result']) if task['result'] else None,
                "metadata": json.loads(task['metadata']) if task['metadata'] else None
            }
        }


@router.post("/presets/category/{category}/discover-enrich")
@limiter.limit("2/minute")
@handle_exceptions
async def run_discover_enrich_category(
    request: Request,
    category: str,
    background_tasks: BackgroundTasks,
    current_user: Dict = Depends(get_current_user)
):
    """Run discovery search and enrichment for all presets in a category."""
    import subprocess
    app_state = get_app_state()

    # Verify category has presets
    with app_state.db.get_connection() as conn:
        cursor = conn.cursor()
        cursor.execute('SELECT COUNT(*) as count FROM celebrity_search_presets WHERE category = ? AND enabled = 1', (category,))
        result = cursor.fetchone()
        if not result or result['count'] == 0:
            raise RecordNotFoundError(f"No enabled presets found in category '{category}'")
        preset_count = result['count']

    # Run the script in background
    def run_script():
        try:
            result = subprocess.run(
                ['/opt/media-downloader/venv/bin/python3',
                 '/opt/media-downloader/scripts/discover_and_enrich.py',
                 '--category', category],
                capture_output=True,
                text=True,
                timeout=1800  # 30 minute timeout for category
            )
            logger.info(f"Discover & Enrich completed for category {category}: {result.stdout[-500:] if result.stdout else 'No output'}", module="Celebrity")
        except subprocess.TimeoutExpired:
            logger.warning(f"Discover & Enrich timed out for category {category}", module="Celebrity")
        except Exception as e:
            logger.error(f"Discover & Enrich failed for category {category}: {e}", module="Celebrity")

    background_tasks.add_task(run_script)
    logger.info(f"Started Discover & Enrich for category: {category} ({preset_count} presets)", module="Celebrity")

    return {
        "success": True,
        "message": f"Discovery and enrichment started for category '{category}' ({preset_count} presets)",
        "category": category,
        "preset_count": preset_count
    }


# ============================================================================
# CATEGORY ENDPOINTS
# ============================================================================

@router.get("/categories")
@limiter.limit("60/minute")
@handle_exceptions
async def get_categories_with_video_counts(
    request: Request,
    celebrity_id: Optional[int] = None,
    current_user: Dict = Depends(get_current_user)
):
    """Get preset categories with video counts."""
    app_state = get_app_state()

    with app_state.db.get_connection() as conn:
        cursor = conn.cursor()

        # Build query based on filters
        if celebrity_id:
            cursor.execute('''
                SELECT p.category,
                       COUNT(DISTINCT p.id) as preset_count,
                       COUNT(v.id) as video_count,
                       SUM(CASE WHEN v.status = 'new' THEN 1 ELSE 0 END) as new_count
                FROM celebrity_search_presets p
                LEFT JOIN celebrity_discovered_videos v ON p.id = v.preset_id
                WHERE p.celebrity_id = ?
                GROUP BY p.category
                ORDER BY video_count DESC
            ''', (celebrity_id,))
        else:
            cursor.execute('''
                SELECT p.category,
                       COUNT(DISTINCT p.id) as preset_count,
                       COUNT(v.id) as video_count,
                       SUM(CASE WHEN v.status = 'new' THEN 1 ELSE 0 END) as new_count
                FROM celebrity_search_presets p
                LEFT JOIN celebrity_discovered_videos v ON p.id = v.preset_id
                GROUP BY p.category
                ORDER BY video_count DESC
            ''')

        categories = []
        for row in cursor.fetchall():
            categories.append({
                # NULL category is surfaced as 'other' to the client.
                'category': row['category'] or 'other',
                'preset_count': row['preset_count'],
                'video_count': row['video_count'],
                'new_count': row['new_count'] or 0
            })

    return {"success": True, "categories": categories}


# ============================================================================
# DISCOVERED VIDEOS ENDPOINTS
# ============================================================================

@router.get("/sources")
@limiter.limit("60/minute")
@handle_exceptions
async def get_unique_sources(
    request: Request,
    celebrity_id: Optional[int] = None,
    category: Optional[str] = None,
    preset_ids: Optional[str] = None,
    status: Optional[str] = None,
    watched: Optional[str] = None,
    platform: Optional[str] = None,
    current_user: Dict = Depends(get_current_user)
):
    """Get unique channel names (sources) for filter dropdown, filtered by current criteria."""
    app_state = get_app_state()

    with app_state.db.get_connection() as conn:
        cursor = conn.cursor()

        # Base query; filter clauses are appended conditionally below.
        query = '''
            SELECT DISTINCT v.channel_name
            FROM celebrity_discovered_videos v
            JOIN celebrity_search_presets sp ON v.preset_id = sp.id
            WHERE v.channel_name IS NOT NULL AND v.channel_name != ''
        '''
        params = []

        if celebrity_id:
            query += ' AND v.celebrity_id = ?'
            params.append(celebrity_id)
        if category:
            query += ' AND sp.category = ?'
            params.append(category)
        if preset_ids:
            # preset_ids is a comma-separated string; malformed input is ignored.
            try:
                id_list = [int(x.strip()) for x in preset_ids.split(',') if x.strip()]
                if id_list:
                    placeholders = ','.join(['?' for _ in id_list])
                    query += f' AND v.preset_id IN ({placeholders})'
                    params.extend(id_list)
            except ValueError:
                pass
        if status:
            # 'not_queued' / 'not_downloaded' are synthetic filter values,
            # everything else matches the status column directly.
            if status == 'not_queued':
                query += " AND v.status NOT IN ('queued', 'downloaded')"
            elif status == 'not_downloaded':
                query += " AND v.status != 'downloaded'"
            else:
                query += ' AND v.status = ?'
                params.append(status)
        if watched:
            if watched == 'watched':
                query += " AND v.status = 'watched'"
            elif watched == 'unwatched':
                query += " AND v.status != 'watched'"
        if platform:
            query += ' AND v.platform = ?'
        params.append(platform)

        query += ' ORDER BY v.channel_name'
        cursor.execute(query, params)
        sources = [row['channel_name'] for row in cursor.fetchall()]

    return {"success": True, "sources": sources}


@router.get("/videos")
@limiter.limit("60/minute")
@handle_exceptions
async def get_discovered_videos(
    request: Request,
    celebrity_id: Optional[int] = None,
    preset_id: Optional[int] = None,
    preset_ids: Optional[str] = None,  # Comma-separated list of preset IDs for grouped presets
    category: Optional[str] = None,
    status: Optional[str] = None,
    watched: Optional[str] = None,  # 'watched' or 'unwatched'
    channel_name: Optional[str] = None,  # Filter by YouTube channel/source
    platform: Optional[str] = None,  # Filter by platform (youtube, dailymotion)
    content_type: Optional[str] = None,
    min_resolution: Optional[int] = None,
    resolution_sort: Optional[str] = None,  # 'highest' or 'lowest'
    date_sort: Optional[str] = None,  # 'discovered_newest', 'discovered_oldest', 'uploaded_newest', 'uploaded_oldest'
    name_sort: Optional[str] = None,  # 'name_asc' or 'name_desc'
    search: Optional[str] = None,  # Search in title and channel_name
    limit: int = Query(50, ge=1, le=200),
    offset: int = Query(0, ge=0),
    current_user: Dict = Depends(get_current_user)
):
    """Get discovered videos with filters."""
    app_state = get_app_state()

    with app_state.db.get_connection() as conn:
        cursor = conn.cursor()

        # Two parallel queries (rows + COUNT) share one params list, so every
        # filter must append the same clause to both.
        query = '''
            SELECT v.*, cp.name as celebrity_name, sp.name as preset_name, sp.category as preset_category
            FROM celebrity_discovered_videos v
            JOIN celebrity_profiles cp ON v.celebrity_id = cp.id
            JOIN celebrity_search_presets sp ON v.preset_id = sp.id
            WHERE 1=1
        '''
        count_query = '''
            SELECT COUNT(*)
            FROM celebrity_discovered_videos v
            JOIN celebrity_search_presets sp ON v.preset_id = sp.id
            WHERE 1=1
        '''
        params = []

        if celebrity_id:
            query += ' AND v.celebrity_id = ?'
            count_query += ' AND v.celebrity_id = ?'
            params.append(celebrity_id)
        if preset_id:
            query += ' AND v.preset_id = ?'
            count_query += ' AND v.preset_id = ?'
            params.append(preset_id)
        # Support multiple preset IDs (comma-separated) for grouped presets
        if preset_ids:
            try:
                id_list = [int(x.strip()) for x in preset_ids.split(',') if x.strip()]
                if id_list:
                    placeholders = ','.join(['?' for _ in id_list])
                    query += f' AND v.preset_id IN ({placeholders})'
                    count_query += f' AND v.preset_id IN ({placeholders})'
                    params.extend(id_list)
            except ValueError:
                pass  # Invalid preset_ids format, ignore
        if category:
            query += ' AND sp.category = ?'
            count_query += ' AND sp.category = ?'
            params.append(category)
        if status:
            if status == 'not_queued':
                # Show videos that are not queued or downloaded
                query += " AND v.status NOT IN ('queued', 'downloaded')"
                count_query += " AND v.status NOT IN ('queued', 'downloaded')"
            elif status == 'not_downloaded':
                # Show videos that are not downloaded (includes queued, new, ignored)
                query += " AND v.status != 'downloaded'"
                count_query += " AND v.status != 'downloaded'"
            else:
                query += ' AND v.status = ?'
                count_query += ' AND v.status = ?'
                params.append(status)
        if content_type:
            query += ' AND v.content_type = ?'
            count_query += ' AND v.content_type = ?'
            params.append(content_type)
        if min_resolution:
            query += ' AND v.max_resolution >= ?'
            count_query += ' AND v.max_resolution >= ?'
            params.append(min_resolution)
        if watched:
            if watched == 'watched':
                query += " AND v.status = 'watched'"
                count_query += " AND v.status = 'watched'"
            elif watched == 'unwatched':
                query += " AND v.status != 'watched'"
                count_query += " AND v.status != 'watched'"
        if channel_name:
            # Support pattern-based filtering for Easynews TV/Movies
            channel_name_stripped = channel_name.strip()
            if channel_name_stripped == 'Easynews - TV':
                query += " AND v.channel_name LIKE 'Easynews - tv:%'"
                count_query += " AND v.channel_name LIKE 'Easynews - tv:%'"
            elif channel_name_stripped == 'Easynews - Movies':
                query += " AND v.channel_name LIKE 'Easynews - movie:%'"
                count_query += " AND v.channel_name LIKE 'Easynews - movie:%'"
            else:
                query += ' AND v.channel_name = ?'
                count_query += ' AND v.channel_name = ?'
                params.append(channel_name)
        if platform:
            query += ' AND v.platform = ?'
            count_query += ' AND v.platform = ?'
            params.append(platform)
        if search:
            search_term = f'%{search}%'
            query += ' AND (v.title LIKE ? OR v.channel_name LIKE ? OR v.url LIKE ? OR v.description LIKE ?)'
            count_query += ' AND (v.title LIKE ? OR v.channel_name LIKE ? OR v.url LIKE ? OR v.description LIKE ?)'
            params.extend([search_term, search_term, search_term, search_term])

        # Get total count
        cursor.execute(count_query, params)
        total = cursor.fetchone()[0]

        # Get videos with ordering
        # Date sort takes precedence if specified
        if date_sort == 'discovered_newest':
            query += ' ORDER BY v.discovered_at DESC LIMIT ? OFFSET ?'
        elif date_sort == 'discovered_oldest':
            query += ' ORDER BY v.discovered_at ASC LIMIT ? OFFSET ?'
        elif date_sort == 'uploaded_newest':
            query += ' ORDER BY COALESCE(v.upload_date, "1970-01-01") DESC, v.discovered_at DESC LIMIT ? OFFSET ?'
        elif date_sort == 'uploaded_oldest':
            # NULL upload dates sort last rather than first.
            query += ' ORDER BY CASE WHEN v.upload_date IS NULL THEN 1 ELSE 0 END, v.upload_date ASC, v.discovered_at DESC LIMIT ? OFFSET ?'
        elif date_sort == 'watched_newest':
            # NOTE(review): 'watched_newest' is accepted here but not listed in
            # the date_sort parameter comment above — confirm it is intentional.
            query += ' ORDER BY COALESCE(v.status_updated_at, v.discovered_at) DESC LIMIT ? OFFSET ?'
        elif resolution_sort == 'highest':
            query += ' ORDER BY COALESCE(v.max_resolution, 0) DESC, v.discovered_at DESC LIMIT ? OFFSET ?'
        elif resolution_sort == 'lowest':
            query += ' ORDER BY CASE WHEN v.max_resolution IS NULL THEN 1 ELSE 0 END, v.max_resolution ASC, v.discovered_at DESC LIMIT ? OFFSET ?'
        elif name_sort == 'name_asc':
            query += ' ORDER BY v.title ASC, v.discovered_at DESC LIMIT ? OFFSET ?'
        elif name_sort == 'name_desc':
            query += ' ORDER BY v.title DESC, v.discovered_at DESC LIMIT ? OFFSET ?'
        else:
            query += ' ORDER BY v.discovered_at DESC LIMIT ? OFFSET ?'
        params.extend([limit, offset])

        cursor.execute(query, params)

        videos = []
        for row in cursor.fetchall():
            videos.append({
                'id': row['id'],
                'preset_id': row['preset_id'],
                'preset_name': row['preset_name'],
                'preset_category': row['preset_category'] if 'preset_category' in row.keys() else 'other',
                'celebrity_id': row['celebrity_id'],
                'celebrity_name': row['celebrity_name'],
                'video_id': row['video_id'],
                'platform': row['platform'],
                'url': row['url'],
                'title': row['title'],
                'channel_name': row['channel_name'],
                'channel_id': row['channel_id'],
                'thumbnail': row['thumbnail'],
                'duration': row['duration'],
                'upload_date': row['upload_date'],
                'view_count': row['view_count'],
                'description': row['description'],
                'content_type': row['content_type'],
                'status': row['status'],
                'discovered_at': row['discovered_at'],
                'downloaded_path': row['downloaded_path'],
                'max_resolution': row['max_resolution'] if 'max_resolution' in row.keys() else None,
                'max_width': row['max_width'] if 'max_width' in row.keys() else None,
                'metadata': row['metadata'] if 'metadata' in row.keys() else None
            })

    # Filter out blocked YouTube channels
    # NOTE(review): this filtering happens AFTER pagination, so a page may
    # return fewer than `limit` rows and `total` is only approximately
    # corrected (by the count removed from this page) — confirm acceptable.
    celebrity_discovery_settings = app_state.settings.get('celebrity_discovery') or {}
    blocked_channels_str = celebrity_discovery_settings.get('blocked_youtube_channels', '')
    if blocked_channels_str:
        # Parse newline-separated list of blocked channel names (case-insensitive)
        blocked_channels = set(
            name.strip().lower()
            for name in blocked_channels_str.split('\n')
            if name.strip()
        )
        if blocked_channels:
            original_count = len(videos)
            videos = [
                v for v in videos
                if (v.get('channel_name') or '').lower() not in blocked_channels
            ]
            filtered_count = original_count - len(videos)
            if filtered_count > 0:
                # Adjust total count for filtered results
                total = max(0, total - filtered_count)

    return {
        "success": True,
        "videos": videos,
        "total": total,
        "limit": limit,
        "offset": offset
    }


@router.put("/videos/{video_id}/status")
@limiter.limit("60/minute")
@handle_exceptions
async def update_video_status(
    request: Request,
    video_id: int,
    body: VideoStatusUpdate,
    current_user: Dict = Depends(get_current_user)
):
    """Update the status of a discovered video."""
    app_state = get_app_state()

    valid_statuses = ['new', 'queued', 'downloaded', 'ignored', 'watched']
    if body.status not in valid_statuses:
        raise ValidationError(f"Invalid status. Must be one of: {', '.join(valid_statuses)}")

    with app_state.db.get_connection(for_write=True) as conn:
        cursor = conn.cursor()
        cursor.execute('''
            UPDATE celebrity_discovered_videos
            SET status = ?, status_updated_at = CURRENT_TIMESTAMP
            WHERE id = ?
        ''', (body.status, video_id))

        # rowcount == 0 means no row matched the id.
        if cursor.rowcount == 0:
            raise RecordNotFoundError("Video not found")

        conn.commit()

    return {"success": True}


@router.get("/videos/{video_id}/stream")
@handle_exceptions
async def stream_discovered_video(
    request: Request,
    video_id: int,
    current_user: Dict = Depends(get_current_user)
):
    """
    Stream a downloaded discovered video file.
    Returns the video file with proper Range support for seeking.
    Only works for videos that have been downloaded (have downloaded_path).
    """
    import os
    from starlette.responses import StreamingResponse

    app_state = get_app_state()

    with app_state.db.get_connection() as conn:
        cursor = conn.cursor()
        cursor.execute('SELECT downloaded_path FROM celebrity_discovered_videos WHERE id = ?', (video_id,))
        row = cursor.fetchone()

        if not row:
            raise RecordNotFoundError("Video not found")

    file_path = row['downloaded_path']
    if not file_path or not os.path.exists(file_path):
        raise RecordNotFoundError("Downloaded file not found")

    file_size = os.path.getsize(file_path)

    # Determine content type
    ext = os.path.splitext(file_path)[1].lower()
    content_type = {
        '.mp4': 'video/mp4',
        '.webm': 'video/webm',
        '.mkv': 'video/x-matroska',
        '.mov': 'video/quicktime',
        '.avi': 'video/x-msvideo',
    }.get(ext, 'video/mp4')

    # Handle Range requests for seeking
    # NOTE(review): a Range start beyond file_size is not rejected here
    # (no 416 handling) — confirm clients only send valid ranges.
    range_header = request.headers.get("Range")
    start = 0
    end = file_size - 1
    if range_header:
        range_match = range_header.replace("bytes=", "").split("-")
        if range_match[0]:
            start = int(range_match[0])
        if len(range_match) > 1 and range_match[1]:
            end = min(int(range_match[1]), file_size - 1)

    content_length = end - start + 1

    def file_stream_generator():
        # Stream the requested byte window in 64 KiB chunks.
        with open(file_path, 'rb') as f:
            f.seek(start)
            remaining = content_length
            while remaining > 0:
                chunk_size = min(65536, remaining)
                chunk = f.read(chunk_size)
                if not chunk:
                    break
                remaining -= len(chunk)
                yield chunk

    headers = {
        "Accept-Ranges": "bytes",
        "Content-Length": str(content_length),
        "Cache-Control": "private, max-age=3600",
    }

    if range_header:
        # Partial content response for Range requests.
        headers["Content-Range"] = f"bytes {start}-{end}/{file_size}"
        return StreamingResponse(
            file_stream_generator(),
            status_code=206,
            media_type=content_type,
            headers=headers
        )

    return StreamingResponse(
        file_stream_generator(),
        media_type=content_type,
        headers=headers
    )


@router.put("/videos/bulk-status")
@limiter.limit("30/minute")
@handle_exceptions
async def update_bulk_video_status(
    request: Request,
    body: BulkVideoStatusUpdate,
    current_user: Dict = Depends(get_current_user)
):
    """Update status for
    multiple videos."""
    app_state = get_app_state()
    valid_statuses = ['new', 'queued', 'downloaded', 'ignored', 'watched']
    if body.status not in valid_statuses:
        raise ValidationError(f"Invalid status. Must be one of: {', '.join(valid_statuses)}")
    with app_state.db.get_connection(for_write=True) as conn:
        cursor = conn.cursor()
        # Safe f-string SQL: the interpolated text is only a run of '?'
        # placeholders sized to the id list; all values are bound parameters.
        placeholders = ','.join('?' * len(body.video_ids))
        cursor.execute(f'''
            UPDATE celebrity_discovered_videos
            SET status = ?, status_updated_at = CURRENT_TIMESTAMP
            WHERE id IN ({placeholders})
        ''', [body.status] + body.video_ids)
        updated = cursor.rowcount
        conn.commit()
    return {"success": True, "updated_count": updated}


@router.delete("/videos/{video_id}")
@limiter.limit("30/minute")
@handle_exceptions
async def delete_discovered_video(
    request: Request,
    video_id: int,
    current_user: Dict = Depends(get_current_user)
):
    """Delete a discovered video.

    Raises:
        RecordNotFoundError: if no row with this primary-key id exists.
    """
    app_state = get_app_state()
    with app_state.db.get_connection(for_write=True) as conn:
        cursor = conn.cursor()
        cursor.execute('DELETE FROM celebrity_discovered_videos WHERE id = ?', (video_id,))
        # rowcount == 0 means nothing was deleted, i.e. the id was unknown.
        if cursor.rowcount == 0:
            raise RecordNotFoundError("Video not found")
        conn.commit()
    return {"success": True}


@router.post("/cleanup-unavailable")
@limiter.limit("5/minute")
@handle_exceptions
async def cleanup_unavailable_videos(
    request: Request,
    limit: int = 100,
    check_all: bool = False,
    current_user: Dict = Depends(get_current_user)
):
    """Check and delete videos that are no longer available on YouTube.

    Runs the actual check as a fire-and-forget asyncio task and returns
    immediately; results are only visible in the logs / database.

    Args:
        limit: Maximum number of videos to check
        check_all: If True, check ALL videos (not just those without resolution)
    """
    if check_all:
        # Check all videos regardless of resolution status
        asyncio.create_task(_cleanup_all_videos(limit=limit))
    else:
        # Only check videos without resolution (faster, catches most issues).
        # enrich_videos_with_resolution is defined elsewhere in this module.
        asyncio.create_task(enrich_videos_with_resolution(limit=limit, delete_unavailable=True))
    return {
        "success": True,
        "message": f"Cleanup started - checking up to {limit} videos for availability"
    }


async def _cleanup_all_videos(limit: int = 100):
    """Check all videos for availability and delete unavailable ones.

    Background worker for cleanup_unavailable_videos(check_all=True).
    Keeps age-restricted videos; deletes videos whose metadata lookup
    reports 'unavailable', 'private', 'removed', or comes back empty.
    """
    app_state = get_app_state()
    try:
        with app_state.db.get_connection() as conn:
            cursor = conn.cursor()
            # Get oldest videos first (more likely to be unavailable)
            cursor.execute('''
                SELECT id, video_id, title, platform
                FROM celebrity_discovered_videos
                ORDER BY discovered_at ASC
                LIMIT ?
            ''', (limit,))
            videos = [dict(row) for row in cursor.fetchall()]
        if not videos:
            return
        logger.info(f"Checking {len(videos)} videos for availability", module="Celebrity")
        deleted = 0
        checked = 0
        age_restricted = 0
        for video in videos:
            try:
                # Only check YouTube videos (dailymotion would need different handling)
                if video.get('platform', 'youtube') != 'youtube':
                    continue
                # fetch_video_metadata is defined elsewhere in this module; as used
                # here it returns a dict that may carry an '_error' marker and a
                # 'max_resolution' field.
                metadata = await fetch_video_metadata(video['video_id'])
                checked += 1
                if metadata.get('_error') == 'age_restricted':
                    # Age-restricted video - keep it
                    age_restricted += 1
                    logger.debug(f"Age-restricted video (keeping): {video['title'][:50]}... ({video['video_id']})", module="Celebrity")
                elif metadata.get('_error') in ('unavailable', 'private', 'removed'):
                    # Truly unavailable - delete it.  A fresh write connection is
                    # opened per deletion rather than holding one across awaits.
                    with app_state.db.get_connection(for_write=True) as conn:
                        cursor = conn.cursor()
                        cursor.execute('DELETE FROM celebrity_discovered_videos WHERE id = ?', (video['id'],))
                        conn.commit()
                    deleted += 1
                    logger.info(f"Deleted {metadata.get('_error')} video: {video['title'][:50]}... ({video['video_id']})", module="Celebrity")
                elif not metadata or (not metadata.get('max_resolution') and not metadata.get('_error')):
                    # Empty response - likely unavailable
                    with app_state.db.get_connection(for_write=True) as conn:
                        cursor = conn.cursor()
                        cursor.execute('DELETE FROM celebrity_discovered_videos WHERE id = ?', (video['id'],))
                        conn.commit()
                    deleted += 1
                    logger.info(f"Deleted unavailable video: {video['title'][:50]}... ({video['video_id']})", module="Celebrity")
                # Rate limit to avoid API issues
                await asyncio.sleep(0.5)
            except Exception as e:
                # Best-effort sweep: one bad video must not abort the whole run.
                logger.warning(f"Error checking video {video['video_id']}: {e}", module="Celebrity")
                continue
        log_msg = f"Availability check complete: checked {checked}, deleted {deleted} unavailable"
        if age_restricted > 0:
            log_msg += f", {age_restricted} age-restricted (kept)"
        logger.info(log_msg, module="Celebrity")
    except Exception as e:
        logger.error(f"Availability cleanup failed: {e}", module="Celebrity")


# ============================================================================
# STATS ENDPOINTS
# ============================================================================

@router.get("/stats")
@limiter.limit("60/minute")
@handle_exceptions
async def get_celebrity_stats(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Get overall celebrity discovery statistics.

    Returns total counts, per-content-type and per-status breakdowns, and
    the 10 most recently discovered videos joined with the celebrity name.
    """
    app_state = get_app_state()
    with app_state.db.get_connection() as conn:
        cursor = conn.cursor()
        # Total counts
        cursor.execute('SELECT COUNT(*) FROM celebrity_profiles')
        total_celebrities = cursor.fetchone()[0]
        cursor.execute('SELECT COUNT(*) FROM celebrity_search_presets')
        total_presets = cursor.fetchone()[0]
        # Count active monitored YouTube channels
        cursor.execute("SELECT COUNT(*) FROM youtube_channel_monitors WHERE status = 'active'")
        monitored_channels = cursor.fetchone()[0]
        cursor.execute('SELECT COUNT(*) FROM celebrity_discovered_videos')
        total_videos = cursor.fetchone()[0]
        cursor.execute("SELECT COUNT(*) FROM celebrity_discovered_videos WHERE status = 'new'")
        new_videos = cursor.fetchone()[0]
        cursor.execute("SELECT COUNT(*) FROM celebrity_discovered_videos WHERE status = 'downloaded'")
        downloaded_videos = cursor.fetchone()[0]
        # Videos by content type
        cursor.execute('''
            SELECT content_type, COUNT(*) as count
            FROM celebrity_discovered_videos
            GROUP BY content_type
        ''')
        by_content_type = {row['content_type']: row['count'] for row in cursor.fetchall()}
        # Videos by status
        cursor.execute('''
            SELECT status, COUNT(*) as count
            FROM celebrity_discovered_videos
            GROUP BY status
        ''')
        by_status = {row['status']: row['count'] for row in cursor.fetchall()}
        # Recent discoveries
        cursor.execute('''
            SELECT v.*, cp.name as celebrity_name
            FROM celebrity_discovered_videos v
            JOIN celebrity_profiles cp ON v.celebrity_id = cp.id
            ORDER BY v.discovered_at DESC
            LIMIT 10
        ''')
        recent = []
        for row in cursor.fetchall():
            # Project only the lightweight columns; v.* also includes the
            # thumbnail_data BLOB, which we deliberately do not return here.
            recent.append({
                'id': row['id'],
                'title': row['title'],
                'celebrity_name': row['celebrity_name'],
                'channel_name': row['channel_name'],
                'thumbnail': row['thumbnail'],
                'discovered_at': row['discovered_at']
            })
    return {
        "success": True,
        "stats": {
            "total_celebrities": total_celebrities,
            "total_presets": total_presets,
            "monitored_channels": monitored_channels,
            "total_videos": total_videos,
            "new_videos": new_videos,
            "downloaded_videos": downloaded_videos,
            "by_content_type": by_content_type,
            "by_status": by_status
        },
        "recent_discoveries": recent
    }


@router.post("/fetch-dates")
@limiter.limit("2/minute")
@handle_exceptions
async def fetch_missing_dates(
    request: Request,
    limit: int = Query(50, ge=1, le=200),
    current_user: Dict = Depends(get_current_user)
):
    """Fetch upload dates for videos that don't have them.

    Looks up at most `limit` videos with a NULL upload_date (newest
    discoveries first) and backfills upload_date / view_count / duration
    from the video's metadata.  Runs inline, throttled to ~2 lookups/sec.
    """
    app_state = get_app_state()
    # Get videos without dates
    with app_state.db.get_connection() as conn:
        cursor = conn.cursor()
        cursor.execute('''
            SELECT id, video_id FROM celebrity_discovered_videos
            WHERE upload_date IS NULL
            ORDER BY discovered_at DESC
            LIMIT ?
        ''', (limit,))
        videos = cursor.fetchall()
    if not videos:
        return {"success": True, "message": "All videos have dates", "updated": 0}
    updated = 0
    for video in videos:
        try:
            metadata = await fetch_video_metadata(video['video_id'])
            if metadata.get('upload_date'):
                # Parse and store the date
                upload_date = metadata['upload_date']
                if len(upload_date) == 8:  # YYYYMMDD format
                    upload_date = datetime.strptime(upload_date, '%Y%m%d').isoformat()
                with app_state.db.get_connection(for_write=True) as conn:
                    cursor = conn.cursor()
                    # COALESCE keeps the existing value when metadata lacks it.
                    cursor.execute('''
                        UPDATE celebrity_discovered_videos
                        SET upload_date = ?,
                            view_count = COALESCE(?, view_count),
                            duration = COALESCE(?, duration)
                        WHERE id = ?
                    ''', (upload_date, metadata.get('view_count'), metadata.get('duration'), video['id']))
                    conn.commit()
                updated += 1
            # Small delay to avoid rate limiting
            await asyncio.sleep(0.5)
        except Exception as e:
            logger.warning(f"Failed to fetch date for video {video['id']}: {e}", module="Celebrity")
            continue
    return {
        "success": True,
        "message": f"Updated {updated} of {len(videos)} videos",
        "updated": updated,
        "total_checked": len(videos)
    }


# ============================================================================
# THUMBNAIL CACHING ENDPOINTS
# ============================================================================

async def download_and_cache_thumbnail(thumbnail_url: str) -> Optional[bytes]:
    """Download a thumbnail and return the binary data for caching.

    Always returns JPEG format. Converts webp to jpg if needed.
    For YouTube, prefers jpg URL over webp.

    Returns:
        The image bytes, or None on empty URL, non-200 response, or error.
    """
    if not thumbnail_url:
        return None
    # For YouTube, convert webp URLs to jpg URLs
    url_to_fetch = thumbnail_url
    if 'ytimg.com' in thumbnail_url:
        # Convert vi_webp to vi for jpg format
        url_to_fetch = thumbnail_url.replace('/vi_webp/', '/vi/')
        # Also try to get higher quality by using hqdefault if we have sddefault
        # NOTE(review): sddefault (640w) is actually *larger* than hqdefault
        # (480w), so this replacement downgrades quality — presumably done
        # because hqdefault exists for every video; confirm intent.
        if 'sddefault' in url_to_fetch:
            url_to_fetch = url_to_fetch.replace('sddefault', 'hqdefault')
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            response = await client.get(
                url_to_fetch,
                headers={
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
                }
            )
            if response.status_code == 200:
                content = response.content
                content_type = response.headers.get('content-type', '')
                # Convert webp to jpg if needed
                if 'webp' in content_type or url_to_fetch.endswith('.webp'):
                    try:
                        from PIL import Image
                        import io
                        img = Image.open(io.BytesIO(content))
                        # JPEG has no alpha channel; flatten RGBA/palette first.
                        if img.mode in ('RGBA', 'P'):
                            img = img.convert('RGB')
                        output = io.BytesIO()
                        img.save(output, format='JPEG', quality=85)
                        content = output.getvalue()
                    except Exception as e:
                        # Conversion failure falls through to returning the
                        # original (webp) bytes rather than failing outright.
                        logger.warning(f"Failed to convert webp to jpg: {e}", module="Celebrity")
                return content
    except Exception as e:
        logger.warning(f"Failed to cache thumbnail: {e}", module="Celebrity")
    return None


@router.get("/thumbnail/{video_id}")
@limiter.limit("500/minute")
@handle_exceptions
async def get_celebrity_video_thumbnail(
    request: Request,
    video_id: str,
    current_user: Dict = Depends(get_current_user)
):
    """
    Serve cached video thumbnail from database.
    Falls back to fetching and caching if not available.

    Raises:
        RecordNotFoundError: if the video is unknown or no thumbnail
            could be obtained.
    """
    app_state = get_app_state()
    with app_state.db.get_connection() as conn:
        cursor = conn.cursor()
        cursor.execute('''
            SELECT thumbnail_data, thumbnail
            FROM celebrity_discovered_videos
            WHERE video_id = ?
        ''', (video_id,))
        row = cursor.fetchone()
    if not row:
        raise RecordNotFoundError("Video not found", {"video_id": video_id})
    thumbnail_data = row['thumbnail_data']
    thumbnail_url = row['thumbnail']
    # Serve cached data if available
    if thumbnail_data:
        return Response(
            content=thumbnail_data,
            media_type='image/jpeg',
            headers={
                'Cache-Control': 'public, max-age=86400',
                'Access-Control-Allow-Origin': '*'
            }
        )
    # Fetch and cache if not available
    if thumbnail_url:
        thumbnail_data = await download_and_cache_thumbnail(thumbnail_url)
        if thumbnail_data:
            # Cache in database
            with app_state.db.get_connection(for_write=True) as conn:
                cursor = conn.cursor()
                cursor.execute('''
                    UPDATE celebrity_discovered_videos
                    SET thumbnail_data = ?
                    WHERE video_id = ?
                ''', (thumbnail_data, video_id))
                conn.commit()
            return Response(
                content=thumbnail_data,
                media_type='image/jpeg',
                headers={
                    'Cache-Control': 'public, max-age=86400',
                    'Access-Control-Allow-Origin': '*'
                }
            )
    # If all else fails, return 404
    raise RecordNotFoundError("Thumbnail not available", {"video_id": video_id})


# ============================================================================
# DOWNLOAD QUEUE ENDPOINTS
# ============================================================================

@router.post("/queue/add")
@limiter.limit("60/minute")
@handle_exceptions
async def add_video_to_download_queue(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """
    Add a celebrity discovered video to the main download queue.
    This updates the video status to 'queued' and adds it to
    the video_download_queue table.

    Raises:
        ValidationError: if video_id/url are missing or the video is
            already queued for this platform.
    """
    data = await request.json()
    app_state = get_app_state()
    video_id = data.get('video_id')
    url = data.get('url')
    if not video_id or not url:
        raise ValidationError("video_id and url are required")
    with app_state.db.get_connection(for_write=True) as conn:
        cursor = conn.cursor()
        # Check if already in queue
        cursor.execute('''
            SELECT id FROM video_download_queue
            WHERE video_id = ? AND platform = ?
        ''', (video_id, data.get('platform', 'youtube')))
        if cursor.fetchone():
            raise ValidationError("Video already exists in download queue")
        # Build metadata JSON with extra info
        metadata_json = json.dumps({
            'thumbnail': data.get('thumbnail', ''),
            'view_count': data.get('view_count', 0)
        })
        # Add to video_download_queue table (priority 5; source_type
        # 'celebrity' marks rows originating from this router)
        cursor.execute('''
            INSERT INTO video_download_queue (
                platform, video_id, url, title, channel_name, thumbnail,
                duration, upload_date, view_count, max_resolution, description,
                source_type, source_id, source_name, priority, metadata
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        ''', (
            data.get('platform', 'youtube'),
            video_id,
            url,
            data.get('title', ''),
            data.get('channel_name', ''),
            data.get('thumbnail', ''),
            data.get('duration', 0),
            data.get('upload_date'),
            data.get('view_count', 0),
            data.get('max_resolution'),
            data.get('description', ''),
            'celebrity',
            data.get('source_id'),
            data.get('source_name', ''),
            5,
            metadata_json
        ))
        # Update celebrity_discovered_videos status
        cursor.execute('''
            UPDATE celebrity_discovered_videos
            SET status = 'queued', status_updated_at = CURRENT_TIMESTAMP
            WHERE video_id = ?
        ''', (video_id,))
        conn.commit()
    return {"success": True, "message": "Video added to download queue"}


@router.post("/queue/bulk-add")
@limiter.limit("30/minute")
@handle_exceptions
async def bulk_add_videos_to_download_queue(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """
    Add multiple celebrity discovered videos to the main download queue.

    Items missing video_id/url or already present in the queue are
    skipped; per-item insert failures are logged and counted as skips.
    """
    data = await request.json()
    items = data.get('items', [])
    if not items:
        raise ValidationError("items array is required")
    app_state = get_app_state()
    added_count = 0
    skipped_count = 0
    with app_state.db.get_connection(for_write=True) as conn:
        cursor = conn.cursor()
        for item in items:
            video_id = item.get('video_id')
            url = item.get('url')
            if not video_id or not url:
                skipped_count += 1
                continue
            # Check if already in queue
            cursor.execute('''
                SELECT id FROM video_download_queue
                WHERE video_id = ? AND platform = ?
            ''', (video_id, item.get('platform', 'youtube')))
            if cursor.fetchone():
                skipped_count += 1
                continue
            try:
                # Build metadata JSON with extra info
                metadata_json = json.dumps({
                    'thumbnail': item.get('thumbnail', ''),
                    'view_count': item.get('view_count', 0)
                })
                # Add to video_download_queue table
                cursor.execute('''
                    INSERT INTO video_download_queue (
                        platform, video_id, url, title, channel_name, thumbnail,
                        duration, upload_date, view_count, max_resolution, description,
                        source_type, source_id, source_name, priority, metadata
                    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                ''', (
                    item.get('platform', 'youtube'),
                    video_id,
                    url,
                    item.get('title', ''),
                    item.get('channel_name', ''),
                    item.get('thumbnail', ''),
                    item.get('duration', 0),
                    item.get('upload_date'),
                    item.get('view_count', 0),
                    item.get('max_resolution'),
                    item.get('description', ''),
                    'celebrity',
                    item.get('source_id'),
                    item.get('source_name', ''),
                    5,
                    metadata_json
                ))
                # Update celebrity_discovered_videos status
                cursor.execute('''
                    UPDATE celebrity_discovered_videos
                    SET status = 'queued', status_updated_at = CURRENT_TIMESTAMP
                    WHERE video_id = ?
                ''', (video_id,))
                added_count += 1
            except Exception as e:
                logger.warning(f"Error adding video {video_id} to queue: {e}", module="Celebrity")
                skipped_count += 1
        # Single commit covers all inserts/updates in this batch.
        conn.commit()
    return {
        "success": True,
        "added_count": added_count,
        "skipped_count": skipped_count,
        "message": f"Added {added_count} videos to download queue"
    }