media-downloader/modules/universal_video_downloader.py

#!/usr/bin/env python3
"""
Universal Video Downloader Module - Downloads videos from YouTube, Vimeo, Dailymotion, Bilibili, and more
"""

import os
import re
import json
import subprocess
import hashlib
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Optional, Tuple
from modules.universal_logger import get_logger

logger = get_logger('UniversalVideoDownloader')

# Cookie/auth error patterns that indicate expired or invalid cookies
COOKIE_ERROR_PATTERNS = [
    r'sign in to confirm',
    r'login required',
    r'cookies.*expired',
    r'please sign in',
    r'authentication required',
    r'private video',
    r'video is unavailable.*sign in',
    r'age-restricted.*sign in',
    r'members-only content',
    r'this video is available to this channel',
    r'confirm your age',
]

# Browser User-Agent strings (updated Dec 2024)
BROWSER_USER_AGENTS = {
    'edge': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0',
    'chrome': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
    'firefox': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0',
    'safari': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 14_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Safari/605.1.15',
}

# Default anti-bot settings
DEFAULT_ANTIBOT_SETTINGS = {
    'browser': 'edge',
    'custom_user_agent': '',
    'limit_rate': '2M',
    'throttled_rate': '100K',
    'sleep_requests_min': 1,
    'sleep_requests_max': 3,
    'retries': 10,
    'fragment_retries': 10,
    'concurrent_fragments': 1,
    'socket_timeout': 30,
    'enabled': True,
}


def is_cookie_error(output: str) -> bool:
    """Check if output contains cookie/auth error patterns."""
    if not output:
        return False
    output_lower = output.lower()
    for pattern in COOKIE_ERROR_PATTERNS:
        if re.search(pattern, output_lower):
            return True
    return False


def get_antibot_settings(unified_db) -> dict:
    """Get anti-bot settings from database or return defaults."""
    if not unified_db:
        return DEFAULT_ANTIBOT_SETTINGS.copy()

    try:
        import json
        with unified_db.get_connection() as conn:
            cursor = conn.cursor()
            cursor.execute("SELECT value FROM settings WHERE key = 'antibot_settings'")
            row = cursor.fetchone()
            if row:
                settings = json.loads(row[0])
                # Merge with defaults to ensure all keys exist
                merged = DEFAULT_ANTIBOT_SETTINGS.copy()
                merged.update(settings)
                return merged
    except Exception:
        pass

    return DEFAULT_ANTIBOT_SETTINGS.copy()


def get_user_agent(settings: dict) -> str:
    """Get the user agent string based on settings."""
    browser = settings.get('browser', 'edge')

    if browser == 'custom':
        custom_ua = settings.get('custom_user_agent', '').strip()
        if custom_ua:
            return custom_ua
        # Fall back to edge if custom is empty
        return BROWSER_USER_AGENTS['edge']

    return BROWSER_USER_AGENTS.get(browser, BROWSER_USER_AGENTS['edge'])


def format_datetime_for_db(dt: datetime = None) -> str:
    """Format datetime for database storage using space separator (not ISO T separator).

    This ensures consistent string sorting in SQLite since 'T' > ' ' would cause
    ISO format dates to sort incorrectly with space-separated dates.

    Uses UTC time for consistency with other parts of the system.
    """
    if dt is None:
        dt = datetime.utcnow()
    return dt.strftime('%Y-%m-%d %H:%M:%S')


# Platform configurations
PLATFORMS = {
    'youtube': {
        'name': 'YouTube',
        'color': 'red',
        'base_path': '/opt/immich/md/youtube',
        'url_patterns': [
            r'(?:youtube\.com/watch\?v=|youtu\.be/|youtube\.com/embed/)([a-zA-Z0-9_-]{11})',
            r'youtube\.com/shorts/([a-zA-Z0-9_-]{11})',
        ],
        'id_pattern': r'^[a-zA-Z0-9_-]{11}$'
    },
    'vimeo': {
        'name': 'Vimeo',
        'color': 'blue',
        'base_path': '/opt/immich/md/vimeo',
        'url_patterns': [
            r'vimeo\.com/(\d+)',
            r'vimeo\.com/video/(\d+)',
            r'vimeo\.com/channels/[^/]+/(\d+)',
        ],
        'id_pattern': r'^\d+$'
    },
    'dailymotion': {
        'name': 'Dailymotion',
        'color': 'cyan',
        'base_path': '/opt/immich/md/dailymotion',
        'url_patterns': [
            r'dailymotion\.com/video/([a-zA-Z0-9]+)',
            r'dai\.ly/([a-zA-Z0-9]+)',
        ],
        'id_pattern': r'^[a-zA-Z0-9]+$'
    },
    'bilibili': {
        'name': 'Bilibili',
        'color': 'pink',
        'base_path': '/opt/immich/md/bilibili',
        'url_patterns': [
            r'bilibili\.com/video/(BV[a-zA-Z0-9]+)',
            r'bilibili\.com/video/(av\d+)',
            r'b23\.tv/([a-zA-Z0-9]+)',
        ],
        'id_pattern': r'^(BV[a-zA-Z0-9]+|av\d+)$'
    }
}

# Sites that should use gallery-dl instead of yt-dlp (image/gallery focused)
GALLERY_DL_SITES = {
    'erome': {
        'name': 'Erome',
        'color': 'purple',
        'base_path': '/opt/immich/md/erome',
        'url_patterns': [r'erome\.com/a/([a-zA-Z0-9]+)', r'erome\.com/([a-zA-Z0-9_-]+)$'],
    },
    'bunkr': {
        'name': 'Bunkr',
        'color': 'blue',
        'base_path': '/opt/immich/md/bunkr',
        'url_patterns': [r'bunkr\.\w+/a/([a-zA-Z0-9]+)', r'bunkr\.\w+/v/([a-zA-Z0-9]+)'],
    },
    'cyberdrop': {
        'name': 'Cyberdrop',
        'color': 'cyan',
        'base_path': '/opt/immich/md/cyberdrop',
        'url_patterns': [r'cyberdrop\.\w+/a/([a-zA-Z0-9]+)'],
    },
    'kemono': {
        'name': 'Kemono',
        'color': 'green',
        'base_path': '/opt/immich/md/kemono',
        'url_patterns': [r'kemono\.\w+/([^/]+)/user/(\d+)'],
    },
    'coomer': {
        'name': 'Coomer',
        'color': 'pink',
        'base_path': '/opt/immich/md/coomer',
        'url_patterns': [r'coomer\.\w+/([^/]+)/user/(\d+)'],
    },
    'pixeldrain': {
        'name': 'Pixeldrain',
        'color': 'indigo',
        'base_path': '/opt/immich/md/pixeldrain',
        'url_patterns': [r'pixeldrain\.com/u/([a-zA-Z0-9]+)', r'pixeldrain\.com/l/([a-zA-Z0-9]+)'],
    },
    'gofile': {
        'name': 'GoFile',
        'color': 'yellow',
        'base_path': '/opt/immich/md/gofile',
        'url_patterns': [r'gofile\.io/d/([a-zA-Z0-9]+)'],
    },
    'imgbox': {
        'name': 'ImgBox',
        'color': 'gray',
        'base_path': '/opt/immich/md/imgbox',
        'url_patterns': [r'imgbox\.com/g/([a-zA-Z0-9]+)'],
    },
    'imagebam': {
        'name': 'ImageBam',
        'color': 'orange',
        'base_path': '/opt/immich/md/imagebam',
        'url_patterns': [r'imagebam\.com/gallery/([a-zA-Z0-9]+)'],
    },
    'fapello': {
        'name': 'Fapello',
        'color': 'red',
        'base_path': '/opt/immich/md/fapello',
        'url_patterns': [r'fapello\.com/([a-zA-Z0-9_-]+)'],
    },
    'imagefap': {
        'name': 'ImageFap',
        'color': 'green',
        'base_path': '/opt/immich/md/imagefap',
        'url_patterns': [r'imagefap\.com/pictures/(\d+)', r'imagefap\.com/gallery/(\d+)'],
    },
    'rule34': {
        'name': 'Rule34',
        'color': 'green',
        'base_path': '/opt/immich/md/rule34',
        'url_patterns': [r'rule34\.(xxx|us|paheal)'],
    },
    'e621': {
        'name': 'e621',
        'color': 'blue',
        'base_path': '/opt/immich/md/e621',
        'url_patterns': [r'e621\.net'],
    },
    'nhentai': {
        'name': 'nHentai',
        'color': 'pink',
        'base_path': '/opt/immich/md/nhentai',
        'url_patterns': [r'nhentai\.net/g/(\d+)'],
    },
    'hitomi': {
        'name': 'Hitomi',
        'color': 'pink',
        'base_path': '/opt/immich/md/hitomi',
        'url_patterns': [r'hitomi\.la'],
    },
    'gelbooru': {
        'name': 'Gelbooru',
        'color': 'blue',
        'base_path': '/opt/immich/md/gelbooru',
        'url_patterns': [r'gelbooru\.com'],
    },
    'danbooru': {
        'name': 'Danbooru',
        'color': 'blue',
        'base_path': '/opt/immich/md/danbooru',
        'url_patterns': [r'danbooru\.donmai\.us'],
    },
    'deviantart': {
        'name': 'DeviantArt',
        'color': 'green',
        'base_path': '/opt/immich/md/deviantart',
        'url_patterns': [r'deviantart\.com'],
    },
    'artstation': {
        'name': 'ArtStation',
        'color': 'blue',
        'base_path': '/opt/immich/md/artstation',
        'url_patterns': [r'artstation\.com'],
    },
    'pixiv': {
        'name': 'Pixiv',
        'color': 'blue',
        'base_path': '/opt/immich/md/pixiv',
        'url_patterns': [r'pixiv\.net'],
    },
    'furaffinity': {
        'name': 'FurAffinity',
        'color': 'orange',
        'base_path': '/opt/immich/md/furaffinity',
        'url_patterns': [r'furaffinity\.net'],
    },
    'catbox': {
        'name': 'Catbox',
        'color': 'purple',
        'base_path': '/opt/immich/md/catbox',
        'url_patterns': [r'catbox\.moe', r'files\.catbox\.moe'],
    },
}


class UniversalVideoDownloader:
    """Downloads videos from multiple platforms using yt-dlp and gallery-dl"""

    # Default base directory for all downloads
    DEFAULT_BASE_DIR = '/opt/immich/md'

    def __init__(self, platform: str = 'youtube', base_path: Path = None, unified_db=None, cookies_file: str = None):
        """
        Initialize Universal Video Downloader

        Args:
            platform: Platform name (youtube, vimeo, dailymotion, bilibili, or gallery-dl sites)
            base_path: Base path for downloads (default: from settings or platform config)
            unified_db: UnifiedDatabase instance (required)
            cookies_file: Path to cookies file for yt-dlp (optional)
        """
        self.cookies_file = cookies_file
        # Check if platform is a gallery-dl site
        self.is_gallery_dl = platform in GALLERY_DL_SITES

        if platform not in PLATFORMS and platform not in GALLERY_DL_SITES:
            raise ValueError(f"Unsupported platform: {platform}. Supported: {', '.join(list(PLATFORMS.keys()) + list(GALLERY_DL_SITES.keys()))}")

        self.platform = platform

        if self.is_gallery_dl:
            self.platform_config = GALLERY_DL_SITES[platform]
        else:
            self.platform_config = PLATFORMS[platform]

        # Set base path - check settings first, then use default
        if base_path:
            self.base_path = Path(base_path)
        else:
            # Try to get base directory from settings
            config_base_dir = self._get_configured_base_dir(unified_db)
            self.base_path = Path(config_base_dir) / platform

        self.base_path.mkdir(parents=True, exist_ok=True)

        # Load video downloader settings
        self.video_settings = self._get_video_downloader_settings(unified_db)

        # Initialize universal logger
        self.logger = get_logger('UniversalVideoDownloader')

        # Always use unified database adapter
        if not unified_db:
            raise ValueError("Universal video downloader requires unified_db")

        self.unified_db = unified_db

        # Initialize activity status manager for real-time updates
        from modules.activity_status import get_activity_manager
        self.activity_manager = get_activity_manager(unified_db)

    def _get_video_downloader_settings(self, unified_db) -> dict:
        """Get video downloader settings from database."""
        defaults = {
            'base_path': '',
            'max_concurrent': 3,
            'cache_thumbnails': True,
            'auto_generate_thumbnails': True,
            'embed_metadata': True
        }
        if not unified_db:
            return defaults

        try:
            import json
            with unified_db.get_connection() as conn:
                cursor = conn.cursor()
                cursor.execute("SELECT value FROM settings WHERE key = 'video_downloader'")
                row = cursor.fetchone()
                if row:
                    settings = json.loads(row[0])
                    defaults.update(settings)
        except Exception:
            pass

        return defaults

    def _get_configured_base_dir(self, unified_db) -> str:
        """Get base download directory from settings or use default."""
        if not unified_db:
            return self.DEFAULT_BASE_DIR

        try:
            import json
            with unified_db.get_connection() as conn:
                cursor = conn.cursor()
                # First check video_downloader.base_path
                cursor.execute("SELECT value FROM settings WHERE key = 'video_downloader'")
                row = cursor.fetchone()
                if row:
                    settings = json.loads(row[0])
                    base_path = settings.get('base_path')
                    if base_path:
                        return base_path

                # Fall back to download_settings.base_directory
                cursor.execute("SELECT value FROM settings WHERE key = 'download_settings'")
                row = cursor.fetchone()
                if row:
                    settings = json.loads(row[0])
                    base_dir = settings.get('base_directory')
                    if base_dir:
                        return base_dir
        except Exception:
            pass

        return self.DEFAULT_BASE_DIR

    def _get_ytdlp_base_cmd(self) -> list:
        """Get base yt-dlp command with cookies if configured."""
        cmd = ['/opt/media-downloader/venv/bin/yt-dlp']
        # Enable remote EJS components for YouTube n-challenge solving (deno required)
        cmd.extend(['--remote-components', 'ejs:github'])
        if self.cookies_file:
            cmd.extend(['--cookies', self.cookies_file])
        return cmd

    def _get_gallery_dl_base_cmd(self) -> list:
        """Get base gallery-dl command with cookies if configured."""
        cmd = ['/opt/media-downloader/venv/bin/gallery-dl']
        if self.cookies_file:
            cmd.extend(['--cookies', self.cookies_file])
        return cmd

    def log(self, message: str, level: str = "info", module: str = "Download"):
        """Log a message with level

        Args:
            message: The message to log
            level: Log level ('debug', 'info', 'warning', 'error', 'success')
            module: Module name for logging
        """
        level = level.lower()
        self.logger.log(f"[{self.platform_config['name']}] {message}", level.upper(), module=module)

    def detect_platform(self, url: str) -> Optional[str]:
        """Detect platform from URL

        Args:
            url: Video URL

        Returns:
            Platform name or None if not detected
        """
        # Check yt-dlp platforms first
        for platform, config in PLATFORMS.items():
            for pattern in config['url_patterns']:
                if re.search(pattern, url, re.IGNORECASE):
                    return platform

        # Check gallery-dl sites
        for platform, config in GALLERY_DL_SITES.items():
            for pattern in config['url_patterns']:
                if re.search(pattern, url, re.IGNORECASE):
                    return platform

        return None

    @staticmethod
    def detect_gallery_dl_site(url: str) -> Optional[str]:
        """Detect if URL is a gallery-dl supported site

        Args:
            url: URL to check

        Returns:
            Site name or None if not a gallery-dl site
        """
        for site, config in GALLERY_DL_SITES.items():
            for pattern in config['url_patterns']:
                if re.search(pattern, url, re.IGNORECASE):
                    return site
        return None

    def extract_video_id(self, url: str) -> Optional[str]:
        """Extract video ID from URL

        Args:
            url: Video URL

        Returns:
            Video ID or None if not found
        """
        # Try patterns for current platform
        for pattern in self.platform_config['url_patterns']:
            match = re.search(pattern, url, re.IGNORECASE)
            if match:
                return match.group(1)

        # If URL is just the video ID
        if re.match(self.platform_config['id_pattern'], url):
            return url

        return None

    def _is_already_downloaded(self, video_id: str) -> bool:
        """Check if a video has already been downloaded

        Args:
            video_id: Video ID

        Returns:
            True if already downloaded
        """
        try:
            with self.unified_db.get_connection() as conn:
                cursor = conn.cursor()
                cursor.execute('''
                    SELECT COUNT(*) as count FROM video_downloads
                    WHERE platform = ? AND video_id = ?
                ''', (self.platform, video_id))
                result = cursor.fetchone()
                return result['count'] > 0
        except Exception as e:
            self.log(f"Error checking if video already downloaded: {e}", "error", "Database")
            return False

    def _record_download(self, video_id: str, url: str, title: str,
                        file_path: str, uploader: str = None,
                        upload_date: Optional[datetime] = None,
                        duration: int = None, file_size: int = None,
                        metadata: Dict = None):
        """Record a successful download in the database

        Args:
            video_id: Video ID
            url: Original URL
            title: Video title
            file_path: Path to downloaded file
            uploader: Channel/uploader name
            upload_date: Upload date
            duration: Duration in seconds
            file_size: File size in bytes
            metadata: Additional metadata
        """
        try:
            # Prepare metadata for JSON serialization
            metadata_serializable = None
            if metadata:
                metadata_serializable = dict(metadata)
                # Convert datetime objects to ISO format strings
                if 'upload_date' in metadata_serializable and isinstance(metadata_serializable['upload_date'], datetime):
                    metadata_serializable['upload_date'] = metadata_serializable['upload_date'].isoformat()

            with self.unified_db.get_connection() as conn:
                cursor = conn.cursor()

                # Check if we have cached thumbnail from preview list
                cursor.execute('''
                    SELECT thumbnail_data FROM video_preview_list
                    WHERE platform = ? AND video_id = ?
                ''', (self.platform, video_id))
                preview_row = cursor.fetchone()
                thumbnail_data = preview_row[0] if preview_row else None

                # Also check video_download_queue (for downloads initiated from queue)
                if not thumbnail_data:
                    cursor.execute('''
                        SELECT thumbnail_data FROM video_download_queue
                        WHERE platform = ? AND video_id = ?
                    ''', (self.platform, video_id))
                    queue_row = cursor.fetchone()
                    if queue_row and queue_row[0]:
                        thumbnail_data = queue_row[0]

                # Fallback: fetch thumbnail from URL if not in cache
                if not thumbnail_data and metadata:
                    thumbnail_url = metadata.get('thumbnail')
                    if thumbnail_url:
                        thumbnail_data = self._fetch_thumbnail(thumbnail_url, video_id)

                cursor.execute('''
                    INSERT INTO video_downloads
                    (platform, video_id, url, title, uploader, upload_date, duration, file_path, file_size, metadata, download_date, thumbnail_data)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                ''', (
                    self.platform,
                    video_id,
                    url,
                    title,
                    uploader,
                    format_datetime_for_db(upload_date) if upload_date else None,
                    duration,
                    file_path,
                    file_size,
                    json.dumps(metadata_serializable) if metadata_serializable else None,
                    format_datetime_for_db(),
                    thumbnail_data
                ))
                conn.commit()
                self.log(f"Recorded download: {title}", "success", "Database")
        except Exception as e:
            self.log(f"Error recording download: {e}", "error", "Database")

    def _fetch_thumbnail(self, thumbnail_url: str, video_id: str) -> Optional[bytes]:
        """Fetch thumbnail from URL and return binary data.

        Args:
            thumbnail_url: URL of the thumbnail
            video_id: Video ID for logging

        Returns:
            Thumbnail binary data or None on failure
        """
        import requests

        if not thumbnail_url:
            return None

        try:
            # For YouTube, try maxresdefault first (1280x720, no black bars), fallback to hqdefault
            url_to_fetch = thumbnail_url
            if 'ytimg.com' in thumbnail_url:
                # Try maxresdefault first (best quality, no letterboxing)
                for quality in ['maxresdefault', 'hqdefault']:
                    url_to_fetch = f"https://i.ytimg.com/vi/{video_id}/{quality}.jpg"
                    response = requests.get(
                        url_to_fetch,
                        headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'},
                        timeout=10
                    )
                    if response.status_code == 200 and len(response.content) > 1000:
                        self.log(f"Fetched {quality} thumbnail for {video_id}", "debug", "Database")
                        return response.content
                return None

            response = requests.get(
                url_to_fetch,
                headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'},
                timeout=10
            )

            if response.status_code == 200 and len(response.content) > 1000:
                self.log(f"Fetched thumbnail for {video_id}", "debug", "Database")
                return response.content

        except Exception as e:
            self.log(f"Failed to fetch thumbnail for {video_id}: {e}", "warning", "Database")

        return None

    def get_video_info(self, url: str) -> Optional[Dict]:
        """Get video metadata using yt-dlp without downloading

        Args:
            url: Video URL

        Returns:
            Dictionary with video info or None on error
        """
        try:
            self.log(f"Fetching video info for: {url}", "info", "Core")

            cmd = self._get_ytdlp_base_cmd() + [
                '--dump-json',
                '--no-playlist',
                url
            ]

            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=30
            )

            if result.returncode != 0:
                self.log(f"Failed to fetch video info: {result.stderr}", "error", "Core")
                return None

            info = json.loads(result.stdout)

            # Extract upload date
            upload_date = None
            if 'upload_date' in info and info['upload_date']:
                try:
                    upload_date = datetime.strptime(info['upload_date'], '%Y%m%d')
                except Exception as e:
                    self.log(f"Error parsing upload date: {e}", "warning", "Core")

            # Extract video ID from info
            video_id = info.get('id') or self.extract_video_id(url)

            return {
                'video_id': video_id,
                'title': info.get('title'),
                'uploader': info.get('uploader') or info.get('channel') or info.get('creator'),
                'upload_date': upload_date,
                'duration': info.get('duration'),
                'description': info.get('description'),
                'thumbnail': info.get('thumbnail'),
                'view_count': info.get('view_count'),
                'like_count': info.get('like_count'),
            }

        except subprocess.TimeoutExpired:
            self.log("Timeout fetching video info", "error", "Core")
            return None
        except Exception as e:
            self.log(f"Error fetching video info: {e}", "error", "Core")
            return None

    def get_playlist_info(self, url: str) -> Optional[Dict]:
        """Get playlist info including all video entries

        Args:
            url: Playlist URL

        Returns:
            Dictionary with playlist info and video entries or None on error
        """
        try:
            self.log(f"Fetching playlist info for: {url}", "info", "Core")

            cmd = self._get_ytdlp_base_cmd() + [
                '--dump-json',
                '--flat-playlist',  # Only fetch metadata, not full video info
                url
            ]

            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=60
            )

            if result.returncode != 0:
                self.log(f"Failed to fetch playlist info: {result.stderr}", "error", "Core")
                return None

            # Parse JSONL output (one JSON object per line)
            videos = []
            lines = result.stdout.strip().split('\n')

            for line in lines:
                if not line.strip():
                    continue
                try:
                    entry = json.loads(line)

                    # Skip non-video entries
                    if entry.get('_type') == 'playlist':
                        continue

                    videos.append({
                        'video_id': entry.get('id'),
                        'title': entry.get('title'),
                        'uploader': entry.get('uploader') or entry.get('channel'),
                        'upload_date': None,  # Not available in flat-playlist
                        'duration': entry.get('duration'),
                        'description': '',
                        'thumbnail': entry.get('thumbnail'),
                        'view_count': entry.get('view_count'),
                        'like_count': entry.get('like_count'),
                        'url': entry.get('url') or entry.get('webpage_url'),
                    })
                except json.JSONDecodeError:
                    continue

            if not videos:
                self.log("No videos found in playlist", "warning", "Core")
                return None

            return {
                'is_playlist': True,
                'playlist_count': len(videos),
                'playlist_videos': videos
            }

        except subprocess.TimeoutExpired:
            self.log("Timeout fetching playlist info", "error", "Core")
            return None
        except Exception as e:
            self.log(f"Error fetching playlist info: {e}", "error", "Core")
            return None

    def get_gallery_info(self, url: str) -> Optional[Dict]:
        """Get gallery/album info using gallery-dl

        Args:
            url: Gallery URL

        Returns:
            Dictionary with gallery info or None on error
        """
        try:
            self.log(f"Fetching gallery info for: {url}", "info", "Core")

            cmd = self._get_gallery_dl_base_cmd() + [
                '--dump-json',
                '--no-download',
                url
            ]

            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=60
            )

            if result.returncode != 0:
                self.log(f"Failed to fetch gallery info: {result.stderr}", "error", "Core")
                return None

            # Parse JSON output
            try:
                entries = json.loads(result.stdout)
            except json.JSONDecodeError:
                self.log("Failed to parse gallery-dl JSON output", "error", "Core")
                return None

            if not entries:
                self.log("No entries found in gallery", "warning", "Core")
                return None

            # gallery-dl output format:
            # - Entry with [2, {album_metadata}] = album info
            # - Entry with [3, "url", {file_metadata}] = file entries
            album_metadata = {}
            file_entries = []
            first_thumbnail = None

            for entry in entries:
                if isinstance(entry, list) and len(entry) >= 2:
                    entry_type = entry[0]
                    if entry_type == 2 and isinstance(entry[1], dict):
                        # Album metadata
                        album_metadata = entry[1]
                    elif entry_type == 3 and len(entry) >= 3:
                        # File entry: [3, url, metadata]
                        file_url = entry[1]
                        file_meta = entry[2] if isinstance(entry[2], dict) else {}
                        file_entries.append({
                            'url': file_url,
                            'extension': file_meta.get('extension', ''),
                            'filename': file_meta.get('filename', '')
                        })
                        # Get first image as thumbnail
                        if not first_thumbnail and file_meta.get('extension', '').lower() in ['jpg', 'jpeg', 'png', 'gif', 'webp']:
                            first_thumbnail = file_url

            if not file_entries and not album_metadata:
                self.log("No valid entries found in gallery", "warning", "Core")
                return None

            # Generate a unique ID for the gallery
            gallery_id = album_metadata.get('album_id') or hashlib.sha256(url.encode()).hexdigest()[:12]

            # Count media types
            video_extensions = ['mp4', 'webm', 'mov', 'avi', 'mkv', 'm4v']
            video_count = sum(1 for e in file_entries if e.get('extension', '').lower() in video_extensions)
            image_count = len(file_entries) - video_count

            # Get title from metadata
            title = (album_metadata.get('title') or
                    album_metadata.get('album') or
                    album_metadata.get('gallery') or
                    f"Gallery {gallery_id}")

            return {
                'video_id': gallery_id,
                'title': title,
                'uploader': album_metadata.get('user') or album_metadata.get('uploader') or album_metadata.get('author', ''),
                'upload_date': album_metadata.get('date'),
                'duration': 0,
                'description': album_metadata.get('description', ''),
                'thumbnail': first_thumbnail or (file_entries[0]['url'] if file_entries else ''),
                'view_count': 0,
                'like_count': 0,
                'is_gallery': True,
                'file_count': len(file_entries),
                'image_count': image_count,
                'video_count': video_count,
                'url': url,
                'tags': album_metadata.get('tags', []),
            }

        except subprocess.TimeoutExpired:
            self.log("Timeout fetching gallery info", "error", "Core")
            return None
        except Exception as e:
            self.log(f"Error fetching gallery info: {e}", "error", "Core")
            return None

    def download_gallery(self, url: str, progress_callback=None, gallery_info: Dict = None) -> Tuple[bool, Optional[str], Optional[Dict]]:
        """Download a gallery/album using gallery-dl

        Args:
            url: Gallery URL
            progress_callback: Optional callback for progress updates (message, percentage, speed, eta)
            gallery_info: Optional pre-fetched gallery info from get_gallery_info()

        Returns:
            Tuple of (success, output_directory, metadata)
        """
        try:
            # Use album ID from gallery_info if available, otherwise generate hash
            gallery_id = gallery_info.get('video_id') if gallery_info else None
            if not gallery_id:
                gallery_id = hashlib.sha256(url.encode()).hexdigest()[:12]

            self.log(f"Starting gallery download: {url}", "info", "Core")

            if progress_callback:
                progress_callback(f"Starting gallery download...", 0, None, None)

            # Get uploader for subfolder organization
            uploader = gallery_info.get('uploader', '') if gallery_info else ''
            if not uploader:
                uploader = 'unknown'
            # Sanitize channel name for filesystem
            safe_channel = re.sub(r'[<>:"/\\|?*]', '', uploader)
            safe_channel = re.sub(r'\s+', ' ', safe_channel).strip('. ')[:50] or 'unknown'

            # Create output directory under channel subfolder
            channel_dir = self.base_path / safe_channel
            output_dir = channel_dir / gallery_id
            output_dir.mkdir(parents=True, exist_ok=True)

            # Build gallery-dl command
            cmd = self._get_gallery_dl_base_cmd() + [
                '--directory', str(output_dir),
                '--filename', '{filename}.{extension}',
                '--write-metadata',
                '--write-info-json',
                url
            ]

            # Run gallery-dl with progress tracking
            process = subprocess.Popen(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
                bufsize=1
            )

            downloaded_files = []
            total_files = 0
            current_file = 0

            for line in iter(process.stdout.readline, ''):
                line = line.strip()
                if not line:
                    continue

                self.log(line, "debug", "Download")

                # Parse progress from gallery-dl output
                if line.startswith('#'):
                    # Extract total count from "# 1/10" format
                    match = re.search(r'#\s*(\d+)/(\d+)', line)
                    if match:
                        current_file = int(match.group(1))
                        total_files = int(match.group(2))
                        percentage = int((current_file / total_files) * 100)
                        if progress_callback:
                            progress_callback(f"Downloading file {current_file}/{total_files}", percentage, None, None)

                elif 'Downloading' in line or 'Saving' in line:
                    if progress_callback:
                        progress_callback(line, 50 if total_files == 0 else int((current_file / total_files) * 100), None, None)

                # Track downloaded files
                if output_dir.exists():
                    current_files = list(output_dir.glob('*'))
                    downloaded_files = [f for f in current_files if f.is_file() and not f.name.endswith('.json')]

            process.wait()

            if process.returncode != 0:
                self.log(f"Gallery download failed with code {process.returncode}", "error", "Core")
                if progress_callback:
                    progress_callback("Download failed", 0, None, None)
                return False, None, None

            # Get final list of downloaded files
            downloaded_files = [f for f in output_dir.glob('*') if f.is_file() and not f.name.endswith('.json')]

            if not downloaded_files:
                self.log("No files were downloaded", "error", "Core")
                return False, None, None

            # Parse upload_date from gallery_info
            upload_date = None
            if gallery_info and gallery_info.get('upload_date'):
                ud = gallery_info['upload_date']
                if isinstance(ud, datetime):
                    upload_date = ud
                elif isinstance(ud, str):
                    # Try parsing common date formats
                    for fmt in ['%Y-%m-%d %H:%M:%S', '%Y-%m-%d', '%Y-%m-%dT%H:%M:%S']:
                        try:
                            upload_date = datetime.strptime(ud, fmt)
                            break
                        except ValueError:
                            continue

            # Set file timestamps to upload date (same as yt-dlp)
            if upload_date:
                timestamp = upload_date.timestamp()
                for file_path in downloaded_files:
                    os.utime(file_path, (timestamp, timestamp))
                self.log(f"Set file timestamps to {upload_date}", "info", "Core")

            # Calculate total size
            total_size = sum(f.stat().st_size for f in downloaded_files)

            # Use gallery_info if available for better metadata
            metadata = {
                'video_id': gallery_id,
                'title': gallery_info.get('title', f"Gallery {gallery_id}") if gallery_info else f"Gallery {gallery_id}",
                'uploader': gallery_info.get('uploader', '') if gallery_info else '',
                'upload_date': upload_date or datetime.now(),
                'duration': 0,
                'description': gallery_info.get('description', '') if gallery_info else '',
                'thumbnail': gallery_info.get('thumbnail', '') if gallery_info else '',
                'view_count': gallery_info.get('view_count', 0) if gallery_info else 0,
                'like_count': gallery_info.get('like_count', 0) if gallery_info else 0,
                'is_gallery': True,
                'file_count': len(downloaded_files),
                'total_size': total_size,
                'files': [str(f) for f in downloaded_files],
                'tags': gallery_info.get('tags', []) if gallery_info else [],
            }

            self.log(f"Gallery download complete: {len(downloaded_files)} files, {total_size} bytes", "success", "Core")

            if progress_callback:
                progress_callback(f"Downloaded {len(downloaded_files)} files", 100, None, None)

            # Record to video_downloads table
            self._record_download(
                video_id=gallery_id,
                url=url,
                title=metadata.get('title', f"Gallery {gallery_id}"),
                file_path=str(output_dir),
                uploader=metadata.get('uploader', ''),
                upload_date=upload_date,
                duration=0,
                file_size=total_size,
                metadata=metadata
            )

            # Also add to general downloads table for Media/Downloads page
            url_hash = hashlib.sha256(url.encode()).hexdigest()
            post_date = format_datetime_for_db(upload_date) if upload_date else format_datetime_for_db()
            with self.unified_db.get_connection() as conn:
                cursor = conn.cursor()
                cursor.execute('''
                    INSERT OR REPLACE INTO downloads
                    (url_hash, url, platform, source, post_date, download_date, status, file_path, filename)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
                ''', (
                    url_hash,
                    url,
                    self.platform,
                    metadata.get('uploader', ''),
                    post_date,
                    format_datetime_for_db(),
                    'completed',
                    str(output_dir),
                    gallery_id
                ))
                conn.commit()

            # Add each file to file_inventory for Media page (same as yt-dlp)
            created_date = format_datetime_for_db(upload_date) if upload_date else format_datetime_for_db()
            for file_path in downloaded_files:
                file_stat = file_path.stat()
                ext = file_path.suffix.lower()
                content_type = 'video' if ext in ['.mp4', '.webm', '.mov', '.avi', '.mkv'] else 'image'

                # Prepare metadata for JSON serialization
                file_metadata = {
                    'gallery_id': gallery_id,
                    'title': metadata.get('title', ''),
                    'uploader': metadata.get('uploader', ''),
                    'tags': metadata.get('tags', []),
                    'url': url,
                }

                self.unified_db.upsert_file_inventory(
                    file_path=str(file_path),
                    filename=file_path.name,
                    platform=self.platform,
                    source=metadata.get('uploader', ''),
                    content_type=content_type,
                    file_size=file_stat.st_size,
                    location='final',
                    metadata=file_metadata,
                    created_date=created_date
                )
            self.log(f"Added {len(downloaded_files)} files to file_inventory", "info", "Database")

            return True, str(output_dir), metadata

        except Exception as e:
            self.log(f"Error downloading gallery: {e}", "error", "Core")
            if progress_callback:
                progress_callback(f"Error: {str(e)}", 0, None, None)
            return False, None, None

    def download_video(self, url: str, progress_callback=None, update_activity: bool = True) -> Tuple[bool, Optional[str], Optional[Dict]]:
        """Download a video with metadata extraction

        Args:
            url: Video URL
            progress_callback: Optional callback for progress updates (message, percentage)
            update_activity: Whether to update the activity_status table (set False for queue downloads)

        Returns:
            Tuple of (success, file_path, metadata)
        """
        try:
            # Extract video ID
            video_id = self.extract_video_id(url)
            if not video_id:
                self.log(f"Invalid {self.platform_config['name']} URL: {url}", "error", "Core")
                return False, None, None

            # Check if already downloaded
            if self._is_already_downloaded(video_id):
                self.log(f"Video {video_id} already downloaded, skipping", "info", "Core")
                return False, None, {'error': 'Already downloaded'}

            # Update activity status (only for scheduler-driven downloads, not queue)
            activity_key = f'{self.platform}_downloader'
            if update_activity:
                self.activity_manager.update_status(f'Downloading: {url}')

            if progress_callback:
                progress_callback("Fetching video metadata...", 5)

            # Get video info first
            info = self.get_video_info(url)
            if not info:
                if update_activity:
                    self.activity_manager.update_status('Idle')
                return False, None, {'error': 'Failed to fetch video info'}

            self.log(f"Downloading: {info['title']}", "info", "Core")

            if progress_callback:
                progress_callback(f"Downloading: {info['title']}", 10)

            # Generate output filename with date prefix
            upload_date = info.get('upload_date')
            if upload_date:
                date_prefix = upload_date.strftime('%Y%m%d')
            else:
                date_prefix = datetime.now().strftime('%Y%m%d')

            # Sanitize title for filename
            safe_title = re.sub(r'[<>:"/\\|?*]', '_', info['title'][:100])

            # Get channel/uploader for subfolder organization
            uploader = info.get('uploader') or info.get('channel') or info.get('creator') or 'unknown'
            # Sanitize channel name for filesystem
            safe_channel = re.sub(r'[<>:"/\\|?*]', '', uploader)
            safe_channel = re.sub(r'\s+', ' ', safe_channel).strip('. ')[:50] or 'unknown'

            # Create channel subfolder
            channel_dir = self.base_path / safe_channel
            channel_dir.mkdir(parents=True, exist_ok=True)

            output_template = str(channel_dir / f"{date_prefix}_{safe_title}_{video_id}.%(ext)s")

            # Get anti-bot settings
            antibot = get_antibot_settings(self.unified_db)

            # Build base command
            cmd = self._get_ytdlp_base_cmd() + [
                '--no-playlist',
                '--format', 'bestvideo+bestaudio/best',
                '--merge-output-format', 'mp4',
                '--output', output_template,
            ]

            # Add metadata embedding based on settings
            if self.video_settings.get('embed_metadata', True):
                cmd.append('--add-metadata')

            # Add thumbnail embedding based on settings
            if self.video_settings.get('cache_thumbnails', True):
                cmd.append('--embed-thumbnail')

            # Add anti-bot measures if enabled
            if antibot.get('enabled', True):
                # User agent
                user_agent = get_user_agent(antibot)
                cmd.extend(['--user-agent', user_agent])

                # Rate limiting
                if antibot.get('limit_rate'):
                    cmd.extend(['--limit-rate', antibot['limit_rate']])

                # Throttle detection
                if antibot.get('throttled_rate'):
                    cmd.extend(['--throttled-rate', antibot['throttled_rate']])

                # Sleep between requests
                sleep_min = antibot.get('sleep_requests_min', 1)
                sleep_max = antibot.get('sleep_requests_max', 3)
                cmd.extend(['--sleep-requests', str(sleep_min)])
                # Use sleep-interval for delays between downloads (with max variant)
                if sleep_max > sleep_min:
                    cmd.extend(['--sleep-interval', str(sleep_min), '--max-sleep-interval', str(sleep_max)])

                # Concurrent fragments
                cmd.extend(['--concurrent-fragments', str(antibot.get('concurrent_fragments', 1))])

                # Retries
                cmd.extend(['--retries', str(antibot.get('retries', 10))])
                cmd.extend(['--fragment-retries', str(antibot.get('fragment_retries', 10))])

                # Socket timeout
                cmd.extend(['--socket-timeout', str(antibot.get('socket_timeout', 30))])

                # Don't abort on errors
                cmd.append('--no-abort-on-error')

            # Add URL last
            cmd.append(url)

            if progress_callback:
                progress_callback("Downloading video...", 20)

            # Run download with progress tracking
            process = subprocess.Popen(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True
            )

            # Collect output for error detection
            output_lines = []

            # Parse yt-dlp output for progress
            for line in process.stdout:
                output_lines.append(line)

                # Look for [download] XX.X% lines
                # Format: [download]  45.2% of 123.45MiB at  2.5MiB/s ETA 00:32
                if '[download]' in line and '%' in line:
                    try:
                        percent_match = re.search(r'(\d+\.?\d*)%', line)
                        speed_match = re.search(r'at\s+([\d.]+\s*\w+/s)', line)
                        eta_match = re.search(r'ETA\s+([\d:]+)', line)

                        if percent_match:
                            percent = float(percent_match.group(1))
                            # Scale to 20-90% range
                            scaled_percent = 20 + (percent * 0.7)

                            speed = speed_match.group(1) if speed_match else None
                            eta = eta_match.group(1) if eta_match else None

                            if progress_callback:
                                # Build message with speed/ETA if available
                                msg = f"Downloading: {percent:.1f}%"
                                if speed:
                                    msg += f" • {speed}"
                                if eta:
                                    msg += f" • ETA {eta}"
                                progress_callback(msg, int(scaled_percent), speed, eta)
                    except (ValueError, KeyError, TypeError):
                        pass

            process.wait()

            # Check for cookie/auth errors in output
            full_output = ''.join(output_lines)
            if process.returncode != 0 and is_cookie_error(full_output):
                self.log("Download failed: Cookie/authentication error detected", "error", "Core")
                if update_activity:
                    self.activity_manager.update_status('Idle')
                return False, None, {'error': 'Cookie expired', 'cookie_error': True}

            if process.returncode != 0:
                self.log("Download failed", "error", "Core")
                if update_activity:
                    self.activity_manager.update_status('Idle')
                return False, None, {'error': 'Download failed'}

            if progress_callback:
                progress_callback("Processing metadata...", 95)

            # Find the downloaded file
            # Escape glob special characters (brackets, etc.) in the pattern
            import glob as glob_module
            escaped_prefix = glob_module.escape(f"{date_prefix}_{safe_title}_{video_id}")
            expected_pattern = f"{escaped_prefix}.*"
            downloaded_files = list(channel_dir.glob(expected_pattern))

            if not downloaded_files:
                self.log("Downloaded file not found", "error", "Core")
                if update_activity:
                    self.activity_manager.update_status('Idle')
                return False, None, {'error': 'File not found after download'}

            file_path = downloaded_files[0]

            # Set file timestamp to upload date
            if upload_date:
                timestamp = upload_date.timestamp()
                os.utime(file_path, (timestamp, timestamp))
                self.log(f"Set file timestamp to {upload_date}", "info", "Core")

            # Get file size
            file_size = file_path.stat().st_size

            # Get video dimensions using yt-dlp metadata
            width = info.get('width')
            height = info.get('height')

            # Record download in video_downloads table
            self._record_download(
                video_id=video_id,
                url=url,
                title=info['title'],
                file_path=str(file_path),
                uploader=info.get('uploader'),
                upload_date=upload_date,
                duration=info.get('duration'),
                file_size=file_size,
                metadata=info
            )

            # Also add to general downloads table for Media/Downloads page queries
            # post_date = upload date, download_date = today
            url_hash = hashlib.sha256(url.encode()).hexdigest()
            with self.unified_db.get_connection() as conn:
                cursor = conn.cursor()
                cursor.execute('''
                    INSERT OR IGNORE INTO downloads
                    (url_hash, url, platform, source, post_date, download_date, status, file_path, filename)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
                ''', (
                    url_hash,
                    url,
                    self.platform,
                    info.get('uploader'),
                    format_datetime_for_db(upload_date) if upload_date else None,
                    format_datetime_for_db(),
                    'completed',
                    str(file_path),
                    file_path.name
                ))
                conn.commit()
            self.log(f"Added to downloads table: {file_path.name}", "info", "Database")

            # Add to file inventory for media gallery
            download_time = format_datetime_for_db()

            # Prepare metadata for JSON serialization (convert datetime to string)
            metadata_serializable = dict(info)
            if 'upload_date' in metadata_serializable and metadata_serializable['upload_date']:
                metadata_serializable['upload_date'] = format_datetime_for_db(metadata_serializable['upload_date']) if isinstance(metadata_serializable['upload_date'], datetime) else metadata_serializable['upload_date']

            self.unified_db.upsert_file_inventory(
                file_path=str(file_path),
                filename=file_path.name,
                platform=self.platform,
                source=info.get('uploader'),
                content_type='video',
                file_size=file_size,
                width=width,
                height=height,
                location='final',
                metadata=metadata_serializable,
                created_date=download_time,
                video_id=info.get('id')  # For YouTube thumbnail lookup
            )
            self.log(f"Added to file inventory: {file_path.name}", "info", "Database")

            if progress_callback:
                progress_callback("Download complete!", 100)

            self.log(f"Successfully downloaded: {file_path.name}", "success", "Core")
            if update_activity:
                self.activity_manager.update_status('Idle')

            return True, str(file_path), info

        except Exception as e:
            self.log(f"Error downloading video: {e}", "error", "Core")
            if update_activity:
                self.activity_manager.update_status('Idle')
            return False, None, {'error': str(e)}


def main():
    """Test function"""
    from modules.unified_database import UnifiedDatabase

    db = UnifiedDatabase()

    print("Available platforms:")
    for key, config in PLATFORMS.items():
        print(f"  {key}: {config['name']}")

    platform = input("\nSelect platform: ").lower()
    if platform not in PLATFORMS:
        print(f"Invalid platform. Choose from: {', '.join(PLATFORMS.keys())}")
        return

    downloader = UniversalVideoDownloader(platform=platform, unified_db=db)

    # Test URL
    test_url = input(f"Enter {PLATFORMS[platform]['name']} URL: ")

    def progress(msg, pct):
        print(f"[{pct}%] {msg}")

    success, file_path, metadata = downloader.download_video(test_url, progress)

    if success:
        print(f"\nSuccess! Downloaded to: {file_path}")
    else:
        print(f"\nFailed: {metadata.get('error', 'Unknown error')}")


if __name__ == '__main__':
    main()