1434 lines
54 KiB
Python
1434 lines
54 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Universal Video Downloader Module - Downloads videos from YouTube, Vimeo, Dailymotion, Bilibili, and more
|
|
"""
|
|
|
|
import os
|
|
import re
|
|
import json
|
|
import subprocess
|
|
import hashlib
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
from typing import Dict, List, Optional, Tuple
|
|
from modules.universal_logger import get_logger
|
|
|
|
logger = get_logger('UniversalVideoDownloader')
|
|
|
|
# Cookie/auth error patterns that indicate expired or invalid cookies
|
|
COOKIE_ERROR_PATTERNS = [
|
|
r'sign in to confirm',
|
|
r'login required',
|
|
r'cookies.*expired',
|
|
r'please sign in',
|
|
r'authentication required',
|
|
r'private video',
|
|
r'video is unavailable.*sign in',
|
|
r'age-restricted.*sign in',
|
|
r'members-only content',
|
|
r'this video is available to this channel',
|
|
r'confirm your age',
|
|
]
|
|
|
|
# Browser User-Agent strings (updated Dec 2024)
|
|
BROWSER_USER_AGENTS = {
|
|
'edge': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0',
|
|
'chrome': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
|
|
'firefox': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0',
|
|
'safari': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 14_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Safari/605.1.15',
|
|
}
|
|
|
|
# Default anti-bot settings
|
|
DEFAULT_ANTIBOT_SETTINGS = {
|
|
'browser': 'edge',
|
|
'custom_user_agent': '',
|
|
'limit_rate': '2M',
|
|
'throttled_rate': '100K',
|
|
'sleep_requests_min': 1,
|
|
'sleep_requests_max': 3,
|
|
'retries': 10,
|
|
'fragment_retries': 10,
|
|
'concurrent_fragments': 1,
|
|
'socket_timeout': 30,
|
|
'enabled': True,
|
|
}
|
|
|
|
|
|
def is_cookie_error(output: str) -> bool:
|
|
"""Check if output contains cookie/auth error patterns."""
|
|
if not output:
|
|
return False
|
|
output_lower = output.lower()
|
|
for pattern in COOKIE_ERROR_PATTERNS:
|
|
if re.search(pattern, output_lower):
|
|
return True
|
|
return False
|
|
|
|
|
|
def get_antibot_settings(unified_db) -> dict:
|
|
"""Get anti-bot settings from database or return defaults."""
|
|
if not unified_db:
|
|
return DEFAULT_ANTIBOT_SETTINGS.copy()
|
|
|
|
try:
|
|
import json
|
|
with unified_db.get_connection() as conn:
|
|
cursor = conn.cursor()
|
|
cursor.execute("SELECT value FROM settings WHERE key = 'antibot_settings'")
|
|
row = cursor.fetchone()
|
|
if row:
|
|
settings = json.loads(row[0])
|
|
# Merge with defaults to ensure all keys exist
|
|
merged = DEFAULT_ANTIBOT_SETTINGS.copy()
|
|
merged.update(settings)
|
|
return merged
|
|
except Exception:
|
|
pass
|
|
|
|
return DEFAULT_ANTIBOT_SETTINGS.copy()
|
|
|
|
|
|
def get_user_agent(settings: dict) -> str:
|
|
"""Get the user agent string based on settings."""
|
|
browser = settings.get('browser', 'edge')
|
|
|
|
if browser == 'custom':
|
|
custom_ua = settings.get('custom_user_agent', '').strip()
|
|
if custom_ua:
|
|
return custom_ua
|
|
# Fall back to edge if custom is empty
|
|
return BROWSER_USER_AGENTS['edge']
|
|
|
|
return BROWSER_USER_AGENTS.get(browser, BROWSER_USER_AGENTS['edge'])
|
|
|
|
|
|
def format_datetime_for_db(dt: datetime = None) -> str:
|
|
"""Format datetime for database storage using space separator (not ISO T separator).
|
|
|
|
This ensures consistent string sorting in SQLite since 'T' > ' ' would cause
|
|
ISO format dates to sort incorrectly with space-separated dates.
|
|
|
|
Uses UTC time for consistency with other parts of the system.
|
|
"""
|
|
if dt is None:
|
|
dt = datetime.utcnow()
|
|
return dt.strftime('%Y-%m-%d %H:%M:%S')
|
|
|
|
|
|
# Platform configurations
|
|
PLATFORMS = {
|
|
'youtube': {
|
|
'name': 'YouTube',
|
|
'color': 'red',
|
|
'base_path': '/opt/immich/md/youtube',
|
|
'url_patterns': [
|
|
r'(?:youtube\.com/watch\?v=|youtu\.be/|youtube\.com/embed/)([a-zA-Z0-9_-]{11})',
|
|
r'youtube\.com/shorts/([a-zA-Z0-9_-]{11})',
|
|
],
|
|
'id_pattern': r'^[a-zA-Z0-9_-]{11}$'
|
|
},
|
|
'vimeo': {
|
|
'name': 'Vimeo',
|
|
'color': 'blue',
|
|
'base_path': '/opt/immich/md/vimeo',
|
|
'url_patterns': [
|
|
r'vimeo\.com/(\d+)',
|
|
r'vimeo\.com/video/(\d+)',
|
|
r'vimeo\.com/channels/[^/]+/(\d+)',
|
|
],
|
|
'id_pattern': r'^\d+$'
|
|
},
|
|
'dailymotion': {
|
|
'name': 'Dailymotion',
|
|
'color': 'cyan',
|
|
'base_path': '/opt/immich/md/dailymotion',
|
|
'url_patterns': [
|
|
r'dailymotion\.com/video/([a-zA-Z0-9]+)',
|
|
r'dai\.ly/([a-zA-Z0-9]+)',
|
|
],
|
|
'id_pattern': r'^[a-zA-Z0-9]+$'
|
|
},
|
|
'bilibili': {
|
|
'name': 'Bilibili',
|
|
'color': 'pink',
|
|
'base_path': '/opt/immich/md/bilibili',
|
|
'url_patterns': [
|
|
r'bilibili\.com/video/(BV[a-zA-Z0-9]+)',
|
|
r'bilibili\.com/video/(av\d+)',
|
|
r'b23\.tv/([a-zA-Z0-9]+)',
|
|
],
|
|
'id_pattern': r'^(BV[a-zA-Z0-9]+|av\d+)$'
|
|
}
|
|
}
|
|
|
|
# Sites that should use gallery-dl instead of yt-dlp (image/gallery focused)
|
|
GALLERY_DL_SITES = {
|
|
'erome': {
|
|
'name': 'Erome',
|
|
'color': 'purple',
|
|
'base_path': '/opt/immich/md/erome',
|
|
'url_patterns': [r'erome\.com/a/([a-zA-Z0-9]+)', r'erome\.com/([a-zA-Z0-9_-]+)$'],
|
|
},
|
|
'bunkr': {
|
|
'name': 'Bunkr',
|
|
'color': 'blue',
|
|
'base_path': '/opt/immich/md/bunkr',
|
|
'url_patterns': [r'bunkr\.\w+/a/([a-zA-Z0-9]+)', r'bunkr\.\w+/v/([a-zA-Z0-9]+)'],
|
|
},
|
|
'cyberdrop': {
|
|
'name': 'Cyberdrop',
|
|
'color': 'cyan',
|
|
'base_path': '/opt/immich/md/cyberdrop',
|
|
'url_patterns': [r'cyberdrop\.\w+/a/([a-zA-Z0-9]+)'],
|
|
},
|
|
'kemono': {
|
|
'name': 'Kemono',
|
|
'color': 'green',
|
|
'base_path': '/opt/immich/md/kemono',
|
|
'url_patterns': [r'kemono\.\w+/([^/]+)/user/(\d+)'],
|
|
},
|
|
'coomer': {
|
|
'name': 'Coomer',
|
|
'color': 'pink',
|
|
'base_path': '/opt/immich/md/coomer',
|
|
'url_patterns': [r'coomer\.\w+/([^/]+)/user/(\d+)'],
|
|
},
|
|
'pixeldrain': {
|
|
'name': 'Pixeldrain',
|
|
'color': 'indigo',
|
|
'base_path': '/opt/immich/md/pixeldrain',
|
|
'url_patterns': [r'pixeldrain\.com/u/([a-zA-Z0-9]+)', r'pixeldrain\.com/l/([a-zA-Z0-9]+)'],
|
|
},
|
|
'gofile': {
|
|
'name': 'GoFile',
|
|
'color': 'yellow',
|
|
'base_path': '/opt/immich/md/gofile',
|
|
'url_patterns': [r'gofile\.io/d/([a-zA-Z0-9]+)'],
|
|
},
|
|
'imgbox': {
|
|
'name': 'ImgBox',
|
|
'color': 'gray',
|
|
'base_path': '/opt/immich/md/imgbox',
|
|
'url_patterns': [r'imgbox\.com/g/([a-zA-Z0-9]+)'],
|
|
},
|
|
'imagebam': {
|
|
'name': 'ImageBam',
|
|
'color': 'orange',
|
|
'base_path': '/opt/immich/md/imagebam',
|
|
'url_patterns': [r'imagebam\.com/gallery/([a-zA-Z0-9]+)'],
|
|
},
|
|
'fapello': {
|
|
'name': 'Fapello',
|
|
'color': 'red',
|
|
'base_path': '/opt/immich/md/fapello',
|
|
'url_patterns': [r'fapello\.com/([a-zA-Z0-9_-]+)'],
|
|
},
|
|
'imagefap': {
|
|
'name': 'ImageFap',
|
|
'color': 'green',
|
|
'base_path': '/opt/immich/md/imagefap',
|
|
'url_patterns': [r'imagefap\.com/pictures/(\d+)', r'imagefap\.com/gallery/(\d+)'],
|
|
},
|
|
'rule34': {
|
|
'name': 'Rule34',
|
|
'color': 'green',
|
|
'base_path': '/opt/immich/md/rule34',
|
|
'url_patterns': [r'rule34\.(xxx|us|paheal)'],
|
|
},
|
|
'e621': {
|
|
'name': 'e621',
|
|
'color': 'blue',
|
|
'base_path': '/opt/immich/md/e621',
|
|
'url_patterns': [r'e621\.net'],
|
|
},
|
|
'nhentai': {
|
|
'name': 'nHentai',
|
|
'color': 'pink',
|
|
'base_path': '/opt/immich/md/nhentai',
|
|
'url_patterns': [r'nhentai\.net/g/(\d+)'],
|
|
},
|
|
'hitomi': {
|
|
'name': 'Hitomi',
|
|
'color': 'pink',
|
|
'base_path': '/opt/immich/md/hitomi',
|
|
'url_patterns': [r'hitomi\.la'],
|
|
},
|
|
'gelbooru': {
|
|
'name': 'Gelbooru',
|
|
'color': 'blue',
|
|
'base_path': '/opt/immich/md/gelbooru',
|
|
'url_patterns': [r'gelbooru\.com'],
|
|
},
|
|
'danbooru': {
|
|
'name': 'Danbooru',
|
|
'color': 'blue',
|
|
'base_path': '/opt/immich/md/danbooru',
|
|
'url_patterns': [r'danbooru\.donmai\.us'],
|
|
},
|
|
'deviantart': {
|
|
'name': 'DeviantArt',
|
|
'color': 'green',
|
|
'base_path': '/opt/immich/md/deviantart',
|
|
'url_patterns': [r'deviantart\.com'],
|
|
},
|
|
'artstation': {
|
|
'name': 'ArtStation',
|
|
'color': 'blue',
|
|
'base_path': '/opt/immich/md/artstation',
|
|
'url_patterns': [r'artstation\.com'],
|
|
},
|
|
'pixiv': {
|
|
'name': 'Pixiv',
|
|
'color': 'blue',
|
|
'base_path': '/opt/immich/md/pixiv',
|
|
'url_patterns': [r'pixiv\.net'],
|
|
},
|
|
'furaffinity': {
|
|
'name': 'FurAffinity',
|
|
'color': 'orange',
|
|
'base_path': '/opt/immich/md/furaffinity',
|
|
'url_patterns': [r'furaffinity\.net'],
|
|
},
|
|
'catbox': {
|
|
'name': 'Catbox',
|
|
'color': 'purple',
|
|
'base_path': '/opt/immich/md/catbox',
|
|
'url_patterns': [r'catbox\.moe', r'files\.catbox\.moe'],
|
|
},
|
|
}
|
|
|
|
|
|
class UniversalVideoDownloader:
|
|
"""Downloads videos from multiple platforms using yt-dlp and gallery-dl"""
|
|
|
|
# Default base directory for all downloads
|
|
DEFAULT_BASE_DIR = '/opt/immich/md'
|
|
|
|
def __init__(self, platform: str = 'youtube', base_path: Path = None, unified_db=None, cookies_file: str = None):
|
|
"""
|
|
Initialize Universal Video Downloader
|
|
|
|
Args:
|
|
platform: Platform name (youtube, vimeo, dailymotion, bilibili, or gallery-dl sites)
|
|
base_path: Base path for downloads (default: from settings or platform config)
|
|
unified_db: UnifiedDatabase instance (required)
|
|
cookies_file: Path to cookies file for yt-dlp (optional)
|
|
"""
|
|
self.cookies_file = cookies_file
|
|
# Check if platform is a gallery-dl site
|
|
self.is_gallery_dl = platform in GALLERY_DL_SITES
|
|
|
|
if platform not in PLATFORMS and platform not in GALLERY_DL_SITES:
|
|
raise ValueError(f"Unsupported platform: {platform}. Supported: {', '.join(list(PLATFORMS.keys()) + list(GALLERY_DL_SITES.keys()))}")
|
|
|
|
self.platform = platform
|
|
|
|
if self.is_gallery_dl:
|
|
self.platform_config = GALLERY_DL_SITES[platform]
|
|
else:
|
|
self.platform_config = PLATFORMS[platform]
|
|
|
|
# Set base path - check settings first, then use default
|
|
if base_path:
|
|
self.base_path = Path(base_path)
|
|
else:
|
|
# Try to get base directory from settings
|
|
config_base_dir = self._get_configured_base_dir(unified_db)
|
|
self.base_path = Path(config_base_dir) / platform
|
|
|
|
self.base_path.mkdir(parents=True, exist_ok=True)
|
|
|
|
# Load video downloader settings
|
|
self.video_settings = self._get_video_downloader_settings(unified_db)
|
|
|
|
# Initialize universal logger
|
|
self.logger = get_logger('UniversalVideoDownloader')
|
|
|
|
# Always use unified database adapter
|
|
if not unified_db:
|
|
raise ValueError("Universal video downloader requires unified_db")
|
|
|
|
self.unified_db = unified_db
|
|
|
|
# Initialize activity status manager for real-time updates
|
|
from modules.activity_status import get_activity_manager
|
|
self.activity_manager = get_activity_manager(unified_db)
|
|
|
|
def _get_video_downloader_settings(self, unified_db) -> dict:
|
|
"""Get video downloader settings from database."""
|
|
defaults = {
|
|
'base_path': '',
|
|
'max_concurrent': 3,
|
|
'cache_thumbnails': True,
|
|
'auto_generate_thumbnails': True,
|
|
'embed_metadata': True
|
|
}
|
|
if not unified_db:
|
|
return defaults
|
|
|
|
try:
|
|
import json
|
|
with unified_db.get_connection() as conn:
|
|
cursor = conn.cursor()
|
|
cursor.execute("SELECT value FROM settings WHERE key = 'video_downloader'")
|
|
row = cursor.fetchone()
|
|
if row:
|
|
settings = json.loads(row[0])
|
|
defaults.update(settings)
|
|
except Exception:
|
|
pass
|
|
|
|
return defaults
|
|
|
|
def _get_configured_base_dir(self, unified_db) -> str:
|
|
"""Get base download directory from settings or use default."""
|
|
if not unified_db:
|
|
return self.DEFAULT_BASE_DIR
|
|
|
|
try:
|
|
import json
|
|
with unified_db.get_connection() as conn:
|
|
cursor = conn.cursor()
|
|
# First check video_downloader.base_path
|
|
cursor.execute("SELECT value FROM settings WHERE key = 'video_downloader'")
|
|
row = cursor.fetchone()
|
|
if row:
|
|
settings = json.loads(row[0])
|
|
base_path = settings.get('base_path')
|
|
if base_path:
|
|
return base_path
|
|
|
|
# Fall back to download_settings.base_directory
|
|
cursor.execute("SELECT value FROM settings WHERE key = 'download_settings'")
|
|
row = cursor.fetchone()
|
|
if row:
|
|
settings = json.loads(row[0])
|
|
base_dir = settings.get('base_directory')
|
|
if base_dir:
|
|
return base_dir
|
|
except Exception:
|
|
pass
|
|
|
|
return self.DEFAULT_BASE_DIR
|
|
|
|
def _get_ytdlp_base_cmd(self) -> list:
|
|
"""Get base yt-dlp command with cookies if configured."""
|
|
cmd = ['/opt/media-downloader/venv/bin/yt-dlp']
|
|
# Enable remote EJS components for YouTube n-challenge solving (deno required)
|
|
cmd.extend(['--remote-components', 'ejs:github'])
|
|
if self.cookies_file:
|
|
cmd.extend(['--cookies', self.cookies_file])
|
|
return cmd
|
|
|
|
def _get_gallery_dl_base_cmd(self) -> list:
|
|
"""Get base gallery-dl command with cookies if configured."""
|
|
cmd = ['/opt/media-downloader/venv/bin/gallery-dl']
|
|
if self.cookies_file:
|
|
cmd.extend(['--cookies', self.cookies_file])
|
|
return cmd
|
|
|
|
def log(self, message: str, level: str = "info", module: str = "Download"):
|
|
"""Log a message with level
|
|
|
|
Args:
|
|
message: The message to log
|
|
level: Log level ('debug', 'info', 'warning', 'error', 'success')
|
|
module: Module name for logging
|
|
"""
|
|
level = level.lower()
|
|
self.logger.log(f"[{self.platform_config['name']}] {message}", level.upper(), module=module)
|
|
|
|
def detect_platform(self, url: str) -> Optional[str]:
|
|
"""Detect platform from URL
|
|
|
|
Args:
|
|
url: Video URL
|
|
|
|
Returns:
|
|
Platform name or None if not detected
|
|
"""
|
|
# Check yt-dlp platforms first
|
|
for platform, config in PLATFORMS.items():
|
|
for pattern in config['url_patterns']:
|
|
if re.search(pattern, url, re.IGNORECASE):
|
|
return platform
|
|
|
|
# Check gallery-dl sites
|
|
for platform, config in GALLERY_DL_SITES.items():
|
|
for pattern in config['url_patterns']:
|
|
if re.search(pattern, url, re.IGNORECASE):
|
|
return platform
|
|
|
|
return None
|
|
|
|
@staticmethod
|
|
def detect_gallery_dl_site(url: str) -> Optional[str]:
|
|
"""Detect if URL is a gallery-dl supported site
|
|
|
|
Args:
|
|
url: URL to check
|
|
|
|
Returns:
|
|
Site name or None if not a gallery-dl site
|
|
"""
|
|
for site, config in GALLERY_DL_SITES.items():
|
|
for pattern in config['url_patterns']:
|
|
if re.search(pattern, url, re.IGNORECASE):
|
|
return site
|
|
return None
|
|
|
|
def extract_video_id(self, url: str) -> Optional[str]:
|
|
"""Extract video ID from URL
|
|
|
|
Args:
|
|
url: Video URL
|
|
|
|
Returns:
|
|
Video ID or None if not found
|
|
"""
|
|
# Try patterns for current platform
|
|
for pattern in self.platform_config['url_patterns']:
|
|
match = re.search(pattern, url, re.IGNORECASE)
|
|
if match:
|
|
return match.group(1)
|
|
|
|
# If URL is just the video ID
|
|
if re.match(self.platform_config['id_pattern'], url):
|
|
return url
|
|
|
|
return None
|
|
|
|
def _is_already_downloaded(self, video_id: str) -> bool:
|
|
"""Check if a video has already been downloaded
|
|
|
|
Args:
|
|
video_id: Video ID
|
|
|
|
Returns:
|
|
True if already downloaded
|
|
"""
|
|
try:
|
|
with self.unified_db.get_connection() as conn:
|
|
cursor = conn.cursor()
|
|
cursor.execute('''
|
|
SELECT COUNT(*) as count FROM video_downloads
|
|
WHERE platform = ? AND video_id = ?
|
|
''', (self.platform, video_id))
|
|
result = cursor.fetchone()
|
|
return result['count'] > 0
|
|
except Exception as e:
|
|
self.log(f"Error checking if video already downloaded: {e}", "error", "Database")
|
|
return False
|
|
|
|
def _record_download(self, video_id: str, url: str, title: str,
|
|
file_path: str, uploader: str = None,
|
|
upload_date: Optional[datetime] = None,
|
|
duration: int = None, file_size: int = None,
|
|
metadata: Dict = None):
|
|
"""Record a successful download in the database
|
|
|
|
Args:
|
|
video_id: Video ID
|
|
url: Original URL
|
|
title: Video title
|
|
file_path: Path to downloaded file
|
|
uploader: Channel/uploader name
|
|
upload_date: Upload date
|
|
duration: Duration in seconds
|
|
file_size: File size in bytes
|
|
metadata: Additional metadata
|
|
"""
|
|
try:
|
|
# Prepare metadata for JSON serialization
|
|
metadata_serializable = None
|
|
if metadata:
|
|
metadata_serializable = dict(metadata)
|
|
# Convert datetime objects to ISO format strings
|
|
if 'upload_date' in metadata_serializable and isinstance(metadata_serializable['upload_date'], datetime):
|
|
metadata_serializable['upload_date'] = metadata_serializable['upload_date'].isoformat()
|
|
|
|
with self.unified_db.get_connection() as conn:
|
|
cursor = conn.cursor()
|
|
|
|
# Check if we have cached thumbnail from preview list
|
|
cursor.execute('''
|
|
SELECT thumbnail_data FROM video_preview_list
|
|
WHERE platform = ? AND video_id = ?
|
|
''', (self.platform, video_id))
|
|
preview_row = cursor.fetchone()
|
|
thumbnail_data = preview_row[0] if preview_row else None
|
|
|
|
# Also check video_download_queue (for downloads initiated from queue)
|
|
if not thumbnail_data:
|
|
cursor.execute('''
|
|
SELECT thumbnail_data FROM video_download_queue
|
|
WHERE platform = ? AND video_id = ?
|
|
''', (self.platform, video_id))
|
|
queue_row = cursor.fetchone()
|
|
if queue_row and queue_row[0]:
|
|
thumbnail_data = queue_row[0]
|
|
|
|
# Fallback: fetch thumbnail from URL if not in cache
|
|
if not thumbnail_data and metadata:
|
|
thumbnail_url = metadata.get('thumbnail')
|
|
if thumbnail_url:
|
|
thumbnail_data = self._fetch_thumbnail(thumbnail_url, video_id)
|
|
|
|
cursor.execute('''
|
|
INSERT INTO video_downloads
|
|
(platform, video_id, url, title, uploader, upload_date, duration, file_path, file_size, metadata, download_date, thumbnail_data)
|
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
''', (
|
|
self.platform,
|
|
video_id,
|
|
url,
|
|
title,
|
|
uploader,
|
|
format_datetime_for_db(upload_date) if upload_date else None,
|
|
duration,
|
|
file_path,
|
|
file_size,
|
|
json.dumps(metadata_serializable) if metadata_serializable else None,
|
|
format_datetime_for_db(),
|
|
thumbnail_data
|
|
))
|
|
conn.commit()
|
|
self.log(f"Recorded download: {title}", "success", "Database")
|
|
except Exception as e:
|
|
self.log(f"Error recording download: {e}", "error", "Database")
|
|
|
|
def _fetch_thumbnail(self, thumbnail_url: str, video_id: str) -> Optional[bytes]:
|
|
"""Fetch thumbnail from URL and return binary data.
|
|
|
|
Args:
|
|
thumbnail_url: URL of the thumbnail
|
|
video_id: Video ID for logging
|
|
|
|
Returns:
|
|
Thumbnail binary data or None on failure
|
|
"""
|
|
import requests
|
|
|
|
if not thumbnail_url:
|
|
return None
|
|
|
|
try:
|
|
# For YouTube, try maxresdefault first (1280x720, no black bars), fallback to hqdefault
|
|
url_to_fetch = thumbnail_url
|
|
if 'ytimg.com' in thumbnail_url:
|
|
# Try maxresdefault first (best quality, no letterboxing)
|
|
for quality in ['maxresdefault', 'hqdefault']:
|
|
url_to_fetch = f"https://i.ytimg.com/vi/{video_id}/{quality}.jpg"
|
|
response = requests.get(
|
|
url_to_fetch,
|
|
headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'},
|
|
timeout=10
|
|
)
|
|
if response.status_code == 200 and len(response.content) > 1000:
|
|
self.log(f"Fetched {quality} thumbnail for {video_id}", "debug", "Database")
|
|
return response.content
|
|
return None
|
|
|
|
response = requests.get(
|
|
url_to_fetch,
|
|
headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'},
|
|
timeout=10
|
|
)
|
|
|
|
if response.status_code == 200 and len(response.content) > 1000:
|
|
self.log(f"Fetched thumbnail for {video_id}", "debug", "Database")
|
|
return response.content
|
|
|
|
except Exception as e:
|
|
self.log(f"Failed to fetch thumbnail for {video_id}: {e}", "warning", "Database")
|
|
|
|
return None
|
|
|
|
def get_video_info(self, url: str) -> Optional[Dict]:
|
|
"""Get video metadata using yt-dlp without downloading
|
|
|
|
Args:
|
|
url: Video URL
|
|
|
|
Returns:
|
|
Dictionary with video info or None on error
|
|
"""
|
|
try:
|
|
self.log(f"Fetching video info for: {url}", "info", "Core")
|
|
|
|
cmd = self._get_ytdlp_base_cmd() + [
|
|
'--dump-json',
|
|
'--no-playlist',
|
|
url
|
|
]
|
|
|
|
result = subprocess.run(
|
|
cmd,
|
|
capture_output=True,
|
|
text=True,
|
|
timeout=30
|
|
)
|
|
|
|
if result.returncode != 0:
|
|
self.log(f"Failed to fetch video info: {result.stderr}", "error", "Core")
|
|
return None
|
|
|
|
info = json.loads(result.stdout)
|
|
|
|
# Extract upload date
|
|
upload_date = None
|
|
if 'upload_date' in info and info['upload_date']:
|
|
try:
|
|
upload_date = datetime.strptime(info['upload_date'], '%Y%m%d')
|
|
except Exception as e:
|
|
self.log(f"Error parsing upload date: {e}", "warning", "Core")
|
|
|
|
# Extract video ID from info
|
|
video_id = info.get('id') or self.extract_video_id(url)
|
|
|
|
return {
|
|
'video_id': video_id,
|
|
'title': info.get('title'),
|
|
'uploader': info.get('uploader') or info.get('channel') or info.get('creator'),
|
|
'upload_date': upload_date,
|
|
'duration': info.get('duration'),
|
|
'description': info.get('description'),
|
|
'thumbnail': info.get('thumbnail'),
|
|
'view_count': info.get('view_count'),
|
|
'like_count': info.get('like_count'),
|
|
}
|
|
|
|
except subprocess.TimeoutExpired:
|
|
self.log("Timeout fetching video info", "error", "Core")
|
|
return None
|
|
except Exception as e:
|
|
self.log(f"Error fetching video info: {e}", "error", "Core")
|
|
return None
|
|
|
|
def get_playlist_info(self, url: str) -> Optional[Dict]:
|
|
"""Get playlist info including all video entries
|
|
|
|
Args:
|
|
url: Playlist URL
|
|
|
|
Returns:
|
|
Dictionary with playlist info and video entries or None on error
|
|
"""
|
|
try:
|
|
self.log(f"Fetching playlist info for: {url}", "info", "Core")
|
|
|
|
cmd = self._get_ytdlp_base_cmd() + [
|
|
'--dump-json',
|
|
'--flat-playlist', # Only fetch metadata, not full video info
|
|
url
|
|
]
|
|
|
|
result = subprocess.run(
|
|
cmd,
|
|
capture_output=True,
|
|
text=True,
|
|
timeout=60
|
|
)
|
|
|
|
if result.returncode != 0:
|
|
self.log(f"Failed to fetch playlist info: {result.stderr}", "error", "Core")
|
|
return None
|
|
|
|
# Parse JSONL output (one JSON object per line)
|
|
videos = []
|
|
lines = result.stdout.strip().split('\n')
|
|
|
|
for line in lines:
|
|
if not line.strip():
|
|
continue
|
|
try:
|
|
entry = json.loads(line)
|
|
|
|
# Skip non-video entries
|
|
if entry.get('_type') == 'playlist':
|
|
continue
|
|
|
|
videos.append({
|
|
'video_id': entry.get('id'),
|
|
'title': entry.get('title'),
|
|
'uploader': entry.get('uploader') or entry.get('channel'),
|
|
'upload_date': None, # Not available in flat-playlist
|
|
'duration': entry.get('duration'),
|
|
'description': '',
|
|
'thumbnail': entry.get('thumbnail'),
|
|
'view_count': entry.get('view_count'),
|
|
'like_count': entry.get('like_count'),
|
|
'url': entry.get('url') or entry.get('webpage_url'),
|
|
})
|
|
except json.JSONDecodeError:
|
|
continue
|
|
|
|
if not videos:
|
|
self.log("No videos found in playlist", "warning", "Core")
|
|
return None
|
|
|
|
return {
|
|
'is_playlist': True,
|
|
'playlist_count': len(videos),
|
|
'playlist_videos': videos
|
|
}
|
|
|
|
except subprocess.TimeoutExpired:
|
|
self.log("Timeout fetching playlist info", "error", "Core")
|
|
return None
|
|
except Exception as e:
|
|
self.log(f"Error fetching playlist info: {e}", "error", "Core")
|
|
return None
|
|
|
|
def get_gallery_info(self, url: str) -> Optional[Dict]:
|
|
"""Get gallery/album info using gallery-dl
|
|
|
|
Args:
|
|
url: Gallery URL
|
|
|
|
Returns:
|
|
Dictionary with gallery info or None on error
|
|
"""
|
|
try:
|
|
self.log(f"Fetching gallery info for: {url}", "info", "Core")
|
|
|
|
cmd = self._get_gallery_dl_base_cmd() + [
|
|
'--dump-json',
|
|
'--no-download',
|
|
url
|
|
]
|
|
|
|
result = subprocess.run(
|
|
cmd,
|
|
capture_output=True,
|
|
text=True,
|
|
timeout=60
|
|
)
|
|
|
|
if result.returncode != 0:
|
|
self.log(f"Failed to fetch gallery info: {result.stderr}", "error", "Core")
|
|
return None
|
|
|
|
# Parse JSON output
|
|
try:
|
|
entries = json.loads(result.stdout)
|
|
except json.JSONDecodeError:
|
|
self.log("Failed to parse gallery-dl JSON output", "error", "Core")
|
|
return None
|
|
|
|
if not entries:
|
|
self.log("No entries found in gallery", "warning", "Core")
|
|
return None
|
|
|
|
# gallery-dl output format:
|
|
# - Entry with [2, {album_metadata}] = album info
|
|
# - Entry with [3, "url", {file_metadata}] = file entries
|
|
album_metadata = {}
|
|
file_entries = []
|
|
first_thumbnail = None
|
|
|
|
for entry in entries:
|
|
if isinstance(entry, list) and len(entry) >= 2:
|
|
entry_type = entry[0]
|
|
if entry_type == 2 and isinstance(entry[1], dict):
|
|
# Album metadata
|
|
album_metadata = entry[1]
|
|
elif entry_type == 3 and len(entry) >= 3:
|
|
# File entry: [3, url, metadata]
|
|
file_url = entry[1]
|
|
file_meta = entry[2] if isinstance(entry[2], dict) else {}
|
|
file_entries.append({
|
|
'url': file_url,
|
|
'extension': file_meta.get('extension', ''),
|
|
'filename': file_meta.get('filename', '')
|
|
})
|
|
# Get first image as thumbnail
|
|
if not first_thumbnail and file_meta.get('extension', '').lower() in ['jpg', 'jpeg', 'png', 'gif', 'webp']:
|
|
first_thumbnail = file_url
|
|
|
|
if not file_entries and not album_metadata:
|
|
self.log("No valid entries found in gallery", "warning", "Core")
|
|
return None
|
|
|
|
# Generate a unique ID for the gallery
|
|
gallery_id = album_metadata.get('album_id') or hashlib.sha256(url.encode()).hexdigest()[:12]
|
|
|
|
# Count media types
|
|
video_extensions = ['mp4', 'webm', 'mov', 'avi', 'mkv', 'm4v']
|
|
video_count = sum(1 for e in file_entries if e.get('extension', '').lower() in video_extensions)
|
|
image_count = len(file_entries) - video_count
|
|
|
|
# Get title from metadata
|
|
title = (album_metadata.get('title') or
|
|
album_metadata.get('album') or
|
|
album_metadata.get('gallery') or
|
|
f"Gallery {gallery_id}")
|
|
|
|
return {
|
|
'video_id': gallery_id,
|
|
'title': title,
|
|
'uploader': album_metadata.get('user') or album_metadata.get('uploader') or album_metadata.get('author', ''),
|
|
'upload_date': album_metadata.get('date'),
|
|
'duration': 0,
|
|
'description': album_metadata.get('description', ''),
|
|
'thumbnail': first_thumbnail or (file_entries[0]['url'] if file_entries else ''),
|
|
'view_count': 0,
|
|
'like_count': 0,
|
|
'is_gallery': True,
|
|
'file_count': len(file_entries),
|
|
'image_count': image_count,
|
|
'video_count': video_count,
|
|
'url': url,
|
|
'tags': album_metadata.get('tags', []),
|
|
}
|
|
|
|
except subprocess.TimeoutExpired:
|
|
self.log("Timeout fetching gallery info", "error", "Core")
|
|
return None
|
|
except Exception as e:
|
|
self.log(f"Error fetching gallery info: {e}", "error", "Core")
|
|
return None
|
|
|
|
def download_gallery(self, url: str, progress_callback=None, gallery_info: Dict = None) -> Tuple[bool, Optional[str], Optional[Dict]]:
|
|
"""Download a gallery/album using gallery-dl
|
|
|
|
Args:
|
|
url: Gallery URL
|
|
progress_callback: Optional callback for progress updates (message, percentage, speed, eta)
|
|
gallery_info: Optional pre-fetched gallery info from get_gallery_info()
|
|
|
|
Returns:
|
|
Tuple of (success, output_directory, metadata)
|
|
"""
|
|
try:
|
|
# Use album ID from gallery_info if available, otherwise generate hash
|
|
gallery_id = gallery_info.get('video_id') if gallery_info else None
|
|
if not gallery_id:
|
|
gallery_id = hashlib.sha256(url.encode()).hexdigest()[:12]
|
|
|
|
self.log(f"Starting gallery download: {url}", "info", "Core")
|
|
|
|
if progress_callback:
|
|
progress_callback(f"Starting gallery download...", 0, None, None)
|
|
|
|
# Get uploader for subfolder organization
|
|
uploader = gallery_info.get('uploader', '') if gallery_info else ''
|
|
if not uploader:
|
|
uploader = 'unknown'
|
|
# Sanitize channel name for filesystem
|
|
safe_channel = re.sub(r'[<>:"/\\|?*]', '', uploader)
|
|
safe_channel = re.sub(r'\s+', ' ', safe_channel).strip('. ')[:50] or 'unknown'
|
|
|
|
# Create output directory under channel subfolder
|
|
channel_dir = self.base_path / safe_channel
|
|
output_dir = channel_dir / gallery_id
|
|
output_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
# Build gallery-dl command
|
|
cmd = self._get_gallery_dl_base_cmd() + [
|
|
'--directory', str(output_dir),
|
|
'--filename', '{filename}.{extension}',
|
|
'--write-metadata',
|
|
'--write-info-json',
|
|
url
|
|
]
|
|
|
|
# Run gallery-dl with progress tracking
|
|
process = subprocess.Popen(
|
|
cmd,
|
|
stdout=subprocess.PIPE,
|
|
stderr=subprocess.STDOUT,
|
|
text=True,
|
|
bufsize=1
|
|
)
|
|
|
|
downloaded_files = []
|
|
total_files = 0
|
|
current_file = 0
|
|
|
|
for line in iter(process.stdout.readline, ''):
|
|
line = line.strip()
|
|
if not line:
|
|
continue
|
|
|
|
self.log(line, "debug", "Download")
|
|
|
|
# Parse progress from gallery-dl output
|
|
if line.startswith('#'):
|
|
# Extract total count from "# 1/10" format
|
|
match = re.search(r'#\s*(\d+)/(\d+)', line)
|
|
if match:
|
|
current_file = int(match.group(1))
|
|
total_files = int(match.group(2))
|
|
percentage = int((current_file / total_files) * 100)
|
|
if progress_callback:
|
|
progress_callback(f"Downloading file {current_file}/{total_files}", percentage, None, None)
|
|
|
|
elif 'Downloading' in line or 'Saving' in line:
|
|
if progress_callback:
|
|
progress_callback(line, 50 if total_files == 0 else int((current_file / total_files) * 100), None, None)
|
|
|
|
# Track downloaded files
|
|
if output_dir.exists():
|
|
current_files = list(output_dir.glob('*'))
|
|
downloaded_files = [f for f in current_files if f.is_file() and not f.name.endswith('.json')]
|
|
|
|
process.wait()
|
|
|
|
if process.returncode != 0:
|
|
self.log(f"Gallery download failed with code {process.returncode}", "error", "Core")
|
|
if progress_callback:
|
|
progress_callback("Download failed", 0, None, None)
|
|
return False, None, None
|
|
|
|
# Get final list of downloaded files
|
|
downloaded_files = [f for f in output_dir.glob('*') if f.is_file() and not f.name.endswith('.json')]
|
|
|
|
if not downloaded_files:
|
|
self.log("No files were downloaded", "error", "Core")
|
|
return False, None, None
|
|
|
|
# Parse upload_date from gallery_info
|
|
upload_date = None
|
|
if gallery_info and gallery_info.get('upload_date'):
|
|
ud = gallery_info['upload_date']
|
|
if isinstance(ud, datetime):
|
|
upload_date = ud
|
|
elif isinstance(ud, str):
|
|
# Try parsing common date formats
|
|
for fmt in ['%Y-%m-%d %H:%M:%S', '%Y-%m-%d', '%Y-%m-%dT%H:%M:%S']:
|
|
try:
|
|
upload_date = datetime.strptime(ud, fmt)
|
|
break
|
|
except ValueError:
|
|
continue
|
|
|
|
# Set file timestamps to upload date (same as yt-dlp)
|
|
if upload_date:
|
|
timestamp = upload_date.timestamp()
|
|
for file_path in downloaded_files:
|
|
os.utime(file_path, (timestamp, timestamp))
|
|
self.log(f"Set file timestamps to {upload_date}", "info", "Core")
|
|
|
|
# Calculate total size
|
|
total_size = sum(f.stat().st_size for f in downloaded_files)
|
|
|
|
# Use gallery_info if available for better metadata
|
|
metadata = {
|
|
'video_id': gallery_id,
|
|
'title': gallery_info.get('title', f"Gallery {gallery_id}") if gallery_info else f"Gallery {gallery_id}",
|
|
'uploader': gallery_info.get('uploader', '') if gallery_info else '',
|
|
'upload_date': upload_date or datetime.now(),
|
|
'duration': 0,
|
|
'description': gallery_info.get('description', '') if gallery_info else '',
|
|
'thumbnail': gallery_info.get('thumbnail', '') if gallery_info else '',
|
|
'view_count': gallery_info.get('view_count', 0) if gallery_info else 0,
|
|
'like_count': gallery_info.get('like_count', 0) if gallery_info else 0,
|
|
'is_gallery': True,
|
|
'file_count': len(downloaded_files),
|
|
'total_size': total_size,
|
|
'files': [str(f) for f in downloaded_files],
|
|
'tags': gallery_info.get('tags', []) if gallery_info else [],
|
|
}
|
|
|
|
self.log(f"Gallery download complete: {len(downloaded_files)} files, {total_size} bytes", "success", "Core")
|
|
|
|
if progress_callback:
|
|
progress_callback(f"Downloaded {len(downloaded_files)} files", 100, None, None)
|
|
|
|
# Record to video_downloads table
|
|
self._record_download(
|
|
video_id=gallery_id,
|
|
url=url,
|
|
title=metadata.get('title', f"Gallery {gallery_id}"),
|
|
file_path=str(output_dir),
|
|
uploader=metadata.get('uploader', ''),
|
|
upload_date=upload_date,
|
|
duration=0,
|
|
file_size=total_size,
|
|
metadata=metadata
|
|
)
|
|
|
|
# Also add to general downloads table for Media/Downloads page
|
|
url_hash = hashlib.sha256(url.encode()).hexdigest()
|
|
post_date = format_datetime_for_db(upload_date) if upload_date else format_datetime_for_db()
|
|
with self.unified_db.get_connection() as conn:
|
|
cursor = conn.cursor()
|
|
cursor.execute('''
|
|
INSERT OR REPLACE INTO downloads
|
|
(url_hash, url, platform, source, post_date, download_date, status, file_path, filename)
|
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
''', (
|
|
url_hash,
|
|
url,
|
|
self.platform,
|
|
metadata.get('uploader', ''),
|
|
post_date,
|
|
format_datetime_for_db(),
|
|
'completed',
|
|
str(output_dir),
|
|
gallery_id
|
|
))
|
|
conn.commit()
|
|
|
|
# Add each file to file_inventory for Media page (same as yt-dlp)
|
|
created_date = format_datetime_for_db(upload_date) if upload_date else format_datetime_for_db()
|
|
for file_path in downloaded_files:
|
|
file_stat = file_path.stat()
|
|
ext = file_path.suffix.lower()
|
|
content_type = 'video' if ext in ['.mp4', '.webm', '.mov', '.avi', '.mkv'] else 'image'
|
|
|
|
# Prepare metadata for JSON serialization
|
|
file_metadata = {
|
|
'gallery_id': gallery_id,
|
|
'title': metadata.get('title', ''),
|
|
'uploader': metadata.get('uploader', ''),
|
|
'tags': metadata.get('tags', []),
|
|
'url': url,
|
|
}
|
|
|
|
self.unified_db.upsert_file_inventory(
|
|
file_path=str(file_path),
|
|
filename=file_path.name,
|
|
platform=self.platform,
|
|
source=metadata.get('uploader', ''),
|
|
content_type=content_type,
|
|
file_size=file_stat.st_size,
|
|
location='final',
|
|
metadata=file_metadata,
|
|
created_date=created_date
|
|
)
|
|
self.log(f"Added {len(downloaded_files)} files to file_inventory", "info", "Database")
|
|
|
|
return True, str(output_dir), metadata
|
|
|
|
except Exception as e:
|
|
self.log(f"Error downloading gallery: {e}", "error", "Core")
|
|
if progress_callback:
|
|
progress_callback(f"Error: {str(e)}", 0, None, None)
|
|
return False, None, None
|
|
|
|
def download_video(self, url: str, progress_callback=None, update_activity: bool = True) -> Tuple[bool, Optional[str], Optional[Dict]]:
|
|
"""Download a video with metadata extraction
|
|
|
|
Args:
|
|
url: Video URL
|
|
progress_callback: Optional callback for progress updates (message, percentage)
|
|
update_activity: Whether to update the activity_status table (set False for queue downloads)
|
|
|
|
Returns:
|
|
Tuple of (success, file_path, metadata)
|
|
"""
|
|
try:
|
|
# Extract video ID
|
|
video_id = self.extract_video_id(url)
|
|
if not video_id:
|
|
self.log(f"Invalid {self.platform_config['name']} URL: {url}", "error", "Core")
|
|
return False, None, None
|
|
|
|
# Check if already downloaded
|
|
if self._is_already_downloaded(video_id):
|
|
self.log(f"Video {video_id} already downloaded, skipping", "info", "Core")
|
|
return False, None, {'error': 'Already downloaded'}
|
|
|
|
# Update activity status (only for scheduler-driven downloads, not queue)
|
|
activity_key = f'{self.platform}_downloader'
|
|
if update_activity:
|
|
self.activity_manager.update_status(f'Downloading: {url}')
|
|
|
|
if progress_callback:
|
|
progress_callback("Fetching video metadata...", 5)
|
|
|
|
# Get video info first
|
|
info = self.get_video_info(url)
|
|
if not info:
|
|
if update_activity:
|
|
self.activity_manager.update_status('Idle')
|
|
return False, None, {'error': 'Failed to fetch video info'}
|
|
|
|
self.log(f"Downloading: {info['title']}", "info", "Core")
|
|
|
|
if progress_callback:
|
|
progress_callback(f"Downloading: {info['title']}", 10)
|
|
|
|
# Generate output filename with date prefix
|
|
upload_date = info.get('upload_date')
|
|
if upload_date:
|
|
date_prefix = upload_date.strftime('%Y%m%d')
|
|
else:
|
|
date_prefix = datetime.now().strftime('%Y%m%d')
|
|
|
|
# Sanitize title for filename
|
|
safe_title = re.sub(r'[<>:"/\\|?*]', '_', info['title'][:100])
|
|
|
|
# Get channel/uploader for subfolder organization
|
|
uploader = info.get('uploader') or info.get('channel') or info.get('creator') or 'unknown'
|
|
# Sanitize channel name for filesystem
|
|
safe_channel = re.sub(r'[<>:"/\\|?*]', '', uploader)
|
|
safe_channel = re.sub(r'\s+', ' ', safe_channel).strip('. ')[:50] or 'unknown'
|
|
|
|
# Create channel subfolder
|
|
channel_dir = self.base_path / safe_channel
|
|
channel_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
output_template = str(channel_dir / f"{date_prefix}_{safe_title}_{video_id}.%(ext)s")
|
|
|
|
# Get anti-bot settings
|
|
antibot = get_antibot_settings(self.unified_db)
|
|
|
|
# Build base command
|
|
cmd = self._get_ytdlp_base_cmd() + [
|
|
'--no-playlist',
|
|
'--format', 'bestvideo+bestaudio/best',
|
|
'--merge-output-format', 'mp4',
|
|
'--output', output_template,
|
|
]
|
|
|
|
# Add metadata embedding based on settings
|
|
if self.video_settings.get('embed_metadata', True):
|
|
cmd.append('--add-metadata')
|
|
|
|
# Add thumbnail embedding based on settings
|
|
if self.video_settings.get('cache_thumbnails', True):
|
|
cmd.append('--embed-thumbnail')
|
|
|
|
# Add anti-bot measures if enabled
|
|
if antibot.get('enabled', True):
|
|
# User agent
|
|
user_agent = get_user_agent(antibot)
|
|
cmd.extend(['--user-agent', user_agent])
|
|
|
|
# Rate limiting
|
|
if antibot.get('limit_rate'):
|
|
cmd.extend(['--limit-rate', antibot['limit_rate']])
|
|
|
|
# Throttle detection
|
|
if antibot.get('throttled_rate'):
|
|
cmd.extend(['--throttled-rate', antibot['throttled_rate']])
|
|
|
|
# Sleep between requests
|
|
sleep_min = antibot.get('sleep_requests_min', 1)
|
|
sleep_max = antibot.get('sleep_requests_max', 3)
|
|
cmd.extend(['--sleep-requests', str(sleep_min)])
|
|
# Use sleep-interval for delays between downloads (with max variant)
|
|
if sleep_max > sleep_min:
|
|
cmd.extend(['--sleep-interval', str(sleep_min), '--max-sleep-interval', str(sleep_max)])
|
|
|
|
# Concurrent fragments
|
|
cmd.extend(['--concurrent-fragments', str(antibot.get('concurrent_fragments', 1))])
|
|
|
|
# Retries
|
|
cmd.extend(['--retries', str(antibot.get('retries', 10))])
|
|
cmd.extend(['--fragment-retries', str(antibot.get('fragment_retries', 10))])
|
|
|
|
# Socket timeout
|
|
cmd.extend(['--socket-timeout', str(antibot.get('socket_timeout', 30))])
|
|
|
|
# Don't abort on errors
|
|
cmd.append('--no-abort-on-error')
|
|
|
|
# Add URL last
|
|
cmd.append(url)
|
|
|
|
if progress_callback:
|
|
progress_callback("Downloading video...", 20)
|
|
|
|
# Run download with progress tracking
|
|
process = subprocess.Popen(
|
|
cmd,
|
|
stdout=subprocess.PIPE,
|
|
stderr=subprocess.STDOUT,
|
|
text=True
|
|
)
|
|
|
|
# Collect output for error detection
|
|
output_lines = []
|
|
|
|
# Parse yt-dlp output for progress
|
|
for line in process.stdout:
|
|
output_lines.append(line)
|
|
|
|
# Look for [download] XX.X% lines
|
|
# Format: [download] 45.2% of 123.45MiB at 2.5MiB/s ETA 00:32
|
|
if '[download]' in line and '%' in line:
|
|
try:
|
|
percent_match = re.search(r'(\d+\.?\d*)%', line)
|
|
speed_match = re.search(r'at\s+([\d.]+\s*\w+/s)', line)
|
|
eta_match = re.search(r'ETA\s+([\d:]+)', line)
|
|
|
|
if percent_match:
|
|
percent = float(percent_match.group(1))
|
|
# Scale to 20-90% range
|
|
scaled_percent = 20 + (percent * 0.7)
|
|
|
|
speed = speed_match.group(1) if speed_match else None
|
|
eta = eta_match.group(1) if eta_match else None
|
|
|
|
if progress_callback:
|
|
# Build message with speed/ETA if available
|
|
msg = f"Downloading: {percent:.1f}%"
|
|
if speed:
|
|
msg += f" • {speed}"
|
|
if eta:
|
|
msg += f" • ETA {eta}"
|
|
progress_callback(msg, int(scaled_percent), speed, eta)
|
|
except (ValueError, KeyError, TypeError):
|
|
pass
|
|
|
|
process.wait()
|
|
|
|
# Check for cookie/auth errors in output
|
|
full_output = ''.join(output_lines)
|
|
if process.returncode != 0 and is_cookie_error(full_output):
|
|
self.log("Download failed: Cookie/authentication error detected", "error", "Core")
|
|
if update_activity:
|
|
self.activity_manager.update_status('Idle')
|
|
return False, None, {'error': 'Cookie expired', 'cookie_error': True}
|
|
|
|
if process.returncode != 0:
|
|
self.log("Download failed", "error", "Core")
|
|
if update_activity:
|
|
self.activity_manager.update_status('Idle')
|
|
return False, None, {'error': 'Download failed'}
|
|
|
|
if progress_callback:
|
|
progress_callback("Processing metadata...", 95)
|
|
|
|
# Find the downloaded file
|
|
# Escape glob special characters (brackets, etc.) in the pattern
|
|
import glob as glob_module
|
|
escaped_prefix = glob_module.escape(f"{date_prefix}_{safe_title}_{video_id}")
|
|
expected_pattern = f"{escaped_prefix}.*"
|
|
downloaded_files = list(channel_dir.glob(expected_pattern))
|
|
|
|
if not downloaded_files:
|
|
self.log("Downloaded file not found", "error", "Core")
|
|
if update_activity:
|
|
self.activity_manager.update_status('Idle')
|
|
return False, None, {'error': 'File not found after download'}
|
|
|
|
file_path = downloaded_files[0]
|
|
|
|
# Set file timestamp to upload date
|
|
if upload_date:
|
|
timestamp = upload_date.timestamp()
|
|
os.utime(file_path, (timestamp, timestamp))
|
|
self.log(f"Set file timestamp to {upload_date}", "info", "Core")
|
|
|
|
# Get file size
|
|
file_size = file_path.stat().st_size
|
|
|
|
# Get video dimensions using yt-dlp metadata
|
|
width = info.get('width')
|
|
height = info.get('height')
|
|
|
|
# Record download in video_downloads table
|
|
self._record_download(
|
|
video_id=video_id,
|
|
url=url,
|
|
title=info['title'],
|
|
file_path=str(file_path),
|
|
uploader=info.get('uploader'),
|
|
upload_date=upload_date,
|
|
duration=info.get('duration'),
|
|
file_size=file_size,
|
|
metadata=info
|
|
)
|
|
|
|
# Also add to general downloads table for Media/Downloads page queries
|
|
# post_date = upload date, download_date = today
|
|
url_hash = hashlib.sha256(url.encode()).hexdigest()
|
|
with self.unified_db.get_connection() as conn:
|
|
cursor = conn.cursor()
|
|
cursor.execute('''
|
|
INSERT OR IGNORE INTO downloads
|
|
(url_hash, url, platform, source, post_date, download_date, status, file_path, filename)
|
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
''', (
|
|
url_hash,
|
|
url,
|
|
self.platform,
|
|
info.get('uploader'),
|
|
format_datetime_for_db(upload_date) if upload_date else None,
|
|
format_datetime_for_db(),
|
|
'completed',
|
|
str(file_path),
|
|
file_path.name
|
|
))
|
|
conn.commit()
|
|
self.log(f"Added to downloads table: {file_path.name}", "info", "Database")
|
|
|
|
# Add to file inventory for media gallery
|
|
download_time = format_datetime_for_db()
|
|
|
|
# Prepare metadata for JSON serialization (convert datetime to string)
|
|
metadata_serializable = dict(info)
|
|
if 'upload_date' in metadata_serializable and metadata_serializable['upload_date']:
|
|
metadata_serializable['upload_date'] = format_datetime_for_db(metadata_serializable['upload_date']) if isinstance(metadata_serializable['upload_date'], datetime) else metadata_serializable['upload_date']
|
|
|
|
self.unified_db.upsert_file_inventory(
|
|
file_path=str(file_path),
|
|
filename=file_path.name,
|
|
platform=self.platform,
|
|
source=info.get('uploader'),
|
|
content_type='video',
|
|
file_size=file_size,
|
|
width=width,
|
|
height=height,
|
|
location='final',
|
|
metadata=metadata_serializable,
|
|
created_date=download_time,
|
|
video_id=info.get('id') # For YouTube thumbnail lookup
|
|
)
|
|
self.log(f"Added to file inventory: {file_path.name}", "info", "Database")
|
|
|
|
if progress_callback:
|
|
progress_callback("Download complete!", 100)
|
|
|
|
self.log(f"Successfully downloaded: {file_path.name}", "success", "Core")
|
|
if update_activity:
|
|
self.activity_manager.update_status('Idle')
|
|
|
|
return True, str(file_path), info
|
|
|
|
except Exception as e:
|
|
self.log(f"Error downloading video: {e}", "error", "Core")
|
|
if update_activity:
|
|
self.activity_manager.update_status('Idle')
|
|
return False, None, {'error': str(e)}
|
|
|
|
|
|
def main():
|
|
"""Test function"""
|
|
from modules.unified_database import UnifiedDatabase
|
|
|
|
db = UnifiedDatabase()
|
|
|
|
print("Available platforms:")
|
|
for key, config in PLATFORMS.items():
|
|
print(f" {key}: {config['name']}")
|
|
|
|
platform = input("\nSelect platform: ").lower()
|
|
if platform not in PLATFORMS:
|
|
print(f"Invalid platform. Choose from: {', '.join(PLATFORMS.keys())}")
|
|
return
|
|
|
|
downloader = UniversalVideoDownloader(platform=platform, unified_db=db)
|
|
|
|
# Test URL
|
|
test_url = input(f"Enter {PLATFORMS[platform]['name']} URL: ")
|
|
|
|
def progress(msg, pct):
|
|
print(f"[{pct}%] {msg}")
|
|
|
|
success, file_path, metadata = downloader.download_video(test_url, progress)
|
|
|
|
if success:
|
|
print(f"\nSuccess! Downloaded to: {file_path}")
|
|
else:
|
|
print(f"\nFailed: {metadata.get('error', 'Unknown error')}")
|
|
|
|
|
|
if __name__ == '__main__':
|
|
main()
|