""" Embed Downloader - Downloads embedded videos from posts using yt-dlp Supports: YouTube, Vimeo, Dailymotion, Twitch, and many other platforms """ import asyncio import json import os import subprocess from pathlib import Path from typing import Dict, Optional from modules.base_module import LoggingMixin class EmbedDownloader(LoggingMixin): """ Download embedded videos from posts using yt-dlp Wrapper around yt-dlp for downloading videos from various platforms embedded in creator posts. """ # Quality presets for yt-dlp QUALITY_PRESETS = { 'best': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best', '1080p': 'bestvideo[height<=1080][ext=mp4]+bestaudio[ext=m4a]/best[height<=1080][ext=mp4]/best', '720p': 'bestvideo[height<=720][ext=mp4]+bestaudio[ext=m4a]/best[height<=720][ext=mp4]/best', '480p': 'bestvideo[height<=480][ext=mp4]+bestaudio[ext=m4a]/best[height<=480][ext=mp4]/best', 'audio': 'bestaudio[ext=m4a]/bestaudio/best', } def __init__(self, ytdlp_path: str = None, log_callback=None): self._init_logger('PaidContent', log_callback, default_module='Embed') # Find yt-dlp executable self.ytdlp_path = ytdlp_path or self._find_ytdlp() if not self.ytdlp_path: self.log("yt-dlp not found, embed downloading will be disabled", 'warning') def _find_ytdlp(self) -> Optional[str]: """Find yt-dlp executable""" # Check common locations common_paths = [ '/usr/local/bin/yt-dlp', '/usr/bin/yt-dlp', '/opt/homebrew/bin/yt-dlp', os.path.expanduser('~/.local/bin/yt-dlp'), ] for path in common_paths: if os.path.isfile(path) and os.access(path, os.X_OK): return path # Try to find via which try: result = subprocess.run(['which', 'yt-dlp'], capture_output=True, text=True) if result.returncode == 0: return result.stdout.strip() except Exception: pass return None def is_available(self) -> bool: """Check if yt-dlp is available""" return self.ytdlp_path is not None async def download(self, url: str, output_dir: Path, quality: str = 'best', filename_template: str = None) -> Dict: """ Download video from URL Args: url: Video URL to download output_dir: Directory to save the video quality: Quality preset ('best', '1080p', '720p', '480p', 'audio') filename_template: Optional custom filename template Returns: Dict with success status and file info """ if not self.is_available(): return { 'success': False, 'error': 'yt-dlp not available' } try: # Create output directory output_dir = Path(output_dir) output_dir.mkdir(parents=True, exist_ok=True) # Build output template if filename_template: output_template = str(output_dir / filename_template) else: output_template = str(output_dir / 'embed_%(title).50s_%(id)s.%(ext)s') # Get format string format_str = self.QUALITY_PRESETS.get(quality, self.QUALITY_PRESETS['best']) # Build command cmd = [ self.ytdlp_path, '--no-playlist', '--no-warnings', '-f', format_str, '--merge-output-format', 'mp4', '-o', output_template, '--print-json', # Output JSON with video info url ] self.log(f"Downloading embed: {url}", 'debug') # Run yt-dlp result = await asyncio.create_subprocess_exec( *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE ) stdout, stderr = await result.communicate() if result.returncode != 0: error_msg = stderr.decode('utf-8', errors='replace').strip() # Try to extract useful error message if 'Video unavailable' in error_msg: error_msg = 'Video unavailable or private' elif 'age-restricted' in error_msg.lower(): error_msg = 'Video is age-restricted' elif 'members only' in error_msg.lower(): error_msg = 'Video is members-only' elif len(error_msg) > 200: error_msg = error_msg[:200] + '...' self.log(f"yt-dlp failed: {error_msg}", 'warning') return { 'success': False, 'error': error_msg or f'yt-dlp exited with code {result.returncode}' } # Parse output JSON stdout_text = stdout.decode('utf-8', errors='replace') video_info = None for line in stdout_text.strip().split('\n'): try: video_info = json.loads(line) break except json.JSONDecodeError: continue if not video_info: # Try to find the downloaded file files = list(output_dir.glob('embed_*')) if files: file_path = files[0] return { 'success': True, 'file_path': str(file_path), 'filename': file_path.name, 'file_size': file_path.stat().st_size if file_path.exists() else None } return { 'success': False, 'error': 'Could not parse yt-dlp output' } # Extract file info file_path = video_info.get('_filename') or video_info.get('filename') # Handle potential path issues if file_path: file_path = Path(file_path) if not file_path.exists(): # Try to find the file possible_files = list(output_dir.glob(f"*{video_info.get('id', '')}*")) if possible_files: file_path = possible_files[0] return { 'success': True, 'file_path': str(file_path) if file_path else None, 'filename': file_path.name if file_path else None, 'file_size': file_path.stat().st_size if file_path and file_path.exists() else video_info.get('filesize'), 'title': video_info.get('title'), 'duration': video_info.get('duration'), 'uploader': video_info.get('uploader'), 'upload_date': video_info.get('upload_date'), 'video_id': video_info.get('id'), 'platform': video_info.get('extractor_key', video_info.get('extractor', 'unknown')).lower() } except asyncio.TimeoutError: return { 'success': False, 'error': 'Download timed out' } except Exception as e: self.log(f"Error downloading embed: {e}", 'error') return { 'success': False, 'error': str(e) } async def get_video_info(self, url: str) -> Dict: """ Get video information without downloading Args: url: Video URL Returns: Dict with video metadata """ if not self.is_available(): return {'success': False, 'error': 'yt-dlp not available'} try: cmd = [ self.ytdlp_path, '--no-playlist', '--no-warnings', '-j', # Output JSON '--no-download', url ] result = await asyncio.create_subprocess_exec( *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE ) stdout, stderr = await result.communicate() if result.returncode != 0: error_msg = stderr.decode('utf-8', errors='replace').strip() return { 'success': False, 'error': error_msg or f'yt-dlp exited with code {result.returncode}' } video_info = json.loads(stdout.decode('utf-8')) return { 'success': True, 'title': video_info.get('title'), 'duration': video_info.get('duration'), 'uploader': video_info.get('uploader'), 'upload_date': video_info.get('upload_date'), 'view_count': video_info.get('view_count'), 'like_count': video_info.get('like_count'), 'description': video_info.get('description'), 'thumbnail': video_info.get('thumbnail'), 'video_id': video_info.get('id'), 'platform': video_info.get('extractor_key', video_info.get('extractor', 'unknown')).lower(), 'formats': len(video_info.get('formats', [])) } except Exception as e: self.log(f"Error getting video info: {e}", 'error') return { 'success': False, 'error': str(e) } @staticmethod def detect_platform(url: str) -> Optional[str]: """Detect video platform from URL""" url_lower = url.lower() if 'youtube.com' in url_lower or 'youtu.be' in url_lower: return 'youtube' elif 'vimeo.com' in url_lower: return 'vimeo' elif 'dailymotion.com' in url_lower: return 'dailymotion' elif 'twitch.tv' in url_lower: return 'twitch' elif 'twitter.com' in url_lower or 'x.com' in url_lower: return 'twitter' elif 'tiktok.com' in url_lower: return 'tiktok' elif 'instagram.com' in url_lower: return 'instagram' elif 'reddit.com' in url_lower: return 'reddit' return None @staticmethod def is_supported_url(url: str) -> bool: """Check if URL is from a supported platform""" return EmbedDownloader.detect_platform(url) is not None