"""
Embed Downloader - Downloads embedded videos from posts using yt-dlp
Supports: YouTube, Vimeo, Dailymotion, Twitch, and many other platforms
"""

import asyncio
import json
import os
import shutil
import subprocess
from pathlib import Path
from typing import Dict, List, Optional, Tuple
from urllib.parse import urlparse

from modules.base_module import LoggingMixin


class EmbedDownloader(LoggingMixin):
    """
    Download embedded videos from posts using yt-dlp

    Wrapper around the yt-dlp command-line tool for downloading videos from
    various platforms embedded in creator posts. yt-dlp is run as an asyncio
    subprocess; results are reported as plain dicts that always carry a
    'success' key and, on failure, an 'error' message.

    Logging goes through ``self.log`` / ``self._init_logger``, which are not
    defined in this class (presumably supplied by ``LoggingMixin``).
    """

    # Quality presets for yt-dlp
    QUALITY_PRESETS = {
        'best': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
        '1080p': 'bestvideo[height<=1080][ext=mp4]+bestaudio[ext=m4a]/best[height<=1080][ext=mp4]/best',
        '720p': 'bestvideo[height<=720][ext=mp4]+bestaudio[ext=m4a]/best[height<=720][ext=mp4]/best',
        '480p': 'bestvideo[height<=480][ext=mp4]+bestaudio[ext=m4a]/best[height<=480][ext=mp4]/best',
        'audio': 'bestaudio[ext=m4a]/bestaudio/best',
    }

    # (domain, platform) pairs used by detect_platform(). A URL matches when
    # its hostname equals the domain or is a subdomain of it.
    _PLATFORM_DOMAINS = (
        ('youtube.com', 'youtube'),
        ('youtu.be', 'youtube'),
        ('vimeo.com', 'vimeo'),
        ('dailymotion.com', 'dailymotion'),
        ('twitch.tv', 'twitch'),
        ('twitter.com', 'twitter'),
        ('x.com', 'twitter'),
        ('tiktok.com', 'tiktok'),
        ('instagram.com', 'instagram'),
        ('reddit.com', 'reddit'),
    )

    # Longest stderr excerpt handed back to callers of download()
    _MAX_ERROR_LENGTH = 200

    def __init__(self, ytdlp_path: Optional[str] = None, log_callback=None):
        """
        Set up logging and locate the yt-dlp executable

        Args:
            ytdlp_path: Explicit path to yt-dlp; when falsy the executable is
                searched for via _find_ytdlp()
            log_callback: Passed through to _init_logger()
        """
        self._init_logger('PaidContent', log_callback, default_module='Embed')

        # Find yt-dlp executable
        self.ytdlp_path = ytdlp_path or self._find_ytdlp()
        if not self.ytdlp_path:
            self.log("yt-dlp not found, embed downloading will be disabled", 'warning')

    def _find_ytdlp(self) -> Optional[str]:
        """
        Find yt-dlp executable

        Returns:
            Path to an executable yt-dlp, or None when none is found
        """
        # Check common locations first so a fixed install wins over PATH
        common_paths = [
            '/usr/local/bin/yt-dlp',
            '/usr/bin/yt-dlp',
            '/opt/homebrew/bin/yt-dlp',
            os.path.expanduser('~/.local/bin/yt-dlp'),
        ]
        for path in common_paths:
            if os.path.isfile(path) and os.access(path, os.X_OK):
                return path

        # Fall back to a PATH lookup; shutil.which needs no external `which`
        # binary and returns None when nothing is found
        return shutil.which('yt-dlp')

    def is_available(self) -> bool:
        """Check if yt-dlp is available"""
        return self.ytdlp_path is not None

    async def _run_ytdlp(self, cmd: List[str],
                         timeout: Optional[float] = None) -> Tuple[int, str, str]:
        """
        Run a yt-dlp command and collect its output

        Args:
            cmd: Full argument vector, executable first
            timeout: Seconds to wait for completion; None waits indefinitely

        Returns:
            Tuple of (return code, decoded stdout, decoded stderr)

        Raises:
            asyncio.TimeoutError: The process did not finish within timeout
        """
        process = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        try:
            stdout, stderr = await asyncio.wait_for(process.communicate(), timeout)
        except (asyncio.TimeoutError, asyncio.CancelledError):
            # Do not leave yt-dlp running once nobody is waiting for it
            if process.returncode is None:
                try:
                    process.kill()
                except ProcessLookupError:
                    pass  # exited between the check and the kill
                await process.wait()
            raise

        return (
            process.returncode,
            stdout.decode('utf-8', errors='replace'),
            stderr.decode('utf-8', errors='replace'),
        )

    @classmethod
    def _summarize_error(cls, stderr_text: str) -> str:
        """Turn yt-dlp stderr into a short message for callers"""
        message = stderr_text.strip()
        lowered = message.lower()

        if 'Video unavailable' in message:
            return 'Video unavailable or private'
        if 'age-restricted' in lowered:
            return 'Video is age-restricted'
        if 'members only' in lowered:
            return 'Video is members-only'
        if len(message) > cls._MAX_ERROR_LENGTH:
            return message[:cls._MAX_ERROR_LENGTH] + '...'
        return message

    @staticmethod
    def _parse_info_json(stdout_text: str) -> Optional[Dict]:
        """Return the first stdout line that parses as a JSON object, else None"""
        for line in stdout_text.splitlines():
            line = line.strip()
            if not line:
                continue
            try:
                parsed = json.loads(line)
            except json.JSONDecodeError:
                continue
            # Only an object is usable; callers read fields with .get()
            if isinstance(parsed, dict):
                return parsed
        return None

    @staticmethod
    def _platform_from_info(video_info: Dict) -> str:
        """Lower-cased extractor name from yt-dlp metadata, 'unknown' if absent"""
        name = video_info.get('extractor_key') or video_info.get('extractor') or 'unknown'
        return str(name).lower()

    @staticmethod
    def _newest_file(paths: List[Path]) -> Path:
        """Pick the most recently modified path from a non-empty list"""
        return max(paths, key=lambda p: p.stat().st_mtime)

    def _resolve_downloaded_file(self, output_dir: Path, video_info: Dict) -> Optional[Path]:
        """
        Work out which file on disk belongs to a finished download

        Args:
            output_dir: Directory yt-dlp was told to write into
            video_info: Parsed JSON metadata printed by yt-dlp

        Returns:
            The reported path when it exists, otherwise the newest file in
            output_dir that shares its stem or contains the video id; the
            reported (missing) path when nothing matches; None when yt-dlp
            reported no filename at all
        """
        reported = video_info.get('_filename') or video_info.get('filename')
        if not reported:
            return None

        file_path = Path(reported)
        if file_path.exists():
            return file_path

        # NOTE(review): the reported name may differ from the final one
        # (e.g. a different extension after merging) - confirm with yt-dlp docs
        files = [p for p in output_dir.iterdir() if p.is_file()]
        candidates = [p for p in files if p.stem == file_path.stem]
        if not candidates:
            video_id = str(video_info.get('id') or '')
            # An empty id would match every file, so only search with a real one
            if video_id:
                candidates = [p for p in files if video_id in p.name]

        return self._newest_file(candidates) if candidates else file_path

    async def download(self, url: str, output_dir: Path, quality: str = 'best',
                       filename_template: str = None,
                       timeout: Optional[float] = None) -> Dict:
        """
        Download video from URL

        Args:
            url: Video URL to download
            output_dir: Directory to save the video (created if missing)
            quality: Quality preset ('best', '1080p', '720p', '480p', 'audio');
                unknown values fall back to 'best'
            filename_template: Optional custom filename template
            timeout: Optional limit in seconds for the yt-dlp run; None (the
                default) waits indefinitely

        Returns:
            Dict with success status and file info. On success it contains
            'file_path', 'filename' and 'file_size', plus 'title', 'duration',
            'uploader', 'upload_date', 'video_id' and 'platform' when yt-dlp
            metadata could be parsed. On failure it contains 'error'.
        """
        if not self.is_available():
            return {
                'success': False,
                'error': 'yt-dlp not available'
            }

        try:
            # Create output directory
            output_dir = Path(output_dir)
            output_dir.mkdir(parents=True, exist_ok=True)

            # Build output template
            output_template = str(
                output_dir / (filename_template or 'embed_%(title).50s_%(id)s.%(ext)s')
            )

            # Get format string
            format_str = self.QUALITY_PRESETS.get(quality, self.QUALITY_PRESETS['best'])

            # Build command
            cmd = [
                self.ytdlp_path,
                '--no-playlist',
                '--no-warnings',
                '-f', format_str,
                '--merge-output-format', 'mp4',
                '-o', output_template,
                '--print-json',  # Output JSON with video info
                '--',  # end of options: a URL starting with '-' stays a URL
                url,
            ]

            self.log(f"Downloading embed: {url}", 'debug')

            # Run yt-dlp
            returncode, stdout_text, stderr_text = await self._run_ytdlp(cmd, timeout)

            if returncode != 0:
                error_msg = self._summarize_error(stderr_text)
                self.log(f"yt-dlp failed: {error_msg}", 'warning')
                return {
                    'success': False,
                    'error': error_msg or f'yt-dlp exited with code {returncode}'
                }

            # Parse output JSON
            video_info = self._parse_info_json(stdout_text)

            if not video_info:
                # No metadata: fall back to the newest default-named file
                files = [p for p in output_dir.glob('embed_*') if p.is_file()]
                if files:
                    file_path = self._newest_file(files)
                    return {
                        'success': True,
                        'file_path': str(file_path),
                        'filename': file_path.name,
                        'file_size': file_path.stat().st_size if file_path.exists() else None
                    }
                return {
                    'success': False,
                    'error': 'Could not parse yt-dlp output'
                }

            # Extract file info
            file_path = self._resolve_downloaded_file(output_dir, video_info)
            if file_path and file_path.exists():
                file_size = file_path.stat().st_size
            else:
                file_size = video_info.get('filesize')

            return {
                'success': True,
                'file_path': str(file_path) if file_path else None,
                'filename': file_path.name if file_path else None,
                'file_size': file_size,
                'title': video_info.get('title'),
                'duration': video_info.get('duration'),
                'uploader': video_info.get('uploader'),
                'upload_date': video_info.get('upload_date'),
                'video_id': video_info.get('id'),
                'platform': self._platform_from_info(video_info)
            }

        except asyncio.TimeoutError:
            self.log(f"Timed out downloading embed: {url}", 'warning')
            return {
                'success': False,
                'error': 'Download timed out'
            }
        except Exception as e:
            self.log(f"Error downloading embed: {e}", 'error')
            return {
                'success': False,
                'error': str(e)
            }

    async def get_video_info(self, url: str, timeout: Optional[float] = None) -> Dict:
        """
        Get video information without downloading

        Args:
            url: Video URL
            timeout: Optional limit in seconds for the yt-dlp run; None (the
                default) waits indefinitely

        Returns:
            Dict with video metadata ('title', 'duration', 'uploader',
            'upload_date', 'view_count', 'like_count', 'description',
            'thumbnail', 'video_id', 'platform', 'formats' as a count) and
            'success': True, or 'success': False with an 'error' message
        """
        if not self.is_available():
            return {'success': False, 'error': 'yt-dlp not available'}

        try:
            cmd = [
                self.ytdlp_path,
                '--no-playlist',
                '--no-warnings',
                '-j',  # Output JSON
                '--no-download',
                '--',  # end of options: a URL starting with '-' stays a URL
                url,
            ]

            returncode, stdout_text, stderr_text = await self._run_ytdlp(cmd, timeout)

            if returncode != 0:
                error_msg = stderr_text.strip()
                return {
                    'success': False,
                    'error': error_msg or f'yt-dlp exited with code {returncode}'
                }

            video_info = self._parse_info_json(stdout_text)
            if not video_info:
                return {
                    'success': False,
                    'error': 'Could not parse yt-dlp output'
                }

            return {
                'success': True,
                'title': video_info.get('title'),
                'duration': video_info.get('duration'),
                'uploader': video_info.get('uploader'),
                'upload_date': video_info.get('upload_date'),
                'view_count': video_info.get('view_count'),
                'like_count': video_info.get('like_count'),
                'description': video_info.get('description'),
                'thumbnail': video_info.get('thumbnail'),
                'video_id': video_info.get('id'),
                'platform': self._platform_from_info(video_info),
                # 'formats' may be present but null; count it as zero
                'formats': len(video_info.get('formats') or [])
            }

        except asyncio.TimeoutError:
            self.log(f"Timed out getting video info: {url}", 'warning')
            return {
                'success': False,
                'error': 'Video info request timed out'
            }
        except Exception as e:
            self.log(f"Error getting video info: {e}", 'error')
            return {
                'success': False,
                'error': str(e)
            }

    @staticmethod
    def detect_platform(url: str) -> Optional[str]:
        """
        Detect video platform from URL

        Matching is done on the URL's hostname (exact domain or any
        subdomain), not on the raw string, so look-alike hosts such as
        'dropbox.com' are not mistaken for 'x.com'.

        Args:
            url: URL to inspect; a missing scheme is tolerated

        Returns:
            Platform name ('youtube', 'vimeo', 'dailymotion', 'twitch',
            'twitter', 'tiktok', 'instagram', 'reddit') or None
        """
        if not url:
            return None

        candidate = url.strip().lower()
        # urlparse only recognises a host after '//', so add it for bare
        # 'youtube.com/watch?...' style input
        if '://' not in candidate and not candidate.startswith('//'):
            candidate = '//' + candidate

        try:
            host = urlparse(candidate).hostname or ''
        except ValueError:
            # Malformed netloc (e.g. unbalanced IPv6 brackets)
            return None

        for domain, platform in EmbedDownloader._PLATFORM_DOMAINS:
            if host == domain or host.endswith('.' + domain):
                return platform

        return None

    @staticmethod
    def is_supported_url(url: str) -> bool:
        """Check if URL is from a supported platform"""
        return EmbedDownloader.detect_platform(url) is not None