Initial commit

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Todd
2026-03-29 22:42:55 -04:00
commit 0d7b2b1aab
389 changed files with 280296 additions and 0 deletions

View File

@@ -0,0 +1,297 @@
"""
Embed Downloader - Downloads embedded videos from posts using yt-dlp
Supports: YouTube, Vimeo, Dailymotion, Twitch, and many other platforms
"""
import asyncio
import json
import os
import shutil
import subprocess
from pathlib import Path
from typing import Dict, Optional
from urllib.parse import urlsplit

from modules.base_module import LoggingMixin
class EmbedDownloader(LoggingMixin):
    """
    Download embedded videos from posts using yt-dlp.

    Asynchronous wrapper around the ``yt-dlp`` command-line tool. Every public
    coroutine reports its outcome as a dict with a boolean ``'success'`` key
    (plus ``'error'`` on failure) instead of raising.
    """

    # Quality presets for yt-dlp (values are passed to ``-f`` as format selectors)
    QUALITY_PRESETS = {
        'best': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
        '1080p': 'bestvideo[height<=1080][ext=mp4]+bestaudio[ext=m4a]/best[height<=1080][ext=mp4]/best',
        '720p': 'bestvideo[height<=720][ext=mp4]+bestaudio[ext=m4a]/best[height<=720][ext=mp4]/best',
        '480p': 'bestvideo[height<=480][ext=mp4]+bestaudio[ext=m4a]/best[height<=480][ext=mp4]/best',
        'audio': 'bestaudio[ext=m4a]/bestaudio/best',
    }

    # Domains recognised by detect_platform(); subdomains of these match too.
    _PLATFORM_DOMAINS = {
        'youtube.com': 'youtube',
        'youtu.be': 'youtube',
        'vimeo.com': 'vimeo',
        'dailymotion.com': 'dailymotion',
        'twitch.tv': 'twitch',
        'twitter.com': 'twitter',
        'x.com': 'twitter',
        'tiktok.com': 'tiktok',
        'instagram.com': 'instagram',
        'reddit.com': 'reddit',
    }

    # Suffixes of partial-download leftovers that must never be reported as
    # the finished file when scanning the output directory.
    _TEMP_SUFFIXES = ('.part', '.ytdl')

    def __init__(self, ytdlp_path: Optional[str] = None, log_callback=None):
        """
        Args:
            ytdlp_path: Explicit path to the yt-dlp executable. When omitted
                (or empty) the executable is searched for automatically.
            log_callback: Forwarded to the logging mixin's ``_init_logger``.
        """
        self._init_logger('PaidContent', log_callback, default_module='Embed')
        # Find yt-dlp executable
        self.ytdlp_path = ytdlp_path or self._find_ytdlp()
        if not self.ytdlp_path:
            self.log("yt-dlp not found, embed downloading will be disabled", 'warning')

    def _find_ytdlp(self) -> Optional[str]:
        """Find the yt-dlp executable.

        Checks a few well-known install locations first, then falls back to a
        PATH lookup.

        Returns:
            Path to the executable, or None when it cannot be found.
        """
        common_paths = [
            '/usr/local/bin/yt-dlp',
            '/usr/bin/yt-dlp',
            '/opt/homebrew/bin/yt-dlp',
            os.path.expanduser('~/.local/bin/yt-dlp'),
        ]
        for path in common_paths:
            if os.path.isfile(path) and os.access(path, os.X_OK):
                return path
        # PATH lookup done in-process: no dependency on an external `which`
        # binary and no exception to swallow.
        return shutil.which('yt-dlp')

    def is_available(self) -> bool:
        """Check if yt-dlp is available"""
        return self.ytdlp_path is not None

    async def _run_ytdlp(self, args, timeout: Optional[float] = None) -> tuple:
        """Run yt-dlp with ``args`` and collect its output.

        Args:
            args: Command-line arguments placed after the executable path.
            timeout: Seconds to wait for the process; None waits indefinitely.

        Returns:
            Tuple ``(returncode, stdout_text, stderr_text)``; both streams are
            decoded as UTF-8 with undecodable bytes replaced.

        Raises:
            asyncio.TimeoutError: The process did not finish within
                ``timeout``. The child is killed before the error propagates.
        """
        proc = await asyncio.create_subprocess_exec(
            self.ytdlp_path,
            *args,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        try:
            stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=timeout)
        except asyncio.TimeoutError:
            # wait_for() only cancels communicate(); without an explicit kill
            # the child would keep running after we report the timeout.
            try:
                proc.kill()
            except ProcessLookupError:
                pass  # the process exited on its own in the meantime
            await proc.wait()
            raise
        return (
            proc.returncode,
            stdout.decode('utf-8', errors='replace'),
            stderr.decode('utf-8', errors='replace'),
        )

    @staticmethod
    def _summarize_error(stderr_text: str) -> str:
        """Reduce yt-dlp's stderr to a short user-facing message."""
        message = stderr_text.strip()
        lowered = message.lower()
        if 'Video unavailable' in message:
            return 'Video unavailable or private'
        if 'age-restricted' in lowered:
            return 'Video is age-restricted'
        if 'members only' in lowered:
            return 'Video is members-only'
        if len(message) > 200:
            return message[:200] + '...'
        return message

    @staticmethod
    def _parse_info_json(stdout_text: str) -> Optional[Dict]:
        """Return the first non-empty JSON object found on a line of ``stdout_text``.

        Lines that are not valid JSON, or that decode to something other than
        an object (numbers, strings, ``null``...), are skipped.
        """
        # Split on '\n' only: str.splitlines() would also break on Unicode
        # line separators, which may legally appear inside JSON strings.
        for line in stdout_text.strip().split('\n'):
            try:
                parsed = json.loads(line)
            except json.JSONDecodeError:
                continue
            if isinstance(parsed, dict) and parsed:
                return parsed
        return None

    @staticmethod
    def _platform_from_info(video_info: Dict) -> str:
        """Lower-cased extractor name from yt-dlp metadata, or ``'unknown'``."""
        # `or` (instead of dict.get defaults) also covers keys that are
        # present but hold None.
        name = video_info.get('extractor_key') or video_info.get('extractor') or 'unknown'
        return str(name).lower()

    @staticmethod
    def _newest_file(directory: Path, matches) -> Optional[Path]:
        """Return the most recently modified regular file in ``directory``
        whose name satisfies ``matches(name)``, ignoring partial-download
        leftovers. Returns None when nothing qualifies."""
        newest = None
        newest_mtime = None
        try:
            entries = list(directory.iterdir())
        except OSError:
            return None
        for entry in entries:
            name = entry.name
            if name.endswith(EmbedDownloader._TEMP_SUFFIXES) or not matches(name):
                continue
            try:
                if not entry.is_file():
                    continue
                mtime = entry.stat().st_mtime
            except OSError:
                continue  # entry vanished or is unreadable; skip it
            if newest_mtime is None or mtime > newest_mtime:
                newest, newest_mtime = entry, mtime
        return newest

    async def download(self, url: str, output_dir: Path, quality: str = 'best',
                       filename_template: str = None,
                       timeout: Optional[float] = None) -> Dict:
        """
        Download video from URL

        Args:
            url: Video URL to download
            output_dir: Directory to save the video (created if missing)
            quality: Quality preset ('best', '1080p', '720p', '480p', 'audio');
                an unknown value falls back to 'best'
            filename_template: Optional custom yt-dlp output template,
                relative to ``output_dir``
            timeout: Optional limit in seconds for the yt-dlp run; None
                (default) waits indefinitely

        Returns:
            Dict with success status and file info. On success: 'file_path',
            'filename', 'file_size' and, when yt-dlp metadata could be parsed,
            'title', 'duration', 'uploader', 'upload_date', 'video_id' and
            'platform'. On failure: 'error'.
        """
        if not self.is_available():
            return {
                'success': False,
                'error': 'yt-dlp not available'
            }
        if not url:
            return {
                'success': False,
                'error': 'No URL provided'
            }
        try:
            # Create output directory
            output_dir = Path(output_dir)
            output_dir.mkdir(parents=True, exist_ok=True)

            # Build output template
            if filename_template:
                output_template = str(output_dir / filename_template)
            else:
                output_template = str(output_dir / 'embed_%(title).50s_%(id)s.%(ext)s')

            # Get format string
            format_str = self.QUALITY_PRESETS.get(quality, self.QUALITY_PRESETS['best'])

            args = [
                '--no-playlist',
                '--no-warnings',
                '-f', format_str,
                '--merge-output-format', 'mp4',
                '-o', output_template,
                '--print-json',  # Output JSON with video info
                '--',  # end of options: a URL starting with '-' must not be parsed as a flag
                url,
            ]
            self.log(f"Downloading embed: {url}", 'debug')

            returncode, stdout_text, stderr_text = await self._run_ytdlp(args, timeout)

            if returncode != 0:
                error_msg = self._summarize_error(stderr_text)
                self.log(f"yt-dlp failed: {error_msg}", 'warning')
                return {
                    'success': False,
                    'error': error_msg or f'yt-dlp exited with code {returncode}'
                }

            video_info = self._parse_info_json(stdout_text)
            if not video_info:
                # No metadata available: look for a file written with the
                # default 'embed_' prefix, preferring the newest one so an
                # older download in the same directory is not reported.
                found = self._newest_file(output_dir, lambda name: name.startswith('embed_'))
                if found is not None:
                    return {
                        'success': True,
                        'file_path': str(found),
                        'filename': found.name,
                        'file_size': found.stat().st_size if found.exists() else None
                    }
                return {
                    'success': False,
                    'error': 'Could not parse yt-dlp output'
                }

            # Extract file info
            reported = video_info.get('_filename') or video_info.get('filename')
            file_path = Path(reported) if reported else None

            # The reported name can differ from what is on disk (e.g. the
            # extension changes after merging), so fall back to searching the
            # output directory for a file carrying the video id.
            if file_path is None or not file_path.exists():
                video_id = video_info.get('id')
                if video_id:
                    needle = str(video_id)
                    found = self._newest_file(output_dir, lambda name: needle in name)
                    if found is not None:
                        file_path = found

            return {
                'success': True,
                'file_path': str(file_path) if file_path else None,
                'filename': file_path.name if file_path else None,
                'file_size': file_path.stat().st_size if file_path and file_path.exists() else video_info.get('filesize'),
                'title': video_info.get('title'),
                'duration': video_info.get('duration'),
                'uploader': video_info.get('uploader'),
                'upload_date': video_info.get('upload_date'),
                'video_id': video_info.get('id'),
                'platform': self._platform_from_info(video_info)
            }
        except asyncio.TimeoutError:
            self.log(f"Download timed out: {url}", 'warning')
            return {
                'success': False,
                'error': 'Download timed out'
            }
        except Exception as e:
            # Boundary handler: callers expect a result dict, never an exception.
            self.log(f"Error downloading embed: {e}", 'error')
            return {
                'success': False,
                'error': str(e)
            }

    async def get_video_info(self, url: str, timeout: Optional[float] = None) -> Dict:
        """
        Get video information without downloading

        Args:
            url: Video URL
            timeout: Optional limit in seconds for the yt-dlp run; None
                (default) waits indefinitely

        Returns:
            Dict with video metadata ('title', 'duration', 'uploader',
            'upload_date', 'view_count', 'like_count', 'description',
            'thumbnail', 'video_id', 'platform', 'formats' as a count) and
            'success': True, or 'success': False with 'error'.
        """
        if not self.is_available():
            return {'success': False, 'error': 'yt-dlp not available'}
        if not url:
            return {'success': False, 'error': 'No URL provided'}
        try:
            args = [
                '--no-playlist',
                '--no-warnings',
                '-j',  # Output JSON
                '--no-download',
                '--',  # end of options: a URL starting with '-' must not be parsed as a flag
                url,
            ]
            returncode, stdout_text, stderr_text = await self._run_ytdlp(args, timeout)

            if returncode != 0:
                error_msg = stderr_text.strip()
                return {
                    'success': False,
                    'error': error_msg or f'yt-dlp exited with code {returncode}'
                }

            video_info = self._parse_info_json(stdout_text)
            if not video_info:
                return {
                    'success': False,
                    'error': 'Could not parse yt-dlp output'
                }

            return {
                'success': True,
                'title': video_info.get('title'),
                'duration': video_info.get('duration'),
                'uploader': video_info.get('uploader'),
                'upload_date': video_info.get('upload_date'),
                'view_count': video_info.get('view_count'),
                'like_count': video_info.get('like_count'),
                'description': video_info.get('description'),
                'thumbnail': video_info.get('thumbnail'),
                'video_id': video_info.get('id'),
                'platform': self._platform_from_info(video_info),
                # `or []` covers a 'formats' key that is present but None
                'formats': len(video_info.get('formats') or [])
            }
        except asyncio.TimeoutError:
            self.log(f"Timed out getting video info: {url}", 'warning')
            return {
                'success': False,
                'error': 'Request timed out'
            }
        except Exception as e:
            # Boundary handler: callers expect a result dict, never an exception.
            self.log(f"Error getting video info: {e}", 'error')
            return {
                'success': False,
                'error': str(e)
            }

    @staticmethod
    def detect_platform(url: str) -> Optional[str]:
        """Detect video platform from URL.

        The decision is based on the URL's host name only: a known domain or
        any of its subdomains matches (``www.youtube.com``, ``player.vimeo.com``),
        while look-alike hosts (``dropbox.com`` vs ``x.com``) and domains that
        merely appear in the path or query string do not. URLs without a
        scheme (``youtu.be/abc``, ``//vimeo.com/1``) are accepted.

        Returns:
            Platform key ('youtube', 'vimeo', 'dailymotion', 'twitch',
            'twitter', 'tiktok', 'instagram', 'reddit'), or None when the
            host is not recognised or ``url`` is empty / not a string.
        """
        if not url or not isinstance(url, str):
            return None
        candidate = url.strip()
        try:
            parts = urlsplit(candidate)
            if not parts.netloc:
                # No authority parsed => scheme-less URL; re-parse it as a
                # network-path reference so the host is recognised.
                parts = urlsplit('//' + candidate.lstrip('/'))
            host = parts.hostname or ''
        except ValueError:
            return None  # malformed authority (e.g. unbalanced IPv6 brackets)
        host = host.rstrip('.')  # tolerate a fully-qualified trailing dot
        if not host:
            return None
        for domain, platform in EmbedDownloader._PLATFORM_DOMAINS.items():
            if host == domain or host.endswith('.' + domain):
                return platform
        return None

    @staticmethod
    def is_supported_url(url: str) -> bool:
        """Check if URL is from a supported platform"""
        return EmbedDownloader.detect_platform(url) is not None