Files
media-downloader/modules/paid_content/file_host_downloader.py
Todd 0d7b2b1aab Initial commit
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-29 22:42:55 -04:00

530 lines
22 KiB
Python

"""
Download files from external file hosting services
Supports: Bunkr, Pixeldrain, Gofile, Cyberdrop, FileDitch
"""
import asyncio
import re
from pathlib import Path
from typing import Dict, List, Optional
from urllib.parse import urlparse, parse_qs
import aiohttp
from modules.base_module import LoggingMixin, RateLimitMixin
class FileHostDownloader(LoggingMixin, RateLimitMixin):
    """
    Download files from various file hosting services.

    Used for manual import of PPV content.

    Every ``_download_<host>`` handler returns the same result shape::

        {'success': bool, 'files': [str, ...], 'error': str or None}

    Handlers additionally include a ``'failed'`` list when at least one item
    could not be downloaded. Logging (``self.log``) and rate limiting
    (``self._delay_between_items``) are provided by the mixins.
    """

    # Maps handler key -> domains served by that handler. The key doubles as
    # the suffix of the handler method name (``_download_<key>``).
    SUPPORTED_HOSTS = {
        'bunkr': ['bunkr.sk', 'bunkr.si', 'bunkr.la', 'bunkrr.ru', 'bunkr.ph', 'bunkr.is', 'bunkr.ac', 'bunkr.cr'],
        'pixeldrain': ['pixeldrain.com'],
        'gofile': ['gofile.io'],
        'cyberdrop': ['cyberdrop.me', 'cyberdrop.to', 'cyberdrop.cc'],
        'fileditch': ['fileditchfiles.me', 'fileditch.me'],
    }

    # Bunkr CDN servers (food-themed) - try in order
    BUNKR_CDNS = [
        'i-soup.bunkr.ru',
        'i-burger.bunkr.ru',
        'i-pizza.bunkr.ru',
        'i-taco.bunkr.ru',
        'i-fries.bunkr.ru',
        'i-hotdog.bunkr.ru',
        'i-nachos.bunkr.ru',
        'i-sushi.bunkr.ru',
        'i-ramen.bunkr.ru',
        'i-curry.bunkr.ru',
        'i-kebab.bunkr.ru',
        'i-pasta.bunkr.ru',
        'i-steak.bunkr.ru',
        'i-salad.bunkr.ru',
        'i-sandwich.bunkr.ru',
        'i-waffle.bunkr.ru',
        'i-pancake.bunkr.ru',
        'i-donut.bunkr.ru',
        'i-cookie.bunkr.ru',
        'i-cake.bunkr.ru',
        'i-bacon.bunkr.ru',
        'i-cheese.bunkr.ru',
        'i-chicken.bunkr.ru',
        'i-fish.bunkr.ru',
        'i-noodle.bunkr.ru',
        'i-rice.bunkr.ru',
        'i-bread.bunkr.ru',
        'burger.bunkr.ru',
        'pizza.bunkr.ru',
        'milkshake.bunkr.ru',
    ]

    # Value sent in the x-website-token header of Gofile API requests.
    # A 401 from the contents endpoint is reported as "websiteToken may have
    # changed", so this is the first thing to check when Gofile breaks.
    GOFILE_WEBSITE_TOKEN = '4fd6sg89d7s6'

    # FlareSolverr endpoint used to obtain Cloudflare cookies for FileDitch.
    FLARESOLVERR_URL = 'http://localhost:8191/v1'

    def __init__(self, log_callback=None, progress_callback=None):
        """
        Args:
            log_callback: Forwarded to the logging mixin's ``_init_logger``.
            progress_callback: Optional callable invoked as
                ``(downloaded_bytes, total_bytes, filename)`` while a file
                is being streamed.
        """
        self._init_logger('PaidContent', log_callback, default_module='FileHost')
        self._init_rate_limiter(min_delay=1, max_delay=3)
        self.progress_callback = progress_callback  # Called with (downloaded_bytes, total_bytes, filename)

    def detect_host(self, url: str) -> Optional[str]:
        """
        Detect which file host a URL belongs to.

        Matching is done on the URL's hostname (port and credentials are
        ignored) and accepts the listed domain itself or any subdomain of
        it, so ``www.`` prefixes and CDN hosts such as ``fs-01.cyberdrop.to``
        are recognised.

        Returns:
            The key from ``SUPPORTED_HOSTS``, or ``None`` when the URL is
            malformed or the host is not supported.
        """
        try:
            hostname = urlparse(url).hostname
            if not hostname:
                return None
            hostname = hostname.rstrip('.')
            for host, domains in self.SUPPORTED_HOSTS.items():
                for domain in domains:
                    if hostname == domain or hostname.endswith('.' + domain):
                        return host
        except (AttributeError, TypeError, ValueError):
            # Non-string input or an unparsable URL: treat as unsupported.
            pass
        return None

    def is_supported_url(self, url: str) -> bool:
        """Check if URL is from a supported file host."""
        return self.detect_host(url) is not None

    async def download_url(self, url: str, save_dir: Path) -> Dict:
        """
        Download file(s) from URL.

        Dispatches to the ``_download_<host>`` handler for the detected host
        and converts any exception it raises into a failure result.

        Returns: {'success': bool, 'files': [paths], 'error': str}
        """
        host = self.detect_host(url)
        if not host:
            return {'success': False, 'files': [], 'error': 'Unsupported host'}

        handler = getattr(self, f'_download_{host}', None)
        if not handler:
            return {'success': False, 'files': [], 'error': f'No handler for {host}'}

        try:
            save_dir = Path(save_dir)
            save_dir.mkdir(parents=True, exist_ok=True)
            return await handler(url, save_dir)
        except Exception as e:
            # Top-level boundary: callers get a result dict, never a raise.
            self.log(f"Error downloading from {host}: {e}", 'error')
            return {'success': False, 'files': [], 'error': str(e)}

    @staticmethod
    def _build_result(files: List[str], failed: List[str]) -> Dict:
        """Build the common handler result from downloaded and failed items."""
        result = {'success': len(files) > 0, 'files': files, 'error': None}
        if failed:
            result['failed'] = failed
            result['error'] = f'{len(failed)} files failed to download'
        elif not files:
            result['error'] = 'No files found'
        return result

    @staticmethod
    def _unique(items: List[str]) -> List[str]:
        """Drop duplicate entries while keeping first-seen order."""
        return list(dict.fromkeys(items))

    async def _download_pixeldrain(self, url: str, save_dir: Path) -> Dict:
        """Download a single file (/u/ID) or a list (/l/ID) from Pixeldrain."""
        # Extract file ID from URL
        # Format: https://pixeldrain.com/u/FILEID or /l/LISTID
        parsed = urlparse(url)
        path_parts = parsed.path.strip('/').split('/')
        if len(path_parts) < 2:
            return {'success': False, 'files': [], 'error': 'Invalid Pixeldrain URL'}

        url_type, file_id = path_parts[0], path_parts[1]
        if url_type not in ('u', 'l'):
            # Previously fell through and reported success with no files.
            return {'success': False, 'files': [], 'error': 'Invalid Pixeldrain URL'}

        files = []
        failed = []
        timeout = aiohttp.ClientTimeout(total=300)
        async with aiohttp.ClientSession(timeout=timeout) as session:
            if url_type == 'u':
                # Single file
                api_url = f"https://pixeldrain.com/api/file/{file_id}/info"
                async with session.get(api_url) as resp:
                    if resp.status != 200:
                        return {'success': False, 'files': [], 'error': f'API error: {resp.status}'}
                    info = await resp.json()

                download_url = f"https://pixeldrain.com/api/file/{file_id}"
                filename = info.get('name', f'{file_id}.bin')
                save_path = save_dir / self._sanitize_filename(filename)
                # A failure here propagates to download_url, which reports it.
                await self._download_file(session, download_url, save_path)
                files.append(str(save_path))
            else:
                # List (album)
                api_url = f"https://pixeldrain.com/api/list/{file_id}"
                async with session.get(api_url) as resp:
                    if resp.status != 200:
                        return {'success': False, 'files': [], 'error': f'API error: {resp.status}'}
                    data = await resp.json()

                for i, item in enumerate(data.get('files') or []):
                    # NOTE(review): _delay_between_items comes from
                    # RateLimitMixin (not visible here); if it sleeps
                    # synchronously it blocks the event loop - confirm.
                    self._delay_between_items()
                    item_id = item.get('id')
                    if not item_id:
                        self.log(f"Skipping Pixeldrain list entry {i}: no id", 'warning')
                        failed.append(f'entry {i}')
                        continue
                    filename = item.get('name', f'{i:03d}_{item_id}.bin')
                    download_url = f"https://pixeldrain.com/api/file/{item_id}"
                    save_path = save_dir / self._sanitize_filename(filename)
                    try:
                        await self._download_file(session, download_url, save_path)
                        files.append(str(save_path))
                    except Exception as e:
                        self.log(f"Failed to download {filename}: {e}", 'warning')
                        failed.append(filename)

        return self._build_result(files, failed)

    async def _download_gofile(self, url: str, save_dir: Path) -> Dict:
        """Download the files directly inside a Gofile content folder.

        Only children of type ``file`` are downloaded; nested folders are
        skipped.
        """
        # Extract content ID from URL
        # Format: https://gofile.io/d/CONTENTID
        parsed = urlparse(url)
        path_parts = parsed.path.strip('/').split('/')
        if len(path_parts) < 2 or path_parts[0] != 'd':
            return {'success': False, 'files': [], 'error': 'Invalid Gofile URL'}

        content_id = path_parts[1]
        files = []
        failed = []
        timeout = aiohttp.ClientTimeout(total=300)
        async with aiohttp.ClientSession(timeout=timeout) as session:
            # Create guest account token (POST request required since API change)
            async with session.post('https://api.gofile.io/accounts') as resp:
                if resp.status != 200:
                    return {'success': False, 'files': [], 'error': 'Failed to get Gofile token'}
                account_data = await resp.json()

            if account_data.get('status') != 'ok':
                return {'success': False, 'files': [], 'error': f"Gofile API error: {account_data.get('status')}"}

            token = account_data.get('data', {}).get('token')
            if not token:
                return {'success': False, 'files': [], 'error': 'No Gofile token received'}

            # Get content info
            # Gofile requires x-website-token header (changed from query param in 2024)
            headers = {
                'Authorization': f'Bearer {token}',
                'x-website-token': self.GOFILE_WEBSITE_TOKEN,
            }
            api_url = f"https://api.gofile.io/contents/{content_id}"
            async with session.get(api_url, headers=headers) as resp:
                if resp.status == 401:
                    return {'success': False, 'files': [], 'error': 'Gofile authentication failed - websiteToken may have changed'}
                if resp.status != 200:
                    return {'success': False, 'files': [], 'error': f'Failed to get content: {resp.status}'}
                content_data = await resp.json()

            if content_data.get('status') == 'error-notPremium':
                return {'success': False, 'files': [], 'error': 'Gofile requires premium account for API access - try direct download'}
            if content_data.get('status') != 'ok':
                error = content_data.get('data', {}).get('message', content_data.get('status', 'Unknown error'))
                return {'success': False, 'files': [], 'error': error}

            contents = content_data.get('data', {}).get('children') or {}
            for item_id, item in contents.items():
                if item.get('type') != 'file':
                    continue
                self._delay_between_items()
                download_url = item.get('link')
                filename = item.get('name', f'{item_id}.bin')
                save_path = save_dir / self._sanitize_filename(filename)
                try:
                    # The API auth headers are sent with the download too.
                    await self._download_file(session, download_url, save_path, headers=headers)
                    files.append(str(save_path))
                except Exception as e:
                    self.log(f"Failed to download {filename}: {e}", 'warning')
                    failed.append(filename)

        return self._build_result(files, failed)

    async def _download_cyberdrop(self, url: str, save_dir: Path) -> Dict:
        """Download a Cyberdrop album (/a/ID), single file, or direct CDN link."""
        # Cyberdrop albums: https://cyberdrop.me/a/ALBUMID
        # Single files: https://cyberdrop.me/f/FILEID or direct CDN links
        files = []
        failed = []
        timeout = aiohttp.ClientTimeout(total=300)
        async with aiohttp.ClientSession(timeout=timeout) as session:
            parsed = urlparse(url)
            path_parts = parsed.path.strip('/').split('/')
            if len(path_parts) >= 2 and path_parts[0] == 'a':
                # Album
                async with session.get(url) as resp:
                    if resp.status != 200:
                        return {'success': False, 'files': [], 'error': f'Failed to fetch album: {resp.status}'}
                    html = await resp.text()

                # Parse file links from HTML
                # Pattern: href="https://fs-XXX.cyberdrop.to/FILE"
                cdn_pattern = r'href="(https://[a-z0-9-]+\.cyberdrop\.[a-z]+/[^"]+)"'
                # The same link can appear more than once in the page.
                matches = self._unique(re.findall(cdn_pattern, html))
                for i, file_url in enumerate(matches):
                    self._delay_between_items()
                    filename = file_url.split('/')[-1].split('?')[0]
                    if not filename:
                        filename = f'{i:03d}.bin'
                    save_path = save_dir / self._sanitize_filename(filename)
                    try:
                        await self._download_file(session, file_url, save_path)
                        files.append(str(save_path))
                    except Exception as e:
                        self.log(f"Failed to download {filename}: {e}", 'warning')
                        failed.append(filename)
            else:
                # Single file or direct CDN link
                filename = parsed.path.split('/')[-1] or 'download.bin'
                save_path = save_dir / self._sanitize_filename(filename)
                await self._download_file(session, url, save_path)
                files.append(str(save_path))

        return self._build_result(files, failed)

    async def _download_bunkr(self, url: str, save_dir: Path) -> Dict:
        """Download from Bunkr with CDN fallback support."""
        # Bunkr albums: https://bunkr.sk/a/ALBUMID
        # Single files: https://bunkr.sk/f/FILEID or https://bunkr.sk/v/VIDEOID
        files = []
        failed = []
        timeout = aiohttp.ClientTimeout(total=600)  # Increased for large files
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
        }
        async with aiohttp.ClientSession(timeout=timeout, headers=headers) as session:
            parsed = urlparse(url)
            path_parts = parsed.path.strip('/').split('/')
            if len(path_parts) >= 2 and path_parts[0] == 'a':
                # Album page
                async with session.get(url) as resp:
                    if resp.status != 200:
                        return {'success': False, 'files': [], 'error': f'Failed to fetch album: {resp.status}'}
                    html = await resp.text()

                # Parse file links from HTML - look for /f/ links
                file_pattern = r'href="(/f/[^"]+)"'
                # The same link can appear more than once in the page.
                matches = self._unique(re.findall(file_pattern, html))
                self.log(f"Found {len(matches)} files in Bunkr album", 'info')
                for i, file_path in enumerate(matches):
                    self._delay_between_items()
                    # Make absolute URL
                    file_url = f"https://{parsed.netloc}{file_path}"
                    # Get direct download URL and file UUID
                    direct_url, file_uuid = await self._get_bunkr_direct_url_with_uuid(session, file_url)
                    if not direct_url:
                        self.log(f"Could not get direct URL for {file_url}", 'warning')
                        failed.append(file_url)
                        continue

                    filename = direct_url.split('/')[-1].split('?')[0]
                    if not filename:
                        filename = f'{i:03d}.bin'
                    save_path = save_dir / self._sanitize_filename(filename)
                    try:
                        await self._download_file(session, direct_url, save_path,
                                                  try_cdn_fallback=True, file_uuid=file_uuid)
                        files.append(str(save_path))
                        self.log(f"Downloaded: {filename}", 'info')
                    except Exception as e:
                        self.log(f"Failed to download {filename}: {e}", 'warning')
                        failed.append(filename)
            else:
                # Single file page
                direct_url, file_uuid = await self._get_bunkr_direct_url_with_uuid(session, url)
                if not direct_url:
                    return {'success': False, 'files': [], 'error': 'Could not get direct download URL'}
                filename = direct_url.split('/')[-1].split('?')[0] or 'download.bin'
                save_path = save_dir / self._sanitize_filename(filename)
                await self._download_file(session, direct_url, save_path,
                                          try_cdn_fallback=True, file_uuid=file_uuid)
                files.append(str(save_path))

        return self._build_result(files, failed)

    async def _get_bunkr_direct_url_with_uuid(self, session: aiohttp.ClientSession, page_url: str) -> tuple:
        """
        Extract direct download URL and file UUID from a Bunkr file page.

        Returns:
            ``(direct_url, file_uuid)``. ``direct_url`` is ``None`` when no
            reachable URL was found; ``file_uuid`` is ``None`` when the page
            contained no recognisable UUID filename. Both are ``None`` when
            the page could not be fetched or an error occurred.
        """
        try:
            async with session.get(page_url) as resp:
                if resp.status != 200:
                    return None, None
                html = await resp.text()

            file_uuid = None
            # Extract file UUID first
            uuid_patterns = [
                r'data-v="([a-f0-9-]{36}\.[a-z0-9]+)"',
                r'([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}\.[a-z0-9]+)',
            ]
            for pattern in uuid_patterns:
                match = re.search(pattern, html)
                if match:
                    file_uuid = match.group(1)
                    break

            # Try to find existing CDN URL in page
            cdn_patterns = [
                r'href="(https://[^"]*\.bunkr\.ru/[^"]+)"',
                r'src="(https://[^"]*\.bunkr\.ru/[^"]+)"',
                r'data-src="(https://[^"]*\.bunkr\.ru/[^"]+)"',
            ]
            for pattern in cdn_patterns:
                match = re.search(pattern, html)
                if match:
                    url = match.group(1)
                    if await self._check_url_accessible(session, url):
                        return url, file_uuid

            # If we have UUID, try CDNs
            if file_uuid:
                self.log(f"Found file UUID: {file_uuid}, trying CDNs...", 'debug')
                for cdn in self.BUNKR_CDNS:
                    cdn_url = f"https://{cdn}/{file_uuid}"
                    if await self._check_url_accessible(session, cdn_url):
                        self.log(f"Found working CDN: {cdn}", 'debug')
                        return cdn_url, file_uuid

            return None, file_uuid
        except Exception as e:
            self.log(f"Error getting Bunkr direct URL: {e}", 'warning')
            return None, None

    async def _check_url_accessible(self, session: aiohttp.ClientSession, url: str) -> bool:
        """Check if a URL is accessible (HEAD request returns 200 within 10s)."""
        try:
            async with session.head(url, allow_redirects=True, timeout=aiohttp.ClientTimeout(total=10)) as resp:
                return resp.status == 200
        except Exception:
            # Any network error simply means "not accessible".
            return False

    async def _download_fileditch(self, url: str, save_dir: Path) -> Dict:
        """Download from FileDitch (Cloudflare-protected)."""
        from modules.cloudflare_handler import CloudflareHandler

        # Extract filename from URL: file.php?f=/b74/tLyJWGrzvSyRlJvBVDBa.mp4
        parsed = urlparse(url)
        params = parse_qs(parsed.query)
        file_path = params.get('f', [''])[0]
        if not file_path:
            return {'success': False, 'files': [], 'error': 'Invalid FileDitch URL - no file parameter'}

        filename = file_path.rsplit('/', 1)[-1]
        if not filename:
            return {'success': False, 'files': [], 'error': 'Could not extract filename from URL'}
        save_path = save_dir / self._sanitize_filename(filename)

        # Use CloudflareHandler to get cookies via FlareSolverr
        cf_handler = CloudflareHandler(
            module_name='FileDitch',
            flaresolverr_url=self.FLARESOLVERR_URL,
            flaresolverr_enabled=True,
        )
        self.log('Bypassing Cloudflare for FileDitch via FlareSolverr...', 'info')
        # NOTE(review): this looks like a synchronous call inside a coroutine;
        # confirm it does not block the event loop for long.
        if not cf_handler.get_cookies_via_flaresolverr(url):
            return {'success': False, 'files': [], 'error': 'Failed to bypass Cloudflare for FileDitch'}

        cookies = cf_handler.get_cookies_dict()
        user_agent = cf_handler.get_user_agent()

        # Download with the obtained cookies. They are handed to the session
        # constructor: CookieJar.update_cookies() expects a yarl.URL for
        # response_url, so passing the URL string there raised AttributeError.
        timeout = aiohttp.ClientTimeout(total=3600)
        headers = {'User-Agent': user_agent or 'Mozilla/5.0'}
        async with aiohttp.ClientSession(timeout=timeout, cookies=cookies or None,
                                         headers=headers) as session:
            await self._download_file(session, url, save_path)

        return {'success': True, 'files': [str(save_path)], 'error': None}

    async def _download_file(self, session: aiohttp.ClientSession, url: str,
                             save_path: Path, headers: Optional[Dict] = None,
                             try_cdn_fallback: bool = False, file_uuid: Optional[str] = None) -> None:
        """
        Download a single file with streaming and optional CDN fallback.

        Data is streamed to ``<save_path>.part`` and moved onto ``save_path``
        only after the transfer completes, so a failed or interrupted
        download never leaves a truncated file under the final name.

        Args:
            session: Open aiohttp session used for the request(s).
            url: Primary download URL.
            save_path: Final destination; parent directories are created.
            headers: Optional per-request headers.
            try_cdn_fallback: When true and ``file_uuid`` is given, every
                host in ``BUNKR_CDNS`` is tried after ``url`` fails.
            file_uuid: Bunkr file name (UUID + extension) used to build the
                fallback URLs.

        Raises:
            RuntimeError: When every candidate URL failed.
        """
        save_path.parent.mkdir(parents=True, exist_ok=True)
        part_path = save_path.with_name(save_path.name + '.part')

        urls_to_try = [url]
        # If CDN fallback enabled and we have a file UUID, add alternate CDNs
        if try_cdn_fallback and file_uuid:
            for cdn in self.BUNKR_CDNS:
                alt_url = f"https://{cdn}/{file_uuid}"
                if alt_url != url:
                    urls_to_try.append(alt_url)

        last_error = None
        for try_url in urls_to_try:
            try:
                self.log(f"Downloading: {save_path.name} from {try_url[:60]}...", 'info')
                async with session.get(try_url, headers=headers) as resp:
                    if resp.status != 200:
                        last_error = f"HTTP {resp.status}"
                        self.log(f"Download failed: {save_path.name} - {last_error}", 'warning')
                        continue  # Try next CDN
                    downloaded = await self._stream_response(resp, part_path, save_path.name)
                part_path.replace(save_path)
                self.log(f"Downloaded: {save_path.name} ({downloaded // (1024*1024)}MB)", 'info')
                return  # Success
            except asyncio.CancelledError:
                # Never swallow cancellation; just clean up the partial file.
                self._discard_partial(part_path)
                raise
            except Exception as e:
                last_error = str(e)
                self.log(f"Download error: {save_path.name} - {last_error}", 'warning')
                self._discard_partial(part_path)
                # Try next CDN

        raise RuntimeError(f"Download failed after trying {len(urls_to_try)} URLs: {last_error}")

    async def _stream_response(self, resp, part_path: Path, display_name: str) -> int:
        """Write the response body to ``part_path``; return bytes written."""
        total_size = int(resp.headers.get('content-length', 0))
        downloaded = 0
        last_log_pct = 0
        with open(part_path, 'wb') as f:
            async for chunk in resp.content.iter_chunked(65536):  # 64KB chunks
                f.write(chunk)
                downloaded += len(chunk)
                # Progress is only computable when the server sent a length.
                if total_size <= 0:
                    continue
                # Log and callback progress every 2%
                pct = int(downloaded * 100 / total_size)
                if pct < last_log_pct + 2:
                    continue
                self.log(f"  {display_name}: {pct}% ({downloaded // (1024*1024)}MB / {total_size // (1024*1024)}MB)", 'info')
                last_log_pct = pct
                # Call progress callback if provided
                if self.progress_callback:
                    try:
                        self.progress_callback(downloaded, total_size, display_name)
                    except Exception as e:
                        # Don't fail download due to callback error
                        self.log(f"Progress callback error: {e}", 'debug')
        return downloaded

    @staticmethod
    def _discard_partial(part_path: Path) -> None:
        """Remove a leftover partial download, ignoring filesystem errors."""
        try:
            part_path.unlink()
        except OSError:
            pass

    def _sanitize_filename(self, filename: str) -> str:
        """
        Sanitize filename for filesystem.

        Removes characters that are invalid in file names (including path
        separators and control characters) and strips leading/trailing dots
        and spaces. Falls back to ``'download.bin'`` when nothing is left.
        """
        if not filename:
            return 'download.bin'
        # Remove/replace invalid characters
        filename = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '', filename)
        filename = filename.strip('. ')
        return filename or 'download.bin'

    @classmethod
    def get_supported_domains(cls) -> List[str]:
        """Get list of all supported domains."""
        return [domain for host_domains in cls.SUPPORTED_HOSTS.values() for domain in host_domains]