530 lines
22 KiB
Python
530 lines
22 KiB
Python
"""
|
|
Download files from external file hosting services

Supports: Bunkr, Pixeldrain, Gofile, Cyberdrop, FileDitch
|
|
"""
|
|
|
|
import asyncio
|
|
import re
|
|
from pathlib import Path
|
|
from typing import Dict, List, Optional
|
|
from urllib.parse import urlparse, parse_qs
|
|
|
|
import aiohttp
|
|
|
|
from modules.base_module import LoggingMixin, RateLimitMixin
|
|
|
|
|
|
class FileHostDownloader(LoggingMixin, RateLimitMixin):
    """
    Download files from various file hosting services

    Used for manual import of PPV content

    ``download_url`` is the public entry point: it resolves the host key of a
    URL via ``detect_host`` and dispatches to the ``_download_<host key>``
    coroutine of this class.
    """

    # Host key -> domains accepted for that host. ``detect_host`` compares the
    # URL's domain against these lists by exact membership, and the host key
    # selects the ``_download_<key>`` handler in ``download_url``.
    SUPPORTED_HOSTS = {
        'bunkr': ['bunkr.sk', 'bunkr.si', 'bunkr.la', 'bunkrr.ru', 'bunkr.ph', 'bunkr.is', 'bunkr.ac', 'bunkr.cr'],
        'pixeldrain': ['pixeldrain.com'],
        'gofile': ['gofile.io'],
        'cyberdrop': ['cyberdrop.me', 'cyberdrop.to', 'cyberdrop.cc'],
        'fileditch': ['fileditchfiles.me', 'fileditch.me'],
    }

    # Bunkr CDN servers (food-themed) - try in order
    # Candidate URLs are built as ``https://<cdn>/<file_uuid>`` both when
    # resolving a direct link and when falling back during a download.
    BUNKR_CDNS = [
        'i-soup.bunkr.ru',
        'i-burger.bunkr.ru',
        'i-pizza.bunkr.ru',
        'i-taco.bunkr.ru',
        'i-fries.bunkr.ru',
        'i-hotdog.bunkr.ru',
        'i-nachos.bunkr.ru',
        'i-sushi.bunkr.ru',
        'i-ramen.bunkr.ru',
        'i-curry.bunkr.ru',
        'i-kebab.bunkr.ru',
        'i-pasta.bunkr.ru',
        'i-steak.bunkr.ru',
        'i-salad.bunkr.ru',
        'i-sandwich.bunkr.ru',
        'i-waffle.bunkr.ru',
        'i-pancake.bunkr.ru',
        'i-donut.bunkr.ru',
        'i-cookie.bunkr.ru',
        'i-cake.bunkr.ru',
        'i-bacon.bunkr.ru',
        'i-cheese.bunkr.ru',
        'i-chicken.bunkr.ru',
        'i-fish.bunkr.ru',
        'i-noodle.bunkr.ru',
        'i-rice.bunkr.ru',
        'i-bread.bunkr.ru',
        'burger.bunkr.ru',
        'pizza.bunkr.ru',
        'milkshake.bunkr.ru',
    ]
|
|
|
|
def __init__(self, log_callback=None, progress_callback=None):
    """Set up logging, rate limiting and the optional progress hook.

    Args:
        log_callback: Passed through to ``_init_logger``.
        progress_callback: Optional callable invoked while a file is being
            streamed, as ``progress_callback(downloaded_bytes, total_bytes,
            filename)``.
    """
    # Kept exactly as given; the download loop only calls it when truthy.
    self.progress_callback = progress_callback

    self._init_logger('PaidContent', log_callback, default_module='FileHost')
    self._init_rate_limiter(min_delay=1, max_delay=3)
|
|
|
|
def detect_host(self, url: str) -> Optional[str]:
    """Detect which file host a URL belongs to.

    Args:
        url: URL to classify.

    Returns:
        The ``SUPPORTED_HOSTS`` key whose domain list contains the URL's host
        name, or ``None`` when the URL cannot be parsed or matches no host.
    """
    try:
        # ``hostname`` is already lower-cased and excludes any port or
        # ``user:pass@`` prefix, both of which ``netloc`` would keep.
        hostname = urlparse(url).hostname
        if not hostname:
            return None
        hostname = hostname.rstrip('.')
        # Strip only a *leading* "www."; replacing it anywhere in the name
        # would let e.g. "gofwww.ile.io" pass as "gofile.io".
        if hostname.startswith('www.'):
            hostname = hostname[len('www.'):]
    except (ValueError, TypeError, AttributeError):
        # Malformed URL (e.g. bad IPv6 literal) or a non-string argument.
        return None

    for host, domains in self.SUPPORTED_HOSTS.items():
        if hostname in domains:
            return host
    return None
|
|
|
|
def is_supported_url(self, url: str) -> bool:
    """Return True when ``detect_host`` recognises *url*, False otherwise."""
    host = self.detect_host(url)
    return host is not None
|
|
|
|
async def download_url(self, url: str, save_dir: Path) -> Dict:
    """
    Download file(s) from URL

    The host is resolved with ``detect_host`` and the matching
    ``_download_<host>`` coroutine is looked up on this object. ``save_dir``
    is created if needed. Any exception raised while creating the directory
    or running the handler is logged and converted into a failure result.

    Returns: {'success': bool, 'files': [paths], 'error': str}
    """
    def failure(message: str) -> Dict:
        return {'success': False, 'files': [], 'error': message}

    host = self.detect_host(url)
    if not host:
        return failure('Unsupported host')

    handler = getattr(self, f'_download_{host}', None)
    if not handler:
        return failure(f'No handler for {host}')

    try:
        target_dir = Path(save_dir)
        target_dir.mkdir(parents=True, exist_ok=True)
        outcome = await handler(url, target_dir)
    except Exception as exc:
        self.log(f"Error downloading from {host}: {exc}", 'error')
        return failure(str(exc))
    return outcome
|
|
|
|
async def _download_pixeldrain(self, url: str, save_dir: Path) -> Dict:
    """Download a single file or a list (album) from Pixeldrain.

    Accepted URL shapes are ``https://pixeldrain.com/u/FILEID`` (single file)
    and ``https://pixeldrain.com/l/LISTID`` (list).

    Args:
        url: Pixeldrain page URL.
        save_dir: Directory that receives the downloaded file(s).

    Returns:
        ``{'success': bool, 'files': [paths], 'error': str or None}``.
        ``success`` is True when at least one file was saved. For lists,
        entries that could not be downloaded are reported under ``'failed'``
        and summarised in ``'error'``.

    Raises:
        Exception: A failed single-file download propagates from
            ``_download_file``.
    """
    path_parts = urlparse(url).path.strip('/').split('/')
    if len(path_parts) < 2:
        return {'success': False, 'files': [], 'error': 'Invalid Pixeldrain URL'}

    url_type, file_id = path_parts[0], path_parts[1]
    if url_type not in ('u', 'l'):
        # Anything else used to fall through both branches and come back
        # as a "successful" download of zero files.
        return {'success': False, 'files': [],
                'error': f'Unsupported Pixeldrain URL type: {url_type}'}

    files = []
    failed = []
    timeout = aiohttp.ClientTimeout(total=300)

    async with aiohttp.ClientSession(timeout=timeout) as session:
        if url_type == 'u':
            # Single file: fetch metadata first to learn the real file name
            api_url = f"https://pixeldrain.com/api/file/{file_id}/info"
            async with session.get(api_url) as resp:
                if resp.status != 200:
                    return {'success': False, 'files': [], 'error': f'API error: {resp.status}'}
                info = await resp.json()

            download_url = f"https://pixeldrain.com/api/file/{file_id}"
            filename = info.get('name', f'{file_id}.bin')
            save_path = save_dir / self._sanitize_filename(filename)

            await self._download_file(session, download_url, save_path)
            files.append(str(save_path))
        else:
            # List (album)
            api_url = f"https://pixeldrain.com/api/list/{file_id}"
            async with session.get(api_url) as resp:
                if resp.status != 200:
                    return {'success': False, 'files': [], 'error': f'API error: {resp.status}'}
                data = await resp.json()

            for i, item in enumerate(data.get('files', [])):
                item_id = item.get('id')
                if not item_id:
                    # A malformed entry must not abort the rest of the list
                    self.log(f"Skipping Pixeldrain list entry {i}: missing id", 'warning')
                    failed.append(f'entry {i}')
                    continue

                # NOTE(review): presumably a synchronous pause from the rate
                # limiter; if it sleeps it blocks the event loop - confirm.
                self._delay_between_items()
                filename = item.get('name', f'{i:03d}_{item_id}.bin')
                download_url = f"https://pixeldrain.com/api/file/{item_id}"
                save_path = save_dir / self._sanitize_filename(filename)

                try:
                    await self._download_file(session, download_url, save_path)
                    files.append(str(save_path))
                except Exception as e:
                    self.log(f"Failed to download {filename}: {e}", 'warning')
                    failed.append(filename)

    # Same result convention as the Bunkr handler: success means something
    # was actually saved.
    result = {'success': len(files) > 0, 'files': files, 'error': None}
    if failed:
        result['failed'] = failed
        result['error'] = f'{len(failed)} files failed to download'
    elif not files:
        result['error'] = 'No files found in Pixeldrain list'
    return result
|
|
|
|
async def _download_gofile(self, url: str, save_dir: Path) -> Dict:
    """Download the files of a Gofile folder.

    Expected URL shape: ``https://gofile.io/d/CONTENTID``. A guest account
    token is requested first and then used to list the content. Only direct
    children of type ``'file'`` are downloaded; other entries are skipped.

    Args:
        url: Gofile folder URL.
        save_dir: Directory that receives the downloaded files.

    Returns:
        ``{'success': bool, 'files': [paths], 'error': str or None}``.
        ``success`` is True when at least one file was saved. Files that
        could not be downloaded are reported under ``'failed'`` and
        summarised in ``'error'``.
    """
    path_parts = urlparse(url).path.strip('/').split('/')
    if len(path_parts) < 2 or path_parts[0] != 'd':
        return {'success': False, 'files': [], 'error': 'Invalid Gofile URL'}

    content_id = path_parts[1]

    files = []
    failed = []
    timeout = aiohttp.ClientTimeout(total=300)

    async with aiohttp.ClientSession(timeout=timeout) as session:
        # Create guest account token (POST request required since API change)
        async with session.post('https://api.gofile.io/accounts') as resp:
            if resp.status != 200:
                return {'success': False, 'files': [], 'error': 'Failed to get Gofile token'}
            account_data = await resp.json()
            if account_data.get('status') != 'ok':
                return {'success': False, 'files': [], 'error': f"Gofile API error: {account_data.get('status')}"}
            token = account_data.get('data', {}).get('token')

        if not token:
            return {'success': False, 'files': [], 'error': 'No Gofile token received'}

        # Get content info
        # Gofile requires x-website-token header (changed from query param in 2024)
        # The value is hard-coded; a 401 below is reported as a hint that it
        # may have changed upstream.
        headers = {
            'Authorization': f'Bearer {token}',
            'x-website-token': '4fd6sg89d7s6',
        }
        api_url = f"https://api.gofile.io/contents/{content_id}"

        async with session.get(api_url, headers=headers) as resp:
            if resp.status == 401:
                return {'success': False, 'files': [], 'error': 'Gofile authentication failed - websiteToken may have changed'}
            if resp.status != 200:
                return {'success': False, 'files': [], 'error': f'Failed to get content: {resp.status}'}
            content_data = await resp.json()

        if content_data.get('status') == 'error-notPremium':
            return {'success': False, 'files': [], 'error': 'Gofile requires premium account for API access - try direct download'}
        if content_data.get('status') != 'ok':
            error = content_data.get('data', {}).get('message', content_data.get('status', 'Unknown error'))
            return {'success': False, 'files': [], 'error': error}

        contents = content_data.get('data', {}).get('children', {})

        for item_id, item in contents.items():
            if item.get('type') != 'file':
                continue

            filename = item.get('name', f'{item_id}.bin')
            download_url = item.get('link')
            if not download_url:
                # Without this guard ``None`` would be handed to session.get
                self.log(f"No download link for {filename}", 'warning')
                failed.append(filename)
                continue

            # NOTE(review): presumably a synchronous pause from the rate
            # limiter; if it sleeps it blocks the event loop - confirm.
            self._delay_between_items()
            save_path = save_dir / self._sanitize_filename(filename)

            try:
                await self._download_file(session, download_url, save_path, headers=headers)
                files.append(str(save_path))
            except Exception as e:
                self.log(f"Failed to download {filename}: {e}", 'warning')
                failed.append(filename)

    # Same result convention as the Bunkr handler: success means something
    # was actually saved.
    result = {'success': len(files) > 0, 'files': files, 'error': None}
    if failed:
        result['failed'] = failed
        result['error'] = f'{len(failed)} files failed to download'
    elif not files:
        result['error'] = 'No files found in Gofile folder'
    return result
|
|
|
|
async def _download_cyberdrop(self, url: str, save_dir: Path) -> Dict:
    """Download a Cyberdrop album or a single file.

    Album URLs look like ``https://cyberdrop.me/a/ALBUMID``; the album page
    is fetched and scanned for ``href`` links pointing at a Cyberdrop
    sub-domain. Any other URL is downloaded as-is.

    Args:
        url: Album URL, single-file URL or direct CDN link.
        save_dir: Directory that receives the downloaded file(s).

    Returns:
        ``{'success': bool, 'files': [paths], 'error': str or None}``.
        ``success`` is True when at least one file was saved. Album files
        that could not be downloaded are reported under ``'failed'`` and
        summarised in ``'error'``.

    Raises:
        Exception: A failed single-file download propagates from
            ``_download_file``.
    """
    files = []
    failed = []
    timeout = aiohttp.ClientTimeout(total=300)

    async with aiohttp.ClientSession(timeout=timeout) as session:
        parsed = urlparse(url)
        path_parts = parsed.path.strip('/').split('/')

        if len(path_parts) >= 2 and path_parts[0] == 'a':
            # Album
            async with session.get(url) as resp:
                if resp.status != 200:
                    return {'success': False, 'files': [], 'error': f'Failed to fetch album: {resp.status}'}
                html = await resp.text()

            # Parse file links from HTML
            # Pattern: href="https://fs-XXX.cyberdrop.to/FILE"
            cdn_pattern = r'href="(https://[a-z0-9-]+\.cyberdrop\.[a-z]+/[^"]+)"'
            # The same link can occur more than once in the page; keep the
            # first occurrence of each so no file is fetched twice.
            matches = list(dict.fromkeys(re.findall(cdn_pattern, html)))

            for i, file_url in enumerate(matches):
                # NOTE(review): presumably a synchronous pause from the rate
                # limiter; if it sleeps it blocks the event loop - confirm.
                self._delay_between_items()
                filename = file_url.split('/')[-1].split('?')[0]
                if not filename:
                    filename = f'{i:03d}.bin'
                save_path = save_dir / self._sanitize_filename(filename)

                try:
                    await self._download_file(session, file_url, save_path)
                    files.append(str(save_path))
                except Exception as e:
                    self.log(f"Failed to download {filename}: {e}", 'warning')
                    failed.append(filename)
        else:
            # Single file or direct CDN link
            filename = parsed.path.split('/')[-1] or 'download.bin'
            save_path = save_dir / self._sanitize_filename(filename)

            await self._download_file(session, url, save_path)
            files.append(str(save_path))

    # Same result convention as the Bunkr handler: success means something
    # was actually saved.
    result = {'success': len(files) > 0, 'files': files, 'error': None}
    if failed:
        result['failed'] = failed
        result['error'] = f'{len(failed)} files failed to download'
    elif not files:
        result['error'] = 'No files found in Cyberdrop album'
    return result
|
|
|
|
async def _download_bunkr(self, url: str, save_dir: Path) -> Dict:
    """Download from Bunkr with CDN fallback support.

    Album URLs look like ``https://bunkr.sk/a/ALBUMID``; single files like
    ``https://bunkr.sk/f/FILEID`` or ``https://bunkr.sk/v/VIDEOID``. For each
    file page the direct URL and file UUID are resolved with
    ``_get_bunkr_direct_url_with_uuid`` and the download is attempted with
    CDN fallback enabled.

    Args:
        url: Bunkr album or file page URL.
        save_dir: Directory that receives the downloaded file(s).

    Returns:
        ``{'success': bool, 'files': [paths], 'error': str or None}``.
        ``success`` is True when at least one file was saved. Album entries
        that could not be resolved or downloaded are reported under
        ``'failed'`` and summarised in ``'error'``.

    Raises:
        Exception: A failed single-file download propagates from
            ``_download_file``.
    """
    files = []
    failed = []
    timeout = aiohttp.ClientTimeout(total=600)  # Increased for large files

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
    }

    async with aiohttp.ClientSession(timeout=timeout, headers=headers) as session:
        parsed = urlparse(url)
        path_parts = parsed.path.strip('/').split('/')

        if len(path_parts) >= 2 and path_parts[0] == 'a':
            # Album page
            async with session.get(url) as resp:
                if resp.status != 200:
                    return {'success': False, 'files': [], 'error': f'Failed to fetch album: {resp.status}'}
                html = await resp.text()

            # Parse file links from HTML - look for /f/ links
            file_pattern = r'href="(/f/[^"]+)"'
            # The same link can occur more than once in the page; keep the
            # first occurrence of each so no file is processed twice.
            matches = list(dict.fromkeys(re.findall(file_pattern, html)))

            self.log(f"Found {len(matches)} files in Bunkr album", 'info')

            for i, file_path in enumerate(matches):
                # NOTE(review): presumably a synchronous pause from the rate
                # limiter; if it sleeps it blocks the event loop - confirm.
                self._delay_between_items()

                # Make absolute URL
                file_url = f"https://{parsed.netloc}{file_path}"

                # Get direct download URL and file UUID
                direct_url, file_uuid = await self._get_bunkr_direct_url_with_uuid(session, file_url)
                if not direct_url:
                    self.log(f"Could not get direct URL for {file_url}", 'warning')
                    failed.append(file_url)
                    continue

                filename = direct_url.split('/')[-1].split('?')[0]
                if not filename:
                    filename = f'{i:03d}.bin'
                save_path = save_dir / self._sanitize_filename(filename)

                try:
                    await self._download_file(session, direct_url, save_path,
                                              try_cdn_fallback=True, file_uuid=file_uuid)
                    files.append(str(save_path))
                    self.log(f"Downloaded: {filename}", 'info')
                except Exception as e:
                    self.log(f"Failed to download {filename}: {e}", 'warning')
                    failed.append(filename)
        else:
            # Single file page
            direct_url, file_uuid = await self._get_bunkr_direct_url_with_uuid(session, url)
            if not direct_url:
                return {'success': False, 'files': [], 'error': 'Could not get direct download URL'}

            filename = direct_url.split('/')[-1].split('?')[0] or 'download.bin'
            save_path = save_dir / self._sanitize_filename(filename)

            await self._download_file(session, direct_url, save_path,
                                      try_cdn_fallback=True, file_uuid=file_uuid)
            files.append(str(save_path))

    result = {'success': len(files) > 0, 'files': files, 'error': None}
    if failed:
        result['failed'] = failed
        result['error'] = f'{len(failed)} files failed to download'
    elif not files:
        # An album without any file links should not fail with error=None
        result['error'] = 'No files found in Bunkr album'
    return result
|
|
|
|
async def _get_bunkr_direct_url_with_uuid(self, session: aiohttp.ClientSession, page_url: str) -> tuple:
    """Resolve a Bunkr file page to ``(direct_url, file_uuid)``.

    The page HTML is searched for a ``<uuid>.<ext>`` file identifier and for
    links to a ``*.bunkr.ru`` host. The first such link that answers a HEAD
    probe is returned. Otherwise, when an identifier was found, every entry
    of ``BUNKR_CDNS`` is probed in order.

    Returns:
        ``(url, file_uuid)`` on success, ``(None, file_uuid)`` when no
        reachable URL was found, and ``(None, None)`` when the page could
        not be fetched or an exception occurred.
    """
    uuid_patterns = (
        r'data-v="([a-f0-9-]{36}\.[a-z0-9]+)"',
        r'([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}\.[a-z0-9]+)',
    )
    cdn_patterns = (
        r'href="(https://[^"]*\.bunkr\.ru/[^"]+)"',
        r'src="(https://[^"]*\.bunkr\.ru/[^"]+)"',
        r'data-src="(https://[^"]*\.bunkr\.ru/[^"]+)"',
    )

    try:
        async with session.get(page_url) as resp:
            if resp.status != 200:
                return None, None
            page_html = await resp.text()

        # Extract file UUID first: the earliest pattern that matches wins
        uuid_hits = (re.search(pattern, page_html) for pattern in uuid_patterns)
        uuid_match = next((hit for hit in uuid_hits if hit), None)
        file_uuid = uuid_match.group(1) if uuid_match else None

        # Try to find existing CDN URL in page
        for pattern in cdn_patterns:
            link = re.search(pattern, page_html)
            if not link:
                continue
            candidate = link.group(1)
            if await self._check_url_accessible(session, candidate):
                return candidate, file_uuid

        # If we have UUID, try CDNs
        if file_uuid:
            self.log(f"Found file UUID: {file_uuid}, trying CDNs...", 'debug')
            for cdn in self.BUNKR_CDNS:
                cdn_url = f"https://{cdn}/{file_uuid}"
                if await self._check_url_accessible(session, cdn_url):
                    self.log(f"Found working CDN: {cdn}", 'debug')
                    return cdn_url, file_uuid

        return None, file_uuid
    except Exception as exc:
        self.log(f"Error getting Bunkr direct URL: {exc}", 'warning')
        return None, None
|
|
|
|
async def _check_url_accessible(self, session: aiohttp.ClientSession, url: str) -> bool:
    """Return True when a HEAD request to *url* ends in HTTP 200.

    Redirects are followed and the probe is limited to 10 seconds in total.
    Any exception raised by the request counts as "not accessible".
    """
    try:
        probe = session.head(
            url,
            allow_redirects=True,
            timeout=aiohttp.ClientTimeout(total=10),
        )
        async with probe as resp:
            reachable = resp.status == 200
    except Exception:
        reachable = False
    return reachable
|
|
|
|
async def _download_fileditch(self, url: str, save_dir: Path) -> Dict:
    """Download from FileDitch (Cloudflare-protected).

    The file name is taken from the ``f`` query parameter, e.g.
    ``file.php?f=/b74/tLyJWGrzvSyRlJvBVDBa.mp4``. Cookies and a user agent
    are obtained through ``CloudflareHandler`` (FlareSolverr at
    ``http://localhost:8191/v1``) and then used for the actual download.

    Args:
        url: FileDitch download URL.
        save_dir: Directory that receives the downloaded file.

    Returns:
        ``{'success': bool, 'files': [paths], 'error': str or None}``.

    Raises:
        Exception: A failed download propagates from ``_download_file``.
    """
    # Validate the URL before importing the handler or contacting FlareSolverr
    file_path = parse_qs(urlparse(url).query).get('f', [''])[0]
    if not file_path:
        return {'success': False, 'files': [], 'error': 'Invalid FileDitch URL - no file parameter'}

    # Last path segment; a value without "/" is used as the name unchanged
    filename = file_path.rsplit('/', 1)[-1]
    if not filename:
        return {'success': False, 'files': [], 'error': 'Could not extract filename from URL'}

    save_path = save_dir / self._sanitize_filename(filename)

    from modules.cloudflare_handler import CloudflareHandler

    # Use CloudflareHandler to get cookies via FlareSolverr
    cf_handler = CloudflareHandler(
        module_name='FileDitch',
        flaresolverr_url='http://localhost:8191/v1',
        flaresolverr_enabled=True,
    )

    self.log('Bypassing Cloudflare for FileDitch via FlareSolverr...', 'info')
    # NOTE(review): this is a plain synchronous call inside a coroutine; if
    # it performs network I/O it stalls the event loop meanwhile - confirm.
    if not cf_handler.get_cookies_via_flaresolverr(url):
        return {'success': False, 'files': [], 'error': 'Failed to bypass Cloudflare for FileDitch'}

    cookies = cf_handler.get_cookies_dict()
    user_agent = cf_handler.get_user_agent()

    # Download with the obtained cookies
    timeout = aiohttp.ClientTimeout(total=3600)
    headers = {'User-Agent': user_agent or 'Mozilla/5.0'}

    # Hand the cookies to the session itself. CookieJar.update_cookies()
    # expects ``response_url`` to be a yarl.URL, so passing the URL as a plain
    # string made cookie installation fail before the download started.
    async with aiohttp.ClientSession(timeout=timeout, cookies=cookies, headers=headers) as session:
        await self._download_file(session, url, save_path, headers=headers)

    return {'success': True, 'files': [str(save_path)], 'error': None}
|
|
|
|
async def _download_file(self, session: 'aiohttp.ClientSession', url: str,
                         save_path: Path, headers: Optional[Dict] = None,
                         try_cdn_fallback: bool = False, file_uuid: Optional[str] = None) -> None:
    """Download a single file with streaming and optional CDN fallback.

    The body is streamed in 64KB chunks into a ``<name>.part`` file next to
    ``save_path`` and renamed onto ``save_path`` only after the whole body
    was received, so a failed or interrupted attempt never leaves a
    truncated file at the final location.

    Args:
        session: Session used for the GET request(s).
        url: Primary download URL.
        save_path: Final location of the file; parent directories are
            created as needed.
        headers: Optional extra request headers.
        try_cdn_fallback: When True and ``file_uuid`` is given, every entry
            of ``BUNKR_CDNS`` is tried after ``url`` as
            ``https://<cdn>/<file_uuid>``.
        file_uuid: File identifier used to build the fallback URLs.

    Raises:
        Exception: When none of the candidate URLs could be downloaded; the
            message carries the last error seen.
    """
    save_path.parent.mkdir(parents=True, exist_ok=True)
    part_path = save_path.with_name(save_path.name + '.part')

    urls_to_try = [url]

    # If CDN fallback enabled and we have a file UUID, add alternate CDNs
    if try_cdn_fallback and file_uuid:
        for cdn in self.BUNKR_CDNS:
            alt_url = f"https://{cdn}/{file_uuid}"
            if alt_url != url:
                urls_to_try.append(alt_url)

    last_error = None
    try:
        for try_url in urls_to_try:
            try:
                self.log(f"Downloading: {save_path.name} from {try_url[:60]}...", 'info')
                async with session.get(try_url, headers=headers) as resp:
                    if resp.status != 200:
                        last_error = f"HTTP {resp.status}"
                        self.log(f"Download failed: {save_path.name} - {last_error}", 'warning')
                        continue

                    total_size = int(resp.headers.get('content-length', 0))
                    downloaded = 0
                    last_log_pct = 0

                    with open(part_path, 'wb') as f:
                        async for chunk in resp.content.iter_chunked(65536):  # 64KB chunks
                            f.write(chunk)
                            downloaded += len(chunk)

                            # Log and callback progress every 2%
                            # (only possible when the server sent a length)
                            if total_size > 0:
                                pct = int(downloaded * 100 / total_size)
                                if pct >= last_log_pct + 2:
                                    self.log(f"  {save_path.name}: {pct}% ({downloaded // (1024*1024)}MB / {total_size // (1024*1024)}MB)", 'info')
                                    last_log_pct = pct
                                    # Call progress callback if provided
                                    if self.progress_callback:
                                        try:
                                            self.progress_callback(downloaded, total_size, save_path.name)
                                        except Exception:
                                            pass  # Don't fail download due to callback error

                    # Body fully received: publish it under the final name
                    part_path.replace(save_path)

                self.log(f"Downloaded: {save_path.name} ({downloaded // (1024*1024)}MB)", 'info')
                return  # Success
            except Exception as e:
                last_error = str(e)
                self.log(f"Download error: {save_path.name} - {last_error}", 'warning')
                # Try next CDN
                continue

        raise Exception(f"Download failed after trying {len(urls_to_try)} URLs: {last_error}")
    finally:
        # Best-effort removal of a leftover partial file; after a successful
        # rename there is nothing to remove.
        try:
            part_path.unlink()
        except OSError:
            pass
|
|
|
|
def _sanitize_filename(self, filename: str) -> str:
    """Return *filename* reduced to characters safe for a file name.

    Characters ``< > : " / \\ | ? *`` and ASCII control characters are
    dropped, then leading/trailing dots and spaces are trimmed. An empty
    input or an empty result yields ``'download.bin'``.
    """
    fallback = 'download.bin'
    if not filename:
        return fallback

    # Remove invalid characters, then trim dots/spaces from both ends
    cleaned = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '', filename).strip('. ')
    return cleaned if cleaned else fallback
|
|
|
|
@classmethod
def get_supported_domains(cls) -> List[str]:
    """Return every domain listed in ``SUPPORTED_HOSTS`` as one flat list.

    Domains appear in the order of the hosts in the mapping and, within a
    host, in the order of that host's list.
    """
    return [
        domain
        for host_domains in cls.SUPPORTED_HOSTS.values()
        for domain in host_domains
    ]
|