""" Download files from external file hosting services Supports: Bunkr, Pixeldrain, Gofile, Cyberdrop """ import asyncio import re from pathlib import Path from typing import Dict, List, Optional from urllib.parse import urlparse, parse_qs import aiohttp from modules.base_module import LoggingMixin, RateLimitMixin class FileHostDownloader(LoggingMixin, RateLimitMixin): """ Download files from various file hosting services Used for manual import of PPV content """ SUPPORTED_HOSTS = { 'bunkr': ['bunkr.sk', 'bunkr.si', 'bunkr.la', 'bunkrr.ru', 'bunkr.ph', 'bunkr.is', 'bunkr.ac', 'bunkr.cr'], 'pixeldrain': ['pixeldrain.com'], 'gofile': ['gofile.io'], 'cyberdrop': ['cyberdrop.me', 'cyberdrop.to', 'cyberdrop.cc'], 'fileditch': ['fileditchfiles.me', 'fileditch.me'], } # Bunkr CDN servers (food-themed) - try in order BUNKR_CDNS = [ 'i-soup.bunkr.ru', 'i-burger.bunkr.ru', 'i-pizza.bunkr.ru', 'i-taco.bunkr.ru', 'i-fries.bunkr.ru', 'i-hotdog.bunkr.ru', 'i-nachos.bunkr.ru', 'i-sushi.bunkr.ru', 'i-ramen.bunkr.ru', 'i-curry.bunkr.ru', 'i-kebab.bunkr.ru', 'i-pasta.bunkr.ru', 'i-steak.bunkr.ru', 'i-salad.bunkr.ru', 'i-sandwich.bunkr.ru', 'i-waffle.bunkr.ru', 'i-pancake.bunkr.ru', 'i-donut.bunkr.ru', 'i-cookie.bunkr.ru', 'i-cake.bunkr.ru', 'i-bacon.bunkr.ru', 'i-cheese.bunkr.ru', 'i-chicken.bunkr.ru', 'i-fish.bunkr.ru', 'i-noodle.bunkr.ru', 'i-rice.bunkr.ru', 'i-bread.bunkr.ru', 'burger.bunkr.ru', 'pizza.bunkr.ru', 'milkshake.bunkr.ru', ] def __init__(self, log_callback=None, progress_callback=None): self._init_logger('PaidContent', log_callback, default_module='FileHost') self._init_rate_limiter(min_delay=1, max_delay=3) self.progress_callback = progress_callback # Called with (downloaded_bytes, total_bytes, filename) def detect_host(self, url: str) -> Optional[str]: """Detect which file host a URL belongs to""" try: parsed = urlparse(url) domain = parsed.netloc.lower().replace('www.', '') for host, domains in self.SUPPORTED_HOSTS.items(): if domain in domains: return host except Exception: pass return None def is_supported_url(self, url: str) -> bool: """Check if URL is from a supported file host""" return self.detect_host(url) is not None async def download_url(self, url: str, save_dir: Path) -> Dict: """ Download file(s) from URL Returns: {'success': bool, 'files': [paths], 'error': str} """ host = self.detect_host(url) if not host: return {'success': False, 'files': [], 'error': 'Unsupported host'} handler = getattr(self, f'_download_{host}', None) if not handler: return {'success': False, 'files': [], 'error': f'No handler for {host}'} try: save_dir = Path(save_dir) save_dir.mkdir(parents=True, exist_ok=True) return await handler(url, save_dir) except Exception as e: self.log(f"Error downloading from {host}: {e}", 'error') return {'success': False, 'files': [], 'error': str(e)} async def _download_pixeldrain(self, url: str, save_dir: Path) -> Dict: """Download from Pixeldrain""" # Extract file ID from URL # Format: https://pixeldrain.com/u/FILEID or /l/LISTID parsed = urlparse(url) path_parts = parsed.path.strip('/').split('/') if len(path_parts) < 2: return {'success': False, 'files': [], 'error': 'Invalid Pixeldrain URL'} url_type, file_id = path_parts[0], path_parts[1] files = [] timeout = aiohttp.ClientTimeout(total=300) async with aiohttp.ClientSession(timeout=timeout) as session: if url_type == 'u': # Single file api_url = f"https://pixeldrain.com/api/file/{file_id}/info" async with session.get(api_url) as resp: if resp.status != 200: return {'success': False, 'files': [], 'error': f'API error: {resp.status}'} info = await resp.json() download_url = f"https://pixeldrain.com/api/file/{file_id}" filename = info.get('name', f'{file_id}.bin') save_path = save_dir / self._sanitize_filename(filename) await self._download_file(session, download_url, save_path) files.append(str(save_path)) elif url_type == 'l': # List (album) api_url = f"https://pixeldrain.com/api/list/{file_id}" async with session.get(api_url) as resp: if resp.status != 200: return {'success': False, 'files': [], 'error': f'API error: {resp.status}'} data = await resp.json() for i, item in enumerate(data.get('files', [])): self._delay_between_items() item_id = item['id'] filename = item.get('name', f'{i:03d}_{item_id}.bin') download_url = f"https://pixeldrain.com/api/file/{item_id}" save_path = save_dir / self._sanitize_filename(filename) try: await self._download_file(session, download_url, save_path) files.append(str(save_path)) except Exception as e: self.log(f"Failed to download {filename}: {e}", 'warning') return {'success': True, 'files': files, 'error': None} async def _download_gofile(self, url: str, save_dir: Path) -> Dict: """Download from Gofile""" # Extract content ID from URL # Format: https://gofile.io/d/CONTENTID parsed = urlparse(url) path_parts = parsed.path.strip('/').split('/') if len(path_parts) < 2 or path_parts[0] != 'd': return {'success': False, 'files': [], 'error': 'Invalid Gofile URL'} content_id = path_parts[1] files = [] timeout = aiohttp.ClientTimeout(total=300) async with aiohttp.ClientSession(timeout=timeout) as session: # Create guest account token (POST request required since API change) async with session.post('https://api.gofile.io/accounts') as resp: if resp.status != 200: return {'success': False, 'files': [], 'error': 'Failed to get Gofile token'} account_data = await resp.json() if account_data.get('status') != 'ok': return {'success': False, 'files': [], 'error': f"Gofile API error: {account_data.get('status')}"} token = account_data.get('data', {}).get('token') if not token: return {'success': False, 'files': [], 'error': 'No Gofile token received'} # Get content info # Gofile requires x-website-token header (changed from query param in 2024) headers = { 'Authorization': f'Bearer {token}', 'x-website-token': '4fd6sg89d7s6', } api_url = f"https://api.gofile.io/contents/{content_id}" async with session.get(api_url, headers=headers) as resp: if resp.status == 401: return {'success': False, 'files': [], 'error': 'Gofile authentication failed - websiteToken may have changed'} if resp.status != 200: return {'success': False, 'files': [], 'error': f'Failed to get content: {resp.status}'} content_data = await resp.json() if content_data.get('status') == 'error-notPremium': return {'success': False, 'files': [], 'error': 'Gofile requires premium account for API access - try direct download'} if content_data.get('status') != 'ok': error = content_data.get('data', {}).get('message', content_data.get('status', 'Unknown error')) return {'success': False, 'files': [], 'error': error} contents = content_data.get('data', {}).get('children', {}) for item_id, item in contents.items(): if item.get('type') != 'file': continue self._delay_between_items() download_url = item.get('link') filename = item.get('name', f'{item_id}.bin') save_path = save_dir / self._sanitize_filename(filename) try: await self._download_file(session, download_url, save_path, headers=headers) files.append(str(save_path)) except Exception as e: self.log(f"Failed to download {filename}: {e}", 'warning') return {'success': True, 'files': files, 'error': None} async def _download_cyberdrop(self, url: str, save_dir: Path) -> Dict: """Download from Cyberdrop""" # Cyberdrop albums: https://cyberdrop.me/a/ALBUMID # Single files: https://cyberdrop.me/f/FILEID or direct CDN links files = [] timeout = aiohttp.ClientTimeout(total=300) async with aiohttp.ClientSession(timeout=timeout) as session: parsed = urlparse(url) path_parts = parsed.path.strip('/').split('/') if len(path_parts) >= 2 and path_parts[0] == 'a': # Album album_url = url async with session.get(album_url) as resp: if resp.status != 200: return {'success': False, 'files': [], 'error': f'Failed to fetch album: {resp.status}'} html = await resp.text() # Parse file links from HTML # Pattern: href="https://fs-XXX.cyberdrop.to/FILE" cdn_pattern = r'href="(https://[a-z0-9-]+\.cyberdrop\.[a-z]+/[^"]+)"' matches = re.findall(cdn_pattern, html) for i, file_url in enumerate(matches): self._delay_between_items() filename = file_url.split('/')[-1].split('?')[0] if not filename: filename = f'{i:03d}.bin' save_path = save_dir / self._sanitize_filename(filename) try: await self._download_file(session, file_url, save_path) files.append(str(save_path)) except Exception as e: self.log(f"Failed to download {filename}: {e}", 'warning') else: # Single file or direct CDN link filename = parsed.path.split('/')[-1] or 'download.bin' save_path = save_dir / self._sanitize_filename(filename) await self._download_file(session, url, save_path) files.append(str(save_path)) return {'success': True, 'files': files, 'error': None} async def _download_bunkr(self, url: str, save_dir: Path) -> Dict: """Download from Bunkr with CDN fallback support""" # Bunkr albums: https://bunkr.sk/a/ALBUMID # Single files: https://bunkr.sk/f/FILEID or https://bunkr.sk/v/VIDEOID files = [] failed = [] timeout = aiohttp.ClientTimeout(total=600) # Increased for large files headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36' } async with aiohttp.ClientSession(timeout=timeout, headers=headers) as session: parsed = urlparse(url) path_parts = parsed.path.strip('/').split('/') if len(path_parts) >= 2 and path_parts[0] == 'a': # Album page async with session.get(url) as resp: if resp.status != 200: return {'success': False, 'files': [], 'error': f'Failed to fetch album: {resp.status}'} html = await resp.text() # Parse file links from HTML - look for /f/ links file_pattern = r'href="(/f/[^"]+)"' matches = re.findall(file_pattern, html) self.log(f"Found {len(matches)} files in Bunkr album", 'info') for i, file_path in enumerate(matches): self._delay_between_items() # Make absolute URL file_url = f"https://{parsed.netloc}{file_path}" # Get direct download URL and file UUID direct_url, file_uuid = await self._get_bunkr_direct_url_with_uuid(session, file_url) if not direct_url: self.log(f"Could not get direct URL for {file_url}", 'warning') failed.append(file_url) continue filename = direct_url.split('/')[-1].split('?')[0] if not filename: filename = f'{i:03d}.bin' save_path = save_dir / self._sanitize_filename(filename) try: await self._download_file(session, direct_url, save_path, try_cdn_fallback=True, file_uuid=file_uuid) files.append(str(save_path)) self.log(f"Downloaded: {filename}", 'info') except Exception as e: self.log(f"Failed to download {filename}: {e}", 'warning') failed.append(filename) else: # Single file page direct_url, file_uuid = await self._get_bunkr_direct_url_with_uuid(session, url) if not direct_url: return {'success': False, 'files': [], 'error': 'Could not get direct download URL'} filename = direct_url.split('/')[-1].split('?')[0] or 'download.bin' save_path = save_dir / self._sanitize_filename(filename) await self._download_file(session, direct_url, save_path, try_cdn_fallback=True, file_uuid=file_uuid) files.append(str(save_path)) result = {'success': len(files) > 0, 'files': files, 'error': None} if failed: result['failed'] = failed result['error'] = f'{len(failed)} files failed to download' return result async def _get_bunkr_direct_url_with_uuid(self, session: aiohttp.ClientSession, page_url: str) -> tuple: """Extract direct download URL and file UUID from Bunkr file page""" try: async with session.get(page_url) as resp: if resp.status != 200: return None, None html = await resp.text() file_uuid = None # Extract file UUID first uuid_patterns = [ r'data-v="([a-f0-9-]{36}\.[a-z0-9]+)"', r'([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}\.[a-z0-9]+)', ] for pattern in uuid_patterns: match = re.search(pattern, html) if match: file_uuid = match.group(1) break # Try to find existing CDN URL in page cdn_patterns = [ r'href="(https://[^"]*\.bunkr\.ru/[^"]+)"', r'src="(https://[^"]*\.bunkr\.ru/[^"]+)"', r'data-src="(https://[^"]*\.bunkr\.ru/[^"]+)"', ] for pattern in cdn_patterns: match = re.search(pattern, html) if match: url = match.group(1) if await self._check_url_accessible(session, url): return url, file_uuid # If we have UUID, try CDNs if file_uuid: self.log(f"Found file UUID: {file_uuid}, trying CDNs...", 'debug') for cdn in self.BUNKR_CDNS: cdn_url = f"https://{cdn}/{file_uuid}" if await self._check_url_accessible(session, cdn_url): self.log(f"Found working CDN: {cdn}", 'debug') return cdn_url, file_uuid return None, file_uuid except Exception as e: self.log(f"Error getting Bunkr direct URL: {e}", 'warning') return None, None async def _check_url_accessible(self, session: aiohttp.ClientSession, url: str) -> bool: """Check if a URL is accessible (returns 200)""" try: async with session.head(url, allow_redirects=True, timeout=aiohttp.ClientTimeout(total=10)) as resp: return resp.status == 200 except Exception: return False async def _download_fileditch(self, url: str, save_dir: Path) -> Dict: """Download from FileDitch (Cloudflare-protected)""" from modules.cloudflare_handler import CloudflareHandler # Extract filename from URL: file.php?f=/b74/tLyJWGrzvSyRlJvBVDBa.mp4 parsed = urlparse(url) params = parse_qs(parsed.query) file_path = params.get('f', [''])[0] if not file_path: return {'success': False, 'files': [], 'error': 'Invalid FileDitch URL - no file parameter'} filename = file_path.rsplit('/', 1)[-1] if '/' in file_path else file_path if not filename: return {'success': False, 'files': [], 'error': 'Could not extract filename from URL'} save_path = save_dir / self._sanitize_filename(filename) # Use CloudflareHandler to get cookies via FlareSolverr cf_handler = CloudflareHandler( module_name='FileDitch', flaresolverr_url='http://localhost:8191/v1', flaresolverr_enabled=True, ) self.log('Bypassing Cloudflare for FileDitch via FlareSolverr...', 'info') if not cf_handler.get_cookies_via_flaresolverr(url): return {'success': False, 'files': [], 'error': 'Failed to bypass Cloudflare for FileDitch'} cookies = cf_handler.get_cookies_dict() user_agent = cf_handler.get_user_agent() # Download with the obtained cookies timeout = aiohttp.ClientTimeout(total=3600) cookie_jar = aiohttp.CookieJar() headers = {'User-Agent': user_agent or 'Mozilla/5.0'} async with aiohttp.ClientSession(timeout=timeout, cookie_jar=cookie_jar, headers=headers) as session: # Set cookies on session for name, value in cookies.items(): cookie_jar.update_cookies({name: value}, response_url=url) await self._download_file(session, url, save_path, headers=headers) return {'success': True, 'files': [str(save_path)], 'error': None} async def _download_file(self, session: aiohttp.ClientSession, url: str, save_path: Path, headers: Dict = None, try_cdn_fallback: bool = False, file_uuid: str = None) -> None: """Download a single file with streaming and optional CDN fallback""" save_path.parent.mkdir(parents=True, exist_ok=True) urls_to_try = [url] # If CDN fallback enabled and we have a file UUID, add alternate CDNs if try_cdn_fallback and file_uuid: for cdn in self.BUNKR_CDNS: alt_url = f"https://{cdn}/{file_uuid}" if alt_url != url: urls_to_try.append(alt_url) last_error = None for try_url in urls_to_try: try: self.log(f"Downloading: {save_path.name} from {try_url[:60]}...", 'info') async with session.get(try_url, headers=headers) as resp: if resp.status == 200: total_size = int(resp.headers.get('content-length', 0)) downloaded = 0 last_log_pct = 0 with open(save_path, 'wb') as f: async for chunk in resp.content.iter_chunked(65536): # 64KB chunks f.write(chunk) downloaded += len(chunk) # Log and callback progress every 2% if total_size > 0: pct = int(downloaded * 100 / total_size) if pct >= last_log_pct + 2: self.log(f" {save_path.name}: {pct}% ({downloaded // (1024*1024)}MB / {total_size // (1024*1024)}MB)", 'info') last_log_pct = pct # Call progress callback if provided if self.progress_callback: try: self.progress_callback(downloaded, total_size, save_path.name) except Exception: pass # Don't fail download due to callback error self.log(f"Downloaded: {save_path.name} ({downloaded // (1024*1024)}MB)", 'info') return # Success else: last_error = f"HTTP {resp.status}" self.log(f"Download failed: {save_path.name} - {last_error}", 'warning') except Exception as e: last_error = str(e) self.log(f"Download error: {save_path.name} - {last_error}", 'warning') # Try next CDN continue raise Exception(f"Download failed after trying {len(urls_to_try)} URLs: {last_error}") def _sanitize_filename(self, filename: str) -> str: """Sanitize filename for filesystem""" if not filename: return 'download.bin' # Remove/replace invalid characters filename = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '', filename) filename = filename.strip('. ') return filename or 'download.bin' @classmethod def get_supported_domains(cls) -> List[str]: """Get list of all supported domains""" domains = [] for host_domains in cls.SUPPORTED_HOSTS.values(): domains.extend(host_domains) return domains