#!/usr/bin/env python3
"""
Multi-threaded Download Manager

Handles concurrent downloads with rate limiting, retries, and progress
tracking.  Can be used by forum_downloader, fastdl_module, and other
downloaders.
"""

import os
import re
import time
import hashlib
import requests
import threading
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Optional, Any, Callable
from concurrent.futures import ThreadPoolExecutor, as_completed
from threading import Lock, Semaphore
from dataclasses import dataclass
import sqlite3
from urllib.parse import urlparse

from modules.base_module import LoggingMixin
from modules.universal_logger import get_logger

logger = get_logger('DownloadManager')  # For standalone/example usage


@dataclass
class DownloadItem:
    """Single download item."""
    url: str
    save_path: Path
    referer: Optional[str] = None
    headers: Optional[Dict[str, str]] = None
    metadata: Optional[Dict[str, Any]] = None
    post_date: Optional[datetime] = None  # Timestamp to set on downloaded file
    retry_count: int = 0
    max_retries: int = 3


@dataclass
class DownloadResult:
    """Result of a download."""
    success: bool
    item: DownloadItem
    file_size: Optional[int] = None
    download_time: Optional[float] = None
    error: Optional[str] = None
    file_hash: Optional[str] = None


class DownloadManager(LoggingMixin):
    """
    Multi-threaded download manager with:
    - Concurrent downloads
    - Rate limiting
    - Automatic retries
    - Progress tracking
    - Database tracking
    - Playwright support for authenticated downloads
    """

    def __init__(self, max_workers: int = 5,
                 rate_limit: float = 0.5,
                 timeout: int = 30,
                 chunk_size: int = 8192,
                 use_database: bool = False,
                 db_path: str = None,
                 show_progress: bool = True,
                 show_debug: bool = False):
        """
        Initialize download manager.

        Args:
            max_workers: Maximum concurrent downloads
            rate_limit: Seconds between downloads per thread
            timeout: Download timeout in seconds
            chunk_size: Chunk size for streaming downloads
            use_database: Track downloads in database
            db_path: Path to database file
            show_progress: Show download progress
            show_debug: Show debug messages
        """
        self.max_workers = max_workers
        self.rate_limit = rate_limit
        self.timeout = timeout
        self.chunk_size = chunk_size
        self.use_database = use_database
        self.db_path = db_path
        self.show_progress = show_progress

        # Initialize logging via mixin
        self._init_logger('DownloadManager', None, default_module='Download',
                          show_debug=show_debug)

        # Thread synchronization
        self.download_lock = Lock()
        self.rate_limiter = Semaphore(max_workers)
        self.last_download_time = {}  # per-thread timestamp for rate limiting

        # Thread-local storage for ImageBam sessions (each thread gets its own session)
        self._imagebam_session_local = threading.local()

        # Statistics (guarded by download_lock)
        self.stats = {
            'total': 0,
            'successful': 0,
            'failed': 0,
            'skipped': 0,
            'total_bytes': 0,
            'total_time': 0
        }

        # User agent
        self.user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"

        # Playwright context for authenticated downloads
        self.playwright_context = None

        # Initialize database only if explicitly enabled AND path provided
        if self.use_database and self.db_path:
            self._init_database()
        elif self.use_database and not self.db_path:
            # Disable database if no path provided to prevent creating files in CWD
            self.use_database = False

    def _init_database(self):
        """Initialize download tracking database (table + lookup indexes)."""
        if not self.db_path:
            return
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS downloads (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    url TEXT UNIQUE NOT NULL,
                    file_path TEXT NOT NULL,
                    file_hash TEXT,
                    file_size INTEGER,
                    download_date DATETIME DEFAULT CURRENT_TIMESTAMP,
                    metadata TEXT
                )
            ''')
            cursor.execute('''
                CREATE INDEX IF NOT EXISTS idx_downloads_url ON downloads(url)
            ''')
            cursor.execute('''
                CREATE INDEX IF NOT EXISTS idx_downloads_hash ON downloads(file_hash)
            ''')
            conn.commit()
        finally:
            conn.close()

    def set_playwright_context(self, context):
        """Set Playwright context for authenticated downloads.

        Also mirrors the context's cookies into ``self.cookies`` so the
        requests-based download path can reuse the authenticated session.
        """
        self.playwright_context = context
        # Extract cookies from context for requests library
        if context:
            try:
                self.cookies = {}
                cookies = context.cookies()
                for cookie in cookies:
                    self.cookies[cookie['name']] = cookie['value']
            except Exception:
                # Best-effort: a stale/closed context just means no cookies
                self.cookies = {}

    def _is_already_downloaded(self, url: str, file_path: Path) -> bool:
        """Check if file was already downloaded.

        Without a database this is a simple non-empty-file check; with one,
        the recorded size must still match the file on disk.
        """
        if not self.use_database:
            return file_path.exists() and file_path.stat().st_size > 0

        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            cursor.execute(
                "SELECT file_path, file_size FROM downloads WHERE url = ?",
                (url,)
            )
            result = cursor.fetchone()
        finally:
            conn.close()

        if result:
            # Check if file still exists and has expected size
            saved_path = Path(result[0])
            if saved_path.exists() and saved_path.stat().st_size == result[1]:
                return True
        return False

    def _apply_rate_limit(self, thread_id: int):
        """Apply rate limiting per thread.

        BUG FIX: the wait is computed under the lock but slept OUTSIDE it,
        so one throttled worker no longer blocks every other thread's stats
        updates and rate checks for the duration of its sleep.
        """
        with self.download_lock:
            last = self.last_download_time.get(thread_id)
            wait = (self.rate_limit - (time.time() - last)) if last else 0.0
        if wait > 0:
            time.sleep(wait)
        with self.download_lock:
            self.last_download_time[thread_id] = time.time()

    def _extract_pixhost_direct_url(self, show_url: str) -> Optional[str]:
        """Extract direct image URL from pixhost show URL.

        Probes img{N}.pixhost.to hosts with HEAD requests: a list of common
        host numbers first, then a sequential scan of the remaining 1-120.
        """
        try:
            # Pattern to extract ID and filename from show URL
            show_pattern = re.compile(
                r"https?://(?:www\.)?pixhost\.to/show/(\d+)/([^/]+)$",
                re.IGNORECASE)
            match = show_pattern.match(show_url)
            if not match:
                return None

            img_id = match.group(1)
            filename = match.group(2)

            # Try common hosts in order
            common_hosts = [1, 2, 3, 4, 5, 10, 15, 20, 25, 30, 40, 50, 60,
                            70, 80, 90, 100]
            for host_num in common_hosts:
                # BUG FIX: use the extracted filename (was a literal placeholder)
                test_url = f"https://img{host_num}.pixhost.to/images/{img_id}/{filename}"
                try:
                    # Quick HEAD request to check if URL exists
                    response = requests.head(test_url, timeout=2,
                                             allow_redirects=False)
                    if response.status_code == 200:
                        return test_url
                except requests.RequestException:
                    continue

            # Try sequential scan if common hosts don't work
            for host_num in range(1, 121):
                if host_num in common_hosts:
                    continue
                test_url = f"https://img{host_num}.pixhost.to/images/{img_id}/{filename}"
                try:
                    response = requests.head(test_url, timeout=1,
                                             allow_redirects=False)
                    if response.status_code == 200:
                        return test_url
                except requests.RequestException:
                    continue

            return None
        except Exception as e:
            self.log(f"Error extracting pixhost URL: {e}", "error")
            return None

    def _extract_imagebam_direct_url(self, imagebam_url: str) -> Optional[str]:
        """Extract direct image URL from ImageBam page.

        Uses a thread-local requests session with interstitial-bypass
        cookies, then scrapes the page HTML for the full-resolution
        (``_o`` suffix) image URL, falling back to any images.imagebam.com URL.
        """
        try:
            # Get or create thread-local ImageBam session (thread-safe)
            session = getattr(self._imagebam_session_local, 'session', None)
            if session is None:
                session = requests.Session()
                session.headers.update({
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
                })
                # Set cookies to bypass the interstitial ad page (both old and new cookies)
                session.cookies.set('nsfw_inter', '1', domain='.imagebam.com')
                session.cookies.set('sfw_inter', '1', domain='.imagebam.com')
                self._imagebam_session_local.session = session

            # ImageBam now requires two requests - first to get session cookies,
            # second to get image.  First request sets up the session.
            response = session.get(imagebam_url, timeout=5)
            if response.status_code != 200:
                self.log(f"ImageBam page returned {response.status_code}", "warning")
                return None

            # Check if we got the interstitial page (contains "Continue to your image")
            if 'Continue to your image' in response.text or 'Please wait' in response.text:
                # Make sure bypass cookies are set and request again
                session.cookies.set('sfw_inter', '1', domain='.imagebam.com')
                session.cookies.set('nsfw_inter', '1', domain='.imagebam.com')
                response = session.get(imagebam_url, timeout=5)

            # Look for the direct image URL in the HTML.
            # ImageBam stores the full image with _o suffix;
            # first try to find the full resolution image.
            full_img_pattern = r'(https?://images\d*\.imagebam\.com/[a-f0-9/]+/[A-Z0-9]+_o\.\w+)'
            matches = re.findall(full_img_pattern, response.text, re.IGNORECASE)
            if matches:
                # Return the first full resolution image found
                direct_url = matches[0]
                self.log(f"Extracted ImageBam direct URL: {direct_url}", "debug")
                return direct_url

            # Fallback: look for any image on images*.imagebam.com
            # BUG FIX: the <img tag prefix of the first pattern was lost in
            # the original (mangled to ']+src=...').
            fallback_patterns = [
                r'<img[^>]+src="(https?://images\d*\.imagebam\.com/[^"]+)"',
                r'"(https?://images\d*\.imagebam\.com/[^"]+\.(?:jpg|jpeg|png|gif))"',
            ]
            for pattern in fallback_patterns:
                matches = re.findall(pattern, response.text, re.IGNORECASE)
                if matches:
                    direct_url = matches[0]
                    self.log(f"Extracted ImageBam direct URL (fallback): {direct_url}", "debug")
                    return direct_url

            self.log("No direct image URL found in ImageBam HTML", "warning")
            return None

        except requests.Timeout:
            self.log(f"ImageBam extraction timed out for {imagebam_url}", "warning")
            return None
        except Exception as e:
            self.log(f"Error extracting ImageBam URL: {e}", "error")
            return None

    def _download_with_gallery_dl(self, item: DownloadItem) -> DownloadResult:
        """Download using gallery-dl for supported hosts (ImageTwist, etc.)."""
        import subprocess
        start_time = time.time()
        try:
            # Ensure parent directory exists
            item.save_path.parent.mkdir(parents=True, exist_ok=True)

            # Build gallery-dl command
            cmd = [
                "gallery-dl",
                "--dest", str(item.save_path.parent),
                "--filename", item.save_path.name,
                "--no-skip",
                "--no-part",
                "--quiet"
            ]
            # Add referer if provided
            if item.referer:
                cmd.extend(["--header", f"Referer: {item.referer}"])
            cmd.append(item.url)

            # Run gallery-dl with timeout
            result = subprocess.run(
                cmd, capture_output=True, text=True, timeout=60
            )

            if result.returncode == 0 and item.save_path.exists():
                file_size = item.save_path.stat().st_size
                download_time = time.time() - start_time

                # Calculate hash (SHA256 for consistency with unified database)
                with open(item.save_path, 'rb') as f:
                    file_hash = hashlib.sha256(f.read()).hexdigest()

                # Set file timestamp if we have a date
                if item.post_date:
                    try:
                        timestamp_unix = item.post_date.timestamp()
                        os.utime(item.save_path, (timestamp_unix, timestamp_unix))
                    except Exception as e:
                        self.log(f"Failed to set timestamp: {e}", "warning")

                self.log(f"Downloaded via gallery-dl: {item.save_path.name}", "success")
                return DownloadResult(
                    success=True,
                    item=item,
                    file_size=file_size,
                    download_time=download_time,
                    file_hash=file_hash
                )
            else:
                error_msg = result.stderr or "Unknown error"
                return DownloadResult(
                    success=False, item=item,
                    error=f"gallery-dl failed: {error_msg}"
                )
        except subprocess.TimeoutExpired:
            return DownloadResult(success=False, item=item,
                                  error="gallery-dl timed out")
        except Exception as e:
            return DownloadResult(success=False, item=item, error=str(e))

    def _download_from_imagetwist(self, item: DownloadItem) -> DownloadResult:
        """Download image from ImageTwist using gallery-dl for URL resolution.

        ImageTwist serves an error placeholder if requests come too fast,
        so a dedicated 2-second rate limit is applied around both the URL
        resolution and the actual image fetch.
        """
        import subprocess
        start_time = time.time()

        # Rate limiting for ImageTwist (they return error images if too fast)
        if not hasattr(self, '_imagetwist_last_request'):
            self._imagetwist_last_request = 0
        with self.download_lock:
            elapsed = time.time() - self._imagetwist_last_request
            if elapsed < 2.0:  # Minimum 2 seconds between ImageTwist requests
                time.sleep(2.0 - elapsed)
            self._imagetwist_last_request = time.time()

        try:
            # Use gallery-dl to get the actual image URL.
            # CONSISTENCY FIX: fall back to gallery-dl on PATH when the
            # hardcoded venv binary does not exist (matches _download_with_gallery_dl).
            gallery_dl_bin = '/opt/media-downloader/venv/bin/gallery-dl'
            if not os.path.exists(gallery_dl_bin):
                gallery_dl_bin = 'gallery-dl'
            result = subprocess.run(
                [gallery_dl_bin, '-g', item.url],
                capture_output=True, text=True, timeout=30
            )
            if result.returncode != 0 or not result.stdout.strip():
                # Fallback to manual parsing
                return self._download_from_imagetwist_fallback(item, start_time)

            img_url = result.stdout.strip().split('\n')[0]
            if not img_url or 'imagetwist' not in img_url:
                return self._download_from_imagetwist_fallback(item, start_time)

            # Rate limit again before actual download
            with self.download_lock:
                elapsed = time.time() - self._imagetwist_last_request
                if elapsed < 2.0:
                    time.sleep(2.0 - elapsed)
                self._imagetwist_last_request = time.time()

            # Download the actual image - use imagetwist page as Referer
            item.save_path.parent.mkdir(parents=True, exist_ok=True)
            headers = {
                'User-Agent': self.user_agent,
                'Referer': item.url  # Use imagetwist page URL as Referer
            }
            img_response = requests.get(img_url, headers=headers, timeout=30,
                                        stream=True)
            img_response.raise_for_status()

            # Check for ImageTwist error placeholder (8346 bytes - rate limited or deleted)
            content_length = img_response.headers.get('Content-Length', '')
            if content_length == '8346':
                self.log(f"ImageTwist rate limited or unavailable: {item.url}", "warning")
                return DownloadResult(success=False, item=item,
                                      error="ImageTwist error image (rate limited)")

            # Validate it's an image, not HTML
            chunks = []
            for chunk in img_response.iter_content(chunk_size=8192):
                if not chunks:  # First chunk
                    if chunk[:100].lower().find(b'<html') != -1:
                        return DownloadResult(success=False, item=item,
                                              error="Got HTML instead of image")
                chunks.append(chunk)

            # NOTE(review): the success path below was reconstructed — the
            # original text was garbled at this point; verify against VCS.
            content = b''.join(chunks)
            with open(item.save_path, 'wb') as f:
                f.write(content)

            # SHA256 for consistency with unified database
            file_hash = hashlib.sha256(content).hexdigest()

            # Set file timestamp if we have a date
            if item.post_date:
                try:
                    timestamp_unix = item.post_date.timestamp()
                    os.utime(item.save_path, (timestamp_unix, timestamp_unix))
                except Exception as e:
                    self.log(f"Failed to set timestamp: {e}", "warning")

            return DownloadResult(
                success=True,
                item=item,
                file_size=len(content),
                download_time=time.time() - start_time,
                file_hash=file_hash
            )
        except subprocess.TimeoutExpired:
            return self._download_from_imagetwist_fallback(item, start_time)
        except Exception as e:
            return DownloadResult(success=False, item=item, error=str(e))

    def _download_from_imagetwist_fallback(self, item: DownloadItem,
                                           start_time: float) -> DownloadResult:
        """Fallback method using manual page parsing."""
        from bs4 import BeautifulSoup
        import re
        try:
            headers = {
                'User-Agent': self.user_agent,
                'Referer': item.referer or 'https://forum.phun.org/'
            }
            response = requests.get(item.url, headers=headers, timeout=30)
            response.raise_for_status()
            page_content = response.text

            img_url = None

            # Method 1: Look for pic class
            soup = BeautifulSoup(page_content, 'html.parser')
            pic_img = soup.find('img', class_='pic')
            if pic_img and pic_img.get('src'):
                img_url = pic_img['src']

            # Method 2: Regex for i*.imagetwist.com/i/ pattern
            if not img_url:
                match = re.search(
                    r'(https?://i\d*(?:phun)?\.imagetwist\.com/i/[^"\'>\s]+)',
                    page_content)
                if match:
                    img_url = match.group(1)

            if not img_url:
                return DownloadResult(
                    success=False, item=item,
                    error="Could not find direct image URL on ImageTwist page"
                )

            # Download the actual image
            item.save_path.parent.mkdir(parents=True, exist_ok=True)
            img_response = requests.get(img_url, headers=headers, timeout=30,
                                        stream=True)
            img_response.raise_for_status()

            chunks = []
            for chunk in img_response.iter_content(chunk_size=8192):
                if not chunks:
                    if chunk[:100].lower().find(b'<html') != -1:
                        return DownloadResult(success=False, item=item,
                                              error="Got HTML instead of image")
                chunks.append(chunk)

            # NOTE(review): success path reconstructed — the original text was
            # garbled here; verify against VCS.
            content = b''.join(chunks)
            with open(item.save_path, 'wb') as f:
                f.write(content)

            file_hash = hashlib.sha256(content).hexdigest()

            if item.post_date:
                try:
                    timestamp_unix = item.post_date.timestamp()
                    os.utime(item.save_path, (timestamp_unix, timestamp_unix))
                except Exception as e:
                    self.log(f"Failed to set timestamp: {e}", "warning")

            return DownloadResult(
                success=True,
                item=item,
                file_size=len(content),
                download_time=time.time() - start_time,
                file_hash=file_hash
            )
        except Exception as e:
            return DownloadResult(success=False, item=item, error=str(e))

    def _download_with_playwright(self, item: DownloadItem) -> DownloadResult:
        """Download using Playwright for authenticated sessions.

        Falls back to plain requests when no Playwright context is set.
        """
        if not self.playwright_context:
            return self._download_with_requests(item)

        start_time = time.time()
        try:
            page = self.playwright_context.new_page()
            try:
                # Set headers
                headers = item.headers or {}
                if item.referer:
                    headers['Referer'] = item.referer
                if headers:
                    page.set_extra_http_headers(headers)

                # Direct download (pixhost should already be processed)
                response = page.goto(item.url, wait_until='networkidle',
                                     timeout=self.timeout * 1000)
                if response and response.ok:
                    content = response.body()
                    # Check for HTML error pages
                    if content[:1000].lower().find(b'<html') != -1:
                        return DownloadResult(success=False, item=item,
                                              error="Got HTML instead of image")

                    # NOTE(review): save/return path reconstructed — the
                    # original text was garbled here; verify against VCS.
                    item.save_path.parent.mkdir(parents=True, exist_ok=True)
                    with open(item.save_path, 'wb') as f:
                        f.write(content)

                    file_hash = hashlib.sha256(content).hexdigest()

                    if item.post_date:
                        try:
                            timestamp_unix = item.post_date.timestamp()
                            os.utime(item.save_path,
                                     (timestamp_unix, timestamp_unix))
                        except Exception as e:
                            self.log(f"Failed to set timestamp: {e}", "warning")

                    return DownloadResult(
                        success=True,
                        item=item,
                        file_size=len(content),
                        download_time=time.time() - start_time,
                        file_hash=file_hash
                    )
                return DownloadResult(
                    success=False, item=item,
                    error=f"HTTP {response.status if response else 'no response'}"
                )
            finally:
                page.close()
        except Exception as e:
            return DownloadResult(success=False, item=item, error=str(e))

    def _download_with_requests(self, item: DownloadItem) -> DownloadResult:
        """Download using requests library.

        Streams into memory first so HTML error pages can be rejected before
        anything is written to disk.
        """
        start_time = time.time()
        try:
            headers = item.headers or {}
            headers['User-Agent'] = self.user_agent
            if item.referer:
                headers['Referer'] = item.referer

            # Use cookies if available (set by set_playwright_context)
            cookies = getattr(self, 'cookies', {})
            response = requests.get(
                item.url,
                headers=headers,
                cookies=cookies if cookies else None,
                timeout=self.timeout,
                stream=True
            )
            response.raise_for_status()

            # Stream download to memory first to validate content
            item.save_path.parent.mkdir(parents=True, exist_ok=True)
            content = b''
            first_chunk_checked = False
            for chunk in response.iter_content(chunk_size=self.chunk_size):
                if chunk:
                    # Check first chunk for HTML error pages.
                    # BUG FIX: the original checked find(b'') which is always
                    # 0, rejecting every download; must look for b'<html'.
                    if not first_chunk_checked:
                        first_chunk_checked = True
                        if chunk[:100].lower().find(b'<html') != -1:
                            return DownloadResult(
                                success=False, item=item,
                                error="Got HTML instead of image"
                            )
                    content += chunk

            # Save to file only after validation
            with open(item.save_path, 'wb') as f:
                f.write(content)

            # Calculate hash (SHA256 for consistency with unified database)
            file_hash = hashlib.sha256(content).hexdigest()

            # Set file timestamp if we have a date
            if item.post_date:
                try:
                    timestamp_unix = item.post_date.timestamp()
                    os.utime(item.save_path, (timestamp_unix, timestamp_unix))
                    self.log(f"Set timestamp to {item.post_date.strftime('%Y-%m-%d %H:%M:%S')}", "debug")
                except Exception as e:
                    self.log(f"Failed to set timestamp: {e}", "warning")

            download_time = time.time() - start_time
            return DownloadResult(
                success=True,
                item=item,
                file_size=len(content),
                download_time=download_time,
                file_hash=file_hash
            )
        except Exception as e:
            # Clean up partial download
            if item.save_path.exists():
                item.save_path.unlink()
            return DownloadResult(success=False, item=item, error=str(e))

    def _update_stats(self, result: DownloadResult) -> None:
        """Fold a finished download into the shared statistics counters."""
        with self.download_lock:
            if result.success:
                self.stats['successful'] += 1
                if result.file_size:
                    self.stats['total_bytes'] += result.file_size
                if result.download_time:
                    self.stats['total_time'] += result.download_time
            else:
                self.stats['failed'] += 1

    def _download_worker(self, item: DownloadItem, thread_id: int) -> DownloadResult:
        """Worker function for downloading a single item.

        Resolves image-host pages to direct URLs, skips already-downloaded
        files, applies rate limiting, downloads, retries, and records the
        result in stats/database.
        """
        # Process image hosting URLs to get direct URLs
        if 'pixhost.to/show/' in item.url:
            direct_url = self._extract_pixhost_direct_url(item.url)
            if direct_url:
                self.log(f"Converted pixhost URL to direct: {direct_url.split('/')[-1]}", "debug")
                item.url = direct_url
            else:
                self.log(f"Failed to extract pixhost direct URL: {item.url}", "warning")
        elif 'imagebam.com' in item.url:
            direct_url = self._extract_imagebam_direct_url(item.url)
            if direct_url:
                self.log(f"Converted ImageBam URL to direct: {direct_url.split('/')[-1]}", "debug")
                item.url = direct_url
            else:
                self.log(f"Failed to extract ImageBam direct URL: {item.url}", "warning")
        elif 'imagetwist.com' in item.url:
            # ImageTwist requires parsing the page to get direct image URL
            result = self._download_from_imagetwist(item)
            if result.success:
                # BUG FIX: this early return previously bypassed both the
                # database save and the statistics update.
                if self.use_database:
                    self._save_to_database(result)
                self._update_stats(result)
                return result
            self.log(f"ImageTwist download failed: {item.url}", "warning")

        # Check if already downloaded
        if self._is_already_downloaded(item.url, item.save_path):
            self.log(f"Already downloaded: {item.save_path.name}", "skip")
            # BUG FIX: skips were never counted in stats (early return
            # bypassed the accounting below).
            with self.download_lock:
                self.stats['skipped'] += 1
            return DownloadResult(
                success=True,
                item=item,
                file_size=item.save_path.stat().st_size if item.save_path.exists() else 0
            )

        # Apply rate limiting
        self._apply_rate_limit(thread_id)

        # Always use requests for direct image downloads (faster)
        result = self._download_with_requests(item)

        # Handle retries
        if not result.success and item.retry_count < item.max_retries:
            item.retry_count += 1
            self.log(f"Retrying {item.url} ({item.retry_count}/{item.max_retries})", "warning")
            time.sleep(self.rate_limit * 2)  # Extra delay before retry
            return self._download_worker(item, thread_id)

        # Save to database if successful
        if result.success and self.use_database:
            self._save_to_database(result)

        # Update statistics
        self._update_stats(result)
        return result

    def _save_to_database(self, result: DownloadResult):
        """Save successful download to database (INSERT OR REPLACE by url)."""
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            metadata_str = None
            if result.item.metadata:
                import json
                metadata_str = json.dumps(result.item.metadata)
            cursor.execute('''
                INSERT OR REPLACE INTO downloads
                (url, file_path, file_hash, file_size, metadata)
                VALUES (?, ?, ?, ?, ?)
            ''', (
                result.item.url,
                str(result.item.save_path),
                result.file_hash,
                result.file_size,
                metadata_str
            ))
            conn.commit()
        finally:
            conn.close()

    def download_batch(self, items: List[DownloadItem],
                       progress_callback: Optional[Callable] = None) -> List[DownloadResult]:
        """
        Download multiple items concurrently.

        Args:
            items: List of DownloadItem objects
            progress_callback: Optional callback for progress updates

        Returns:
            List of DownloadResult objects
        """
        self.stats['total'] = len(items)
        results = []

        self.log(f"Starting batch download of {len(items)} items with {self.max_workers} workers", "info")

        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            # Submit all downloads
            futures = {
                executor.submit(self._download_worker, item, i % self.max_workers): item
                for i, item in enumerate(items)
            }

            # Process completed downloads
            completed = 0
            for future in as_completed(futures):
                result = future.result()
                results.append(result)
                completed += 1

                # Progress update
                if progress_callback:
                    progress_callback(completed, len(items), result)
                if self.show_progress:
                    pct = (completed / len(items)) * 100
                    status = "✓" if result.success else "✗"
                    self.log(
                        f"[{completed}/{len(items)}] {pct:.1f}% - {status} {result.item.save_path.name}",
                        "success" if result.success else "error"
                    )

        # Summary
        self.log(f"Batch complete: {self.stats['successful']} successful, {self.stats['failed']} failed", "info")
        # BUG FIX: guard against division by zero when every item was
        # skipped (successful > 0 but total_time == 0).
        if self.stats['successful'] > 0 and self.stats['total_time'] > 0:
            avg_speed = self.stats['total_bytes'] / self.stats['total_time'] / 1024 / 1024
            self.log(f"Average speed: {avg_speed:.2f} MB/s", "info")

        return results

    def download_urls(self, urls: List[str], base_path: Path,
                      referer: Optional[str] = None,
                      metadata: Optional[Dict] = None) -> List[DownloadResult]:
        """
        Convenience method to download URLs to a directory.

        Args:
            urls: List of URLs to download
            base_path: Directory to save files
            referer: Optional referer header
            metadata: Optional metadata for all downloads

        Returns:
            List of DownloadResult objects
        """
        items = []
        for url in urls:
            # Derive a filename from the URL path; fall back to a hash-based
            # name when the path has no basename.
            filename = (os.path.basename(urlparse(url).path)
                        or f"download_{hashlib.sha256(url.encode()).hexdigest()[:8]}")
            save_path = base_path / filename
            items.append(DownloadItem(
                url=url,
                save_path=save_path,
                referer=referer,
                metadata=metadata
            ))
        return self.download_batch(items)

    def get_statistics(self) -> Dict:
        """Get a snapshot copy of download statistics."""
        return self.stats.copy()

    def cleanup_old_downloads(self, days: int = 30):
        """Remove download records older than *days* from the database.

        Returns the number of deleted records (0 when no database is used).
        """
        if not self.use_database:
            return 0
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            # -days concatenated with ' days' yields e.g. '-30 days'
            cursor.execute('''
                DELETE FROM downloads
                WHERE download_date < datetime('now', ? || ' days')
            ''', (-days,))
            deleted = cursor.rowcount
            conn.commit()
        finally:
            conn.close()
        self.log(f"Cleaned up {deleted} old download records", "info")
        return deleted


# Example usage
if __name__ == "__main__":
    # Test download manager
    manager = DownloadManager(
        max_workers=3,
        rate_limit=0.5,
        show_progress=True
    )

    # Test URLs
    urls = [
        "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf",
        "https://sample-videos.com/img/Sample-jpg-image-50kb.jpg",
        "https://www.w3schools.com/html/img_girl.jpg"
    ]

    # Download
    results = manager.download_urls(urls, Path("/tmp/test-downloads"))

    # Print results
    logger.info(f"Downloaded {len([r for r in results if r.success])} of {len(results)} files")
    logger.info(f"Total bytes: {manager.stats['total_bytes'] / 1024:.1f} KB")
    logger.info(f"Total time: {manager.stats['total_time']:.2f} seconds")