media-downloader/modules/snapchat_client_module.py

#!/usr/bin/env python3
"""
Snapchat Client Module - Direct HTTP-based Snapchat downloader using curl_cffi.

Replaces Playwright-based scraping with direct HTTP requests. Snapchat embeds
all page data in <script id="__NEXT_DATA__"> JSON tags, so no JavaScript
execution is needed. Uses story.snapchat.com which may not require Cloudflare.

Follows the same pattern as instagram_client_module.py.
"""

import os
import json
import re
import subprocess
import time
import random
import platform
from datetime import datetime, timedelta
from pathlib import Path
from typing import Optional, Dict, List, Set

from modules.base_module import LoggingMixin
from modules.snapchat_scraper import SnapMedia, SnapCollection


class SnapchatClientDownloader(LoggingMixin):
    """Snapchat downloader using direct HTTP via curl_cffi (no Playwright)"""

    def __init__(self,
                 show_progress: bool = True,
                 use_database: bool = True,
                 log_callback=None,
                 unified_db=None):
        """Initialize the Snapchat Client downloader.

        Args:
            show_progress: Whether to show download progress
            use_database: Whether to use database for dedup
            log_callback: Optional logging callback
            unified_db: UnifiedDatabase instance
        """
        self._init_logger('SnapchatClient', log_callback, default_module='Download')

        self.scraper_id = 'snapchat_client'
        self.show_progress = show_progress
        self.use_database = use_database
        self.download_count = 0
        self.downloaded_files: Set[str] = set()
        self.pending_downloads = []

        # Session (lazy-initialized)
        self._session = None

        # Database
        if unified_db and use_database:
            from modules.unified_database import SnapchatDatabaseAdapter
            self.db = SnapchatDatabaseAdapter(unified_db)
            self.unified_db = unified_db
        else:
            self.db = None
            self.unified_db = None
            self.use_database = False

        # Activity status manager
        try:
            from modules.activity_status import get_activity_manager
            self.activity_manager = get_activity_manager(unified_db)
        except ImportError:
            self.activity_manager = None

        # Cookie data from DB
        self.cookies = []
        self.user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'

    def _get_session(self):
        """Get or create a curl_cffi session with browser TLS fingerprinting."""
        if self._session is None:
            from curl_cffi.requests import Session
            # Try multiple browser versions for curl_cffi compatibility
            for _browser in ("chrome131", "chrome136", "chrome"):
                try:
                    self._session = Session(impersonate=_browser)
                    break
                except Exception:
                    continue
            else:
                self._session = Session()
            self._session.headers.update({
                'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                'accept-language': 'en-US,en;q=0.9',
                'cache-control': 'no-cache',
            })
            # Load cookies from database
            self._load_cookies()
        return self._session

    def _load_cookies(self):
        """Load cookies from database for authenticated requests."""
        if not self.unified_db:
            return

        # Try snapchat_client cookies first, fall back to snapchat
        for scraper_id in ['snapchat_client', 'snapchat']:
            try:
                cookies = self.unified_db.get_scraper_cookies(scraper_id)
                if cookies:
                    self.log(f"Loaded {len(cookies)} cookies from '{scraper_id}' scraper", "debug")
                    self.cookies = cookies
                    for cookie in cookies:
                        name = cookie.get('name', '')
                        value = cookie.get('value', '')
                        domain = cookie.get('domain', '.snapchat.com')
                        if name and value and self._session:
                            self._session.cookies.set(name, value, domain=domain)

                    # Check if we have a stored user-agent (important for cf_clearance match)
                    try:
                        import json as _json
                        with self.unified_db.get_connection() as conn:
                            cursor = conn.cursor()
                            cursor.execute(
                                "SELECT user_agent FROM scrapers WHERE id = ?",
                                (scraper_id,)
                            )
                            row = cursor.fetchone()
                            if row and row[0]:
                                self.user_agent = row[0]
                                if self._session:
                                    self._session.headers['User-Agent'] = self.user_agent
                    except Exception:
                        pass

                    return
            except Exception as e:
                self.log(f"Error loading cookies from '{scraper_id}': {e}", "debug")

    def _fetch_page(self, url: str) -> Optional[str]:
        """Fetch a page via HTTP and return the HTML content.

        Tries story.snapchat.com first (no Cloudflare), falls back to www.snapchat.com.
        """
        session = self._get_session()

        # If URL uses www.snapchat.com, try story.snapchat.com first
        story_url = url.replace('www.snapchat.com', 'story.snapchat.com')
        www_url = url.replace('story.snapchat.com', 'www.snapchat.com')

        # Try story.snapchat.com first (likely no Cloudflare)
        for attempt_url in [story_url, www_url]:
            try:
                resp = session.get(attempt_url, timeout=30)
                if resp.status_code == 200 and '__NEXT_DATA__' in resp.text:
                    return resp.text
                elif resp.status_code == 403:
                    self.log(f"403 Forbidden from {attempt_url.split('/@')[0]}", "debug")
                    continue
                elif resp.status_code != 200:
                    self.log(f"HTTP {resp.status_code} from {attempt_url.split('/@')[0]}", "debug")
                    continue
            except Exception as e:
                self.log(f"Error fetching {attempt_url.split('/@')[0]}: {e}", "debug")
                continue

        return None

    def _extract_next_data(self, html: str) -> Optional[Dict]:
        """Extract __NEXT_DATA__ JSON from HTML page."""
        match = re.search(r'<script id="__NEXT_DATA__"[^>]*>(.*?)</script>', html, re.DOTALL)
        if not match:
            return None
        try:
            return json.loads(match.group(1))
        except json.JSONDecodeError as e:
            self.log(f"Failed to parse __NEXT_DATA__ JSON: {e}", "error")
            return None

    def get_profile_content(self, username: str) -> Dict[str, List]:
        """Get all spotlight URLs, highlight URLs, and inline story/highlight data from a profile.

        Parses __NEXT_DATA__ JSON to extract:
        - spotlights: list of spotlight URL strings
        - highlights: list of highlight URL strings
        - story_collection: SnapCollection from story.snapList (recent stories), or None
        - highlight_collections: list of SnapCollection from curatedHighlights (inline data)

        The inline data avoids needing separate HTTP requests for stories and highlights.
        """
        result = {'spotlights': [], 'highlights': [], 'story_collection': None, 'highlight_collections': []}

        url = f"https://story.snapchat.com/@{username}"
        self.log(f"Fetching profile for @{username}", "info")

        html = self._fetch_page(url)
        if not html:
            self.log(f"Failed to fetch profile page for @{username}", "warning")
            return result

        # Extract spotlight URLs via regex (still needed — spotlight metadata requires per-URL fetch)
        spotlight_pattern = rf'/@{re.escape(username)}/spotlight/([A-Za-z0-9_-]+)'
        spotlight_ids = list(set(re.findall(spotlight_pattern, html)))
        result['spotlights'] = [
            f"https://story.snapchat.com/@{username}/spotlight/{sid}"
            for sid in spotlight_ids
        ]
        self.log(f"Found {len(result['spotlights'])} spotlights", "info")

        # Parse __NEXT_DATA__ for stories and highlights (much more reliable than regex)
        data = self._extract_next_data(html)
        if not data:
            # Fall back to regex for highlights
            highlight_pattern = rf'/@{re.escape(username)}/highlight/([A-Za-z0-9-]+)'
            highlight_ids = list(set(re.findall(highlight_pattern, html)))
            result['highlights'] = [
                f"https://story.snapchat.com/@{username}/highlight/{hid}"
                for hid in highlight_ids
            ]
            self.log(f"Found {len(result['highlights'])} highlights (regex fallback)", "info")
            return result

        props = (data.get('props') or {}).get('pageProps') or {}

        # Extract story snapList (recent stories — not available via individual URLs)
        story = props.get('story') or {}
        story_snaps = story.get('snapList') or []
        if story_snaps:
            story_id = story.get('storyId') or {}
            if isinstance(story_id, dict):
                story_id = story_id.get('value', 'story')
            story_collection = SnapCollection(
                collection_id=story_id or 'story',
                collection_type='story',
                title=story.get('storyTitle', '') or 'Stories',
                username=username,
                url=url
            )
            for snap_data in story_snaps:
                snap = self._parse_snap_data(snap_data)
                if snap:
                    story_collection.snaps.append(snap)
            if story_collection.snaps:
                result['story_collection'] = story_collection
                self.log(f"Found {len(story_collection.snaps)} story snaps", "info")

        # Extract curatedHighlights inline (avoids per-highlight HTTP requests)
        curated_highlights = props.get('curatedHighlights') or []
        for highlight in curated_highlights:
            highlight_id = highlight.get('highlightId') or {}
            if isinstance(highlight_id, dict):
                highlight_id = highlight_id.get('value', '')

            title = highlight.get('storyTitle') or {}
            if isinstance(title, dict):
                title = title.get('value', '')

            collection = SnapCollection(
                collection_id=highlight_id,
                collection_type='highlight',
                title=title or 'Untitled Highlight',
                username=username,
                url=f"https://story.snapchat.com/@{username}/highlight/{highlight_id}"
            )
            for snap_data in highlight.get('snapList') or []:
                snap = self._parse_snap_data(snap_data)
                if snap:
                    collection.snaps.append(snap)
            if collection.snaps:
                result['highlight_collections'].append(collection)

        self.log(f"Found {len(result['highlight_collections'])} highlights (inline)", "info")

        return result

    def _parse_snap_data(self, snap_data: Dict) -> Optional[SnapMedia]:
        """Parse a snap from __NEXT_DATA__ snapList into a SnapMedia object."""
        snap_urls = snap_data.get('snapUrls') or {}
        media_url = snap_urls.get('mediaUrl', '')
        if not media_url:
            return None

        snap_id = (snap_data.get('snapId') or {}).get('value', '')
        media_id = ''
        if '/d/' in media_url:
            media_id = media_url.split('/d/')[1].split('.')[0]

        ts_str = (snap_data.get('timestampInSec') or {}).get('value', '0')
        timestamp = datetime.fromtimestamp(int(ts_str)) if ts_str and ts_str != '0' else datetime.now()

        lat = snap_data.get('lat')
        lng = snap_data.get('lng')

        return SnapMedia(
            media_id=media_id or snap_id,
            media_type='video' if snap_data.get('snapMediaType') == 1 else 'image',
            media_url=media_url,
            timestamp=timestamp,
            index=snap_data.get('snapIndex', 0),
            thumbnail_url=(snap_urls.get('mediaPreviewUrl') or {}).get('value', ''),
            lat=float(lat) if lat else None,
            lng=float(lng) if lng else None
        )

    def get_spotlight_metadata(self, url: str) -> Optional[SnapCollection]:
        """Extract full metadata from a spotlight URL via __NEXT_DATA__."""
        html = self._fetch_page(url)
        if not html:
            return None

        data = self._extract_next_data(html)
        if not data:
            return None

        props = (data.get('props') or {}).get('pageProps') or {}
        feed = props.get('spotlightFeed') or {}
        stories = feed.get('spotlightStories') or []

        if not stories:
            return None

        story_data = stories[0]
        story = story_data.get('story') or {}
        metadata = (story_data.get('metadata') or {}).get('videoMetadata') or {}

        story_id = (story.get('storyId') or {}).get('value', '')
        creator = (metadata.get('creator') or {}).get('personCreator') or {}
        username = creator.get('username', '')

        collection = SnapCollection(
            collection_id=story_id,
            collection_type='spotlight',
            title=metadata.get('description', ''),
            username=username,
            url=url
        )

        for snap_data in story.get('snapList') or []:
            snap_id = (snap_data.get('snapId') or {}).get('value', '')
            snap_urls = snap_data.get('snapUrls') or {}
            media_url = snap_urls.get('mediaUrl', '')

            media_id = ''
            if '/d/' in media_url:
                media_id = media_url.split('/d/')[1].split('.')[0]

            ts_str = (snap_data.get('timestampInSec') or {}).get('value', '0')
            timestamp = datetime.fromtimestamp(int(ts_str)) if ts_str else datetime.now()

            snap = SnapMedia(
                media_id=media_id or snap_id,
                media_type='video' if snap_data.get('snapMediaType') == 1 else 'image',
                media_url=media_url,
                timestamp=timestamp,
                index=snap_data.get('snapIndex', 0),
                thumbnail_url=(snap_urls.get('mediaPreviewUrl') or {}).get('value', ''),
                duration_ms=int(metadata.get('durationMs', 0)),
                description=metadata.get('description', ''),
                view_count=int(metadata.get('viewCount', 0)),
                width=int(metadata.get('width', 540)),
                height=int(metadata.get('height', 960))
            )
            collection.snaps.append(snap)

        return collection

    def get_highlight_metadata(self, url: str) -> Optional[SnapCollection]:
        """Extract full metadata from a highlight URL via __NEXT_DATA__."""
        html = self._fetch_page(url)
        if not html:
            return None

        data = self._extract_next_data(html)
        if not data:
            return None

        props = (data.get('props') or {}).get('pageProps') or {}
        highlight = props.get('highlight') or {}

        if not highlight:
            return None

        highlight_id = highlight.get('highlightId') or {}
        if isinstance(highlight_id, dict):
            highlight_id = highlight_id.get('value', '')

        username_match = re.search(r'@([^/]+)', url)
        username = username_match.group(1) if username_match else ''

        title = highlight.get('storyTitle') or {}
        if isinstance(title, dict):
            title = title.get('value', '')

        collection = SnapCollection(
            collection_id=highlight_id,
            collection_type='highlight',
            title=title or 'Untitled Highlight',
            username=username,
            url=url
        )

        for snap_data in highlight.get('snapList') or []:
            snap_urls = snap_data.get('snapUrls') or {}
            media_url = snap_urls.get('mediaUrl', '')

            media_id = ''
            if '/d/' in media_url:
                media_id = media_url.split('/d/')[1].split('.')[0]

            ts_str = (snap_data.get('timestampInSec') or {}).get('value', '0')
            timestamp = datetime.fromtimestamp(int(ts_str)) if ts_str else datetime.now()

            lat = snap_data.get('lat')
            lng = snap_data.get('lng')

            snap = SnapMedia(
                media_id=media_id,
                media_type='video' if snap_data.get('snapMediaType') == 1 else 'image',
                media_url=media_url,
                timestamp=timestamp,
                index=snap_data.get('snapIndex', 0),
                thumbnail_url=(snap_urls.get('mediaPreviewUrl') or {}).get('value', ''),
                lat=float(lat) if lat else None,
                lng=float(lng) if lng else None
            )
            collection.snaps.append(snap)

        return collection

    def _download_media_file(self, snap: SnapMedia, output_path: str) -> bool:
        """Download a single media file via curl_cffi."""
        try:
            url = snap.media_url.replace('&amp;', '&')
            session = self._get_session()

            resp = session.get(url, timeout=60)
            if resp.status_code == 200 and len(resp.content) > 0:
                os.makedirs(os.path.dirname(output_path), exist_ok=True)
                with open(output_path, 'wb') as f:
                    f.write(resp.content)
                self._set_metadata(output_path, snap)
                return True

            self.log(f"Download failed: HTTP {resp.status_code}", "debug")
            return False

        except Exception as e:
            self.log(f"Error downloading media: {e}", "error")
            return False

    def _set_metadata(self, file_path: str, snap: SnapMedia, description: str = None):
        """Set EXIF metadata and file timestamp."""
        try:
            date_str = snap.timestamp.strftime('%Y:%m:%d %H:%M:%S')
            desc = description or snap.description or ""
            if snap.view_count:
                desc += f" [Views: {snap.view_count}]"
            desc = desc.strip()

            ext = os.path.splitext(file_path)[1].lower()
            is_video = ext in ['.mp4', '.mov', '.avi', '.webm']
            is_image = ext in ['.jpg', '.jpeg', '.png', '.webp']

            exif_args = [
                'exiftool', '-overwrite_original', '-ignoreMinorErrors',
                f'-FileModifyDate={date_str}',
            ]

            if is_image:
                exif_args.extend([
                    f'-DateTimeOriginal={date_str}',
                    f'-CreateDate={date_str}',
                    f'-ModifyDate={date_str}',
                    f'-MetadataDate={date_str}',
                ])
                if desc:
                    exif_args.extend([
                        f'-ImageDescription={desc}',
                        f'-XPComment={desc}',
                        f'-UserComment={desc}',
                    ])
                if snap.lat and snap.lng:
                    lat_ref = 'N' if snap.lat >= 0 else 'S'
                    lng_ref = 'E' if snap.lng >= 0 else 'W'
                    exif_args.extend([
                        f'-GPSLatitude={abs(snap.lat)}',
                        f'-GPSLatitudeRef={lat_ref}',
                        f'-GPSLongitude={abs(snap.lng)}',
                        f'-GPSLongitudeRef={lng_ref}',
                    ])

            elif is_video:
                exif_args.extend([
                    f'-CreateDate={date_str}',
                    f'-ModifyDate={date_str}',
                    f'-MediaCreateDate={date_str}',
                    f'-MediaModifyDate={date_str}',
                    f'-TrackCreateDate={date_str}',
                    f'-TrackModifyDate={date_str}',
                ])
                if desc:
                    exif_args.extend([
                        f'-Description={desc}',
                        f'-Comment={desc}',
                    ])

            exif_args.append(file_path)
            subprocess.run(exif_args, capture_output=True, timeout=30)

            # Set filesystem modification time
            ts = snap.timestamp.timestamp()
            os.utime(file_path, (ts, ts))

        except Exception as e:
            self.log(f"Warning: Could not set metadata for {file_path}: {e}", "debug")

    def _generate_filename(self, username: str, snap: SnapMedia, ext: str) -> str:
        """Generate filename with timestamp and media ID."""
        date_str = snap.timestamp.strftime('%Y%m%d_%H%M%S')
        return f"{username}_{date_str}_{snap.media_id}.{ext}"

    def _get_processed_posts(self, username: str) -> Set[str]:
        """Get set of media IDs that have been processed."""
        processed = set()
        if not self.db:
            return processed

        try:
            with self.db.get_connection() as conn:
                cursor = conn.cursor()
                cursor.execute('''
                    SELECT filename, metadata FROM downloads
                    WHERE platform = 'snapchat'
                    AND source = ?
                ''', (username,))

                for row in cursor.fetchall():
                    filename, metadata_str = row
                    if filename:
                        parts = filename.split('_')
                        if len(parts) >= 4:
                            media_id = '_'.join(parts[3:]).split('.')[0]
                            processed.add(media_id)

                    if metadata_str:
                        try:
                            metadata = json.loads(metadata_str)
                            if 'media_id' in metadata:
                                processed.add(metadata['media_id'])
                        except (json.JSONDecodeError, TypeError, KeyError):
                            pass

        except Exception as e:
            self.log(f"Error loading processed posts: {e}", "debug")

        return processed

    def _record_download(self, username: str, url: str, filename: str,
                         post_date=None, metadata: dict = None, file_path: str = None,
                         deferred: bool = False):
        """Record a download in the database."""
        if deferred:
            self.pending_downloads.append({
                'username': username,
                'url': url,
                'filename': filename,
                'post_date': post_date.isoformat() if hasattr(post_date, 'isoformat') else post_date,
                'file_path': file_path,
                'metadata': metadata
            })
            return True

        if not self.db:
            return

        try:
            self.db.mark_downloaded(
                username=username,
                url=url,
                filename=filename,
                post_date=post_date,
                metadata=metadata,
                file_path=file_path
            )
        except Exception as e:
            self.log(f"Failed to record download: {e}", "debug")

    def get_pending_downloads(self) -> list:
        """Get list of pending downloads for deferred recording."""
        return self.pending_downloads

    def clear_pending_downloads(self):
        """Clear pending downloads list."""
        self.pending_downloads = []

    def download(self, username: str, content_type: str = "all", days_back: int = 14,
                 max_downloads: int = 50, output_dir: str = None,
                 spotlight_dir: str = None, stories_dir: str = None,
                 stitch_highlights: bool = True, defer_database: bool = False,
                 phrase_config: dict = None) -> int:
        """Download content from a user - compatible with media-downloader interface.

        Args:
            username: Snapchat username
            content_type: "spotlight", "stories", "highlights", or "all"
            days_back: How many days back to download (filters by post date)
            max_downloads: Maximum items to download per content type
            output_dir: Default output directory (used if specific dirs not set)
            spotlight_dir: Output directory for spotlights
            stories_dir: Output directory for stories/highlights
            stitch_highlights: Ignored (kept for backwards compatibility)
            defer_database: If True, defer database recording
            phrase_config: Not used (for interface compatibility)

        Returns:
            Number of files downloaded
        """
        self.defer_database = defer_database
        self.downloaded_files.clear()

        # Set output directories
        if spotlight_dir:
            spotlight_output = Path(spotlight_dir)
        elif output_dir:
            spotlight_output = Path(output_dir)
        else:
            spotlight_output = Path(f"/opt/media-downloader/downloads/snapchat_client/spotlight/{username}")

        if stories_dir:
            stories_output = Path(stories_dir)
        elif output_dir:
            stories_output = Path(output_dir)
        else:
            stories_output = Path(f"/opt/media-downloader/downloads/snapchat_client/stories/{username}")

        spotlight_output.mkdir(parents=True, exist_ok=True)
        stories_output.mkdir(parents=True, exist_ok=True)

        # Update activity status
        if self.activity_manager:
            self.activity_manager.update_status("Checking Snapchat")

        # Get processed posts (shared with snapchat module - both use platform='snapchat')
        processed = self._get_processed_posts(username)
        self.log(f"Loaded {len(processed)} processed posts from database", "debug")

        cutoff_date = datetime.now() - timedelta(days=days_back)
        downloaded_count = 0

        # Crash recovery checkpoint
        from modules.task_checkpoint import TaskCheckpoint
        checkpoint = TaskCheckpoint(f'snapchat_client:{username}', 'scraping')

        try:
            # Get profile content via HTTP
            content = self.get_profile_content(username)

            # Count total items for checkpoint
            total_items = 0
            if content_type in ['spotlight', 'all'] and content['spotlights']:
                total_items += min(len(content['spotlights']), max_downloads)
            if content_type in ['stories', 'highlights', 'all'] and content['highlights']:
                total_items += min(len(content['highlights']), max_downloads)
            checkpoint.start(total_items=total_items)
            if checkpoint.is_recovering():
                self.log(f"Snapchat Client @{username}: recovering — skipping already-processed URLs", "info")

            # Download spotlights
            if content_type in ['spotlight', 'all'] and content['spotlights']:
                spotlight_items = content['spotlights'][:max_downloads]
                self.log(f"Processing {len(spotlight_items)} spotlights...", "info")

                if self.activity_manager:
                    self.activity_manager.update_status(
                        "Downloading spotlights",
                        progress_current=0,
                        progress_total=len(spotlight_items)
                    )

                for spot_idx, url in enumerate(spotlight_items):
                    if self.activity_manager:
                        self.activity_manager.update_status(
                            "Downloading spotlights",
                            progress_current=spot_idx + 1,
                            progress_total=len(spotlight_items)
                        )

                    if checkpoint.is_completed(url):
                        continue

                    checkpoint.set_current(url)

                    try:
                        # Rate limit between page fetches
                        if spot_idx > 0:
                            time.sleep(random.uniform(1.5, 2.5))

                        spotlight = self.get_spotlight_metadata(url)
                        if not spotlight or not spotlight.snaps:
                            continue

                        snap = spotlight.snaps[0]

                        # Check date filter
                        if snap.timestamp < cutoff_date:
                            self.log(f"Spotlight {snap.media_id} is older than {days_back} days, skipping", "debug")
                            continue

                        # Check if already processed
                        if snap.media_id in processed or snap.media_id in self.downloaded_files:
                            self.log(f"Spotlight {snap.media_id} already processed, skipping", "debug")
                            continue

                        # Download
                        ext = 'mp4' if snap.media_type == 'video' else 'jpg'
                        filename = self._generate_filename(username, snap, ext)
                        output_path = str(spotlight_output / filename)

                        # Rate limit between CDN downloads
                        time.sleep(random.uniform(0.3, 0.5))

                        if self._download_media_file(snap, output_path):
                            self.downloaded_files.add(snap.media_id)
                            downloaded_count += 1
                            self.log(f"Downloaded spotlight: {filename}", "info")

                            self._record_download(
                                username=username,
                                url=url,
                                filename=filename,
                                post_date=snap.timestamp,
                                metadata={
                                    'media_id': snap.media_id,
                                    'description': snap.description,
                                    'view_count': snap.view_count,
                                    'content_type': 'spotlight'
                                },
                                file_path=output_path,
                                deferred=defer_database
                            )

                    except Exception as e:
                        self.log(f"Error processing spotlight: {e}", "error")

                    checkpoint.mark_completed(url)

            # Rate limit between content types
            if content_type == 'all' and content['spotlights'] and content['highlights']:
                time.sleep(random.uniform(2, 3))

            # Download highlights (stories)
            if content_type in ['stories', 'highlights', 'all'] and content['highlights']:
                highlight_items = content['highlights'][:max_downloads]
                self.log(f"Processing {len(highlight_items)} highlights...", "info")

                if self.activity_manager:
                    self.activity_manager.update_status(
                        "Downloading highlights",
                        progress_current=0,
                        progress_total=len(highlight_items)
                    )

                for hi_idx, url in enumerate(highlight_items):
                    if self.activity_manager:
                        self.activity_manager.update_status(
                            "Downloading highlights",
                            progress_current=hi_idx + 1,
                            progress_total=len(highlight_items)
                        )

                    if checkpoint.is_completed(url):
                        continue

                    checkpoint.set_current(url)

                    try:
                        # Rate limit between page fetches
                        if hi_idx > 0:
                            time.sleep(random.uniform(1.5, 2.5))

                        highlight = self.get_highlight_metadata(url)
                        if not highlight or not highlight.snaps:
                            continue

                        # Check if any snap is within date range
                        newest_snap = max(highlight.snaps, key=lambda s: s.timestamp)
                        if newest_snap.timestamp < cutoff_date:
                            self.log(f"Highlight {highlight.collection_id} is older than {days_back} days, skipping", "debug")
                            continue

                        # Check if already processed
                        if highlight.collection_id in processed or highlight.collection_id in self.downloaded_files:
                            self.log(f"Highlight {highlight.collection_id} already processed, skipping", "debug")
                            continue

                        # Separate videos and images
                        videos = [s for s in highlight.snaps if s.media_type == 'video']
                        images = [s for s in highlight.snaps if s.media_type == 'image']

                        # Download images individually
                        for snap in images:
                            if snap.timestamp < cutoff_date:
                                continue
                            if snap.media_id in processed or snap.media_id in self.downloaded_files:
                                continue

                            time.sleep(random.uniform(0.3, 0.5))

                            filename = self._generate_filename(username, snap, 'jpg')
                            output_path = str(stories_output / filename)

                            if self._download_media_file(snap, output_path):
                                self.downloaded_files.add(snap.media_id)
                                downloaded_count += 1
                                self.log(f"Downloaded image: {filename}", "info")

                                self._record_download(
                                    username=username,
                                    url=highlight.url,
                                    filename=filename,
                                    post_date=snap.timestamp,
                                    metadata={
                                        'media_id': snap.media_id,
                                        'highlight_id': highlight.collection_id,
                                        'content_type': 'highlight_image'
                                    },
                                    file_path=output_path,
                                    deferred=defer_database
                                )

                        # Download videos individually
                        for snap in videos:
                            if snap.timestamp < cutoff_date:
                                continue
                            if snap.media_id in processed or snap.media_id in self.downloaded_files:
                                continue

                            time.sleep(random.uniform(0.3, 0.5))

                            filename = self._generate_filename(username, snap, 'mp4')
                            output_path = str(stories_output / filename)

                            if self._download_media_file(snap, output_path):
                                self._set_metadata(output_path, snap)
                                self.downloaded_files.add(snap.media_id)
                                downloaded_count += 1
                                self.log(f"Downloaded video: {filename}", "info")

                                self._record_download(
                                    username=username,
                                    url=highlight.url,
                                    filename=filename,
                                    post_date=snap.timestamp,
                                    metadata={
                                        'media_id': snap.media_id,
                                        'highlight_id': highlight.collection_id,
                                        'content_type': 'highlight_video'
                                    },
                                    file_path=output_path,
                                    deferred=defer_database
                                )

                    except Exception as e:
                        self.log(f"Error processing highlight: {e}", "error")

                    checkpoint.mark_completed(url)

        except Exception as e:
            self.log(f"Error during download: {e}", "error")

        checkpoint.finish()
        self.log(f"Downloaded {downloaded_count} files for @{username}", "info")
        return downloaded_count