Initial commit

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-29 22:42:55 -04:00
commit 0d7b2b1aab
389 changed files with 280296 additions and 0 deletions
--- a/modules/snapchat_client_module.py
+++ b/modules/snapchat_client_module.py
@@ -0,0 +1,871 @@
+#!/usr/bin/env python3
+"""
+Snapchat Client Module - Direct HTTP-based Snapchat downloader using curl_cffi.
+
+Replaces Playwright-based scraping with direct HTTP requests. Snapchat embeds
+all page data in <script id="__NEXT_DATA__"> JSON tags, so no JavaScript
+execution is needed. Uses story.snapchat.com which may not require Cloudflare.
+
+Follows the same pattern as instagram_client_module.py.
+"""
+
+import os
+import json
+import re
+import subprocess
+import time
+import random
+import platform
+from datetime import datetime, timedelta
+from pathlib import Path
+from typing import Optional, Dict, List, Set
+
+from modules.base_module import LoggingMixin
+from modules.snapchat_scraper import SnapMedia, SnapCollection
+
+
+class SnapchatClientDownloader(LoggingMixin):
+    """Snapchat downloader using direct HTTP via curl_cffi (no Playwright)"""
+
+    def __init__(self,
+                 show_progress: bool = True,
+                 use_database: bool = True,
+                 log_callback=None,
+                 unified_db=None):
+        """Initialize the Snapchat Client downloader.
+
+        Args:
+            show_progress: Whether to show download progress
+            use_database: Whether to use database for dedup
+            log_callback: Optional logging callback
+            unified_db: UnifiedDatabase instance
+        """
+        self._init_logger('SnapchatClient', log_callback, default_module='Download')
+
+        self.scraper_id = 'snapchat_client'
+        self.show_progress = show_progress
+        self.use_database = use_database
+        self.download_count = 0
+        self.downloaded_files: Set[str] = set()
+        self.pending_downloads = []
+
+        # Session (lazy-initialized)
+        self._session = None
+
+        # Database
+        if unified_db and use_database:
+            from modules.unified_database import SnapchatDatabaseAdapter
+            self.db = SnapchatDatabaseAdapter(unified_db)
+            self.unified_db = unified_db
+        else:
+            self.db = None
+            self.unified_db = None
+            self.use_database = False
+
+        # Activity status manager
+        try:
+            from modules.activity_status import get_activity_manager
+            self.activity_manager = get_activity_manager(unified_db)
+        except ImportError:
+            self.activity_manager = None
+
+        # Cookie data from DB
+        self.cookies = []
+        self.user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'
+
+    def _get_session(self):
+        """Get or create a curl_cffi session with browser TLS fingerprinting."""
+        if self._session is None:
+            from curl_cffi.requests import Session
+            # Try multiple browser versions for curl_cffi compatibility
+            for _browser in ("chrome131", "chrome136", "chrome"):
+                try:
+                    self._session = Session(impersonate=_browser)
+                    break
+                except Exception:
+                    continue
+            else:
+                self._session = Session()
+            self._session.headers.update({
+                'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+                'accept-language': 'en-US,en;q=0.9',
+                'cache-control': 'no-cache',
+            })
+            # Load cookies from database
+            self._load_cookies()
+        return self._session
+
+    def _load_cookies(self):
+        """Load cookies from database for authenticated requests."""
+        if not self.unified_db:
+            return
+
+        # Try snapchat_client cookies first, fall back to snapchat
+        for scraper_id in ['snapchat_client', 'snapchat']:
+            try:
+                cookies = self.unified_db.get_scraper_cookies(scraper_id)
+                if cookies:
+                    self.log(f"Loaded {len(cookies)} cookies from '{scraper_id}' scraper", "debug")
+                    self.cookies = cookies
+                    for cookie in cookies:
+                        name = cookie.get('name', '')
+                        value = cookie.get('value', '')
+                        domain = cookie.get('domain', '.snapchat.com')
+                        if name and value and self._session:
+                            self._session.cookies.set(name, value, domain=domain)
+
+                    # Check if we have a stored user-agent (important for cf_clearance match)
+                    try:
+                        import json as _json
+                        with self.unified_db.get_connection() as conn:
+                            cursor = conn.cursor()
+                            cursor.execute(
+                                "SELECT user_agent FROM scrapers WHERE id = ?",
+                                (scraper_id,)
+                            )
+                            row = cursor.fetchone()
+                            if row and row[0]:
+                                self.user_agent = row[0]
+                                if self._session:
+                                    self._session.headers['User-Agent'] = self.user_agent
+                    except Exception:
+                        pass
+
+                    return
+            except Exception as e:
+                self.log(f"Error loading cookies from '{scraper_id}': {e}", "debug")
+
+    def _fetch_page(self, url: str) -> Optional[str]:
+        """Fetch a page via HTTP and return the HTML content.
+
+        Tries story.snapchat.com first (no Cloudflare), falls back to www.snapchat.com.
+        """
+        session = self._get_session()
+
+        # If URL uses www.snapchat.com, try story.snapchat.com first
+        story_url = url.replace('www.snapchat.com', 'story.snapchat.com')
+        www_url = url.replace('story.snapchat.com', 'www.snapchat.com')
+
+        # Try story.snapchat.com first (likely no Cloudflare)
+        for attempt_url in [story_url, www_url]:
+            try:
+                resp = session.get(attempt_url, timeout=30)
+                if resp.status_code == 200 and '__NEXT_DATA__' in resp.text:
+                    return resp.text
+                elif resp.status_code == 403:
+                    self.log(f"403 Forbidden from {attempt_url.split('/@')[0]}", "debug")
+                    continue
+                elif resp.status_code != 200:
+                    self.log(f"HTTP {resp.status_code} from {attempt_url.split('/@')[0]}", "debug")
+                    continue
+            except Exception as e:
+                self.log(f"Error fetching {attempt_url.split('/@')[0]}: {e}", "debug")
+                continue
+
+        return None
+
+    def _extract_next_data(self, html: str) -> Optional[Dict]:
+        """Extract __NEXT_DATA__ JSON from HTML page."""
+        match = re.search(r'<script id="__NEXT_DATA__"[^>]*>(.*?)</script>', html, re.DOTALL)
+        if not match:
+            return None
+        try:
+            return json.loads(match.group(1))
+        except json.JSONDecodeError as e:
+            self.log(f"Failed to parse __NEXT_DATA__ JSON: {e}", "error")
+            return None
+
+    def get_profile_content(self, username: str) -> Dict[str, List]:
+        """Get all spotlight URLs, highlight URLs, and inline story/highlight data from a profile.
+
+        Parses __NEXT_DATA__ JSON to extract:
+        - spotlights: list of spotlight URL strings
+        - highlights: list of highlight URL strings
+        - story_collection: SnapCollection from story.snapList (recent stories), or None
+        - highlight_collections: list of SnapCollection from curatedHighlights (inline data)
+
+        The inline data avoids needing separate HTTP requests for stories and highlights.
+        """
+        result = {'spotlights': [], 'highlights': [], 'story_collection': None, 'highlight_collections': []}
+
+        url = f"https://story.snapchat.com/@{username}"
+        self.log(f"Fetching profile for @{username}", "info")
+
+        html = self._fetch_page(url)
+        if not html:
+            self.log(f"Failed to fetch profile page for @{username}", "warning")
+            return result
+
+        # Extract spotlight URLs via regex (still needed — spotlight metadata requires per-URL fetch)
+        spotlight_pattern = rf'/@{re.escape(username)}/spotlight/([A-Za-z0-9_-]+)'
+        spotlight_ids = list(set(re.findall(spotlight_pattern, html)))
+        result['spotlights'] = [
+            f"https://story.snapchat.com/@{username}/spotlight/{sid}"
+            for sid in spotlight_ids
+        ]
+        self.log(f"Found {len(result['spotlights'])} spotlights", "info")
+
+        # Parse __NEXT_DATA__ for stories and highlights (much more reliable than regex)
+        data = self._extract_next_data(html)
+        if not data:
+            # Fall back to regex for highlights
+            highlight_pattern = rf'/@{re.escape(username)}/highlight/([A-Za-z0-9-]+)'
+            highlight_ids = list(set(re.findall(highlight_pattern, html)))
+            result['highlights'] = [
+                f"https://story.snapchat.com/@{username}/highlight/{hid}"
+                for hid in highlight_ids
+            ]
+            self.log(f"Found {len(result['highlights'])} highlights (regex fallback)", "info")
+            return result
+
+        props = (data.get('props') or {}).get('pageProps') or {}
+
+        # Extract story snapList (recent stories — not available via individual URLs)
+        story = props.get('story') or {}
+        story_snaps = story.get('snapList') or []
+        if story_snaps:
+            story_id = story.get('storyId') or {}
+            if isinstance(story_id, dict):
+                story_id = story_id.get('value', 'story')
+            story_collection = SnapCollection(
+                collection_id=story_id or 'story',
+                collection_type='story',
+                title=story.get('storyTitle', '') or 'Stories',
+                username=username,
+                url=url
+            )
+            for snap_data in story_snaps:
+                snap = self._parse_snap_data(snap_data)
+                if snap:
+                    story_collection.snaps.append(snap)
+            if story_collection.snaps:
+                result['story_collection'] = story_collection
+                self.log(f"Found {len(story_collection.snaps)} story snaps", "info")
+
+        # Extract curatedHighlights inline (avoids per-highlight HTTP requests)
+        curated_highlights = props.get('curatedHighlights') or []
+        for highlight in curated_highlights:
+            highlight_id = highlight.get('highlightId') or {}
+            if isinstance(highlight_id, dict):
+                highlight_id = highlight_id.get('value', '')
+
+            title = highlight.get('storyTitle') or {}
+            if isinstance(title, dict):
+                title = title.get('value', '')
+
+            collection = SnapCollection(
+                collection_id=highlight_id,
+                collection_type='highlight',
+                title=title or 'Untitled Highlight',
+                username=username,
+                url=f"https://story.snapchat.com/@{username}/highlight/{highlight_id}"
+            )
+            for snap_data in highlight.get('snapList') or []:
+                snap = self._parse_snap_data(snap_data)
+                if snap:
+                    collection.snaps.append(snap)
+            if collection.snaps:
+                result['highlight_collections'].append(collection)
+
+        self.log(f"Found {len(result['highlight_collections'])} highlights (inline)", "info")
+
+        return result
+
+    def _parse_snap_data(self, snap_data: Dict) -> Optional[SnapMedia]:
+        """Parse a snap from __NEXT_DATA__ snapList into a SnapMedia object."""
+        snap_urls = snap_data.get('snapUrls') or {}
+        media_url = snap_urls.get('mediaUrl', '')
+        if not media_url:
+            return None
+
+        snap_id = (snap_data.get('snapId') or {}).get('value', '')
+        media_id = ''
+        if '/d/' in media_url:
+            media_id = media_url.split('/d/')[1].split('.')[0]
+
+        ts_str = (snap_data.get('timestampInSec') or {}).get('value', '0')
+        timestamp = datetime.fromtimestamp(int(ts_str)) if ts_str and ts_str != '0' else datetime.now()
+
+        lat = snap_data.get('lat')
+        lng = snap_data.get('lng')
+
+        return SnapMedia(
+            media_id=media_id or snap_id,
+            media_type='video' if snap_data.get('snapMediaType') == 1 else 'image',
+            media_url=media_url,
+            timestamp=timestamp,
+            index=snap_data.get('snapIndex', 0),
+            thumbnail_url=(snap_urls.get('mediaPreviewUrl') or {}).get('value', ''),
+            lat=float(lat) if lat else None,
+            lng=float(lng) if lng else None
+        )
+
+    def get_spotlight_metadata(self, url: str) -> Optional[SnapCollection]:
+        """Extract full metadata from a spotlight URL via __NEXT_DATA__."""
+        html = self._fetch_page(url)
+        if not html:
+            return None
+
+        data = self._extract_next_data(html)
+        if not data:
+            return None
+
+        props = (data.get('props') or {}).get('pageProps') or {}
+        feed = props.get('spotlightFeed') or {}
+        stories = feed.get('spotlightStories') or []
+
+        if not stories:
+            return None
+
+        story_data = stories[0]
+        story = story_data.get('story') or {}
+        metadata = (story_data.get('metadata') or {}).get('videoMetadata') or {}
+
+        story_id = (story.get('storyId') or {}).get('value', '')
+        creator = (metadata.get('creator') or {}).get('personCreator') or {}
+        username = creator.get('username', '')
+
+        collection = SnapCollection(
+            collection_id=story_id,
+            collection_type='spotlight',
+            title=metadata.get('description', ''),
+            username=username,
+            url=url
+        )
+
+        for snap_data in story.get('snapList') or []:
+            snap_id = (snap_data.get('snapId') or {}).get('value', '')
+            snap_urls = snap_data.get('snapUrls') or {}
+            media_url = snap_urls.get('mediaUrl', '')
+
+            media_id = ''
+            if '/d/' in media_url:
+                media_id = media_url.split('/d/')[1].split('.')[0]
+
+            ts_str = (snap_data.get('timestampInSec') or {}).get('value', '0')
+            timestamp = datetime.fromtimestamp(int(ts_str)) if ts_str else datetime.now()
+
+            snap = SnapMedia(
+                media_id=media_id or snap_id,
+                media_type='video' if snap_data.get('snapMediaType') == 1 else 'image',
+                media_url=media_url,
+                timestamp=timestamp,
+                index=snap_data.get('snapIndex', 0),
+                thumbnail_url=(snap_urls.get('mediaPreviewUrl') or {}).get('value', ''),
+                duration_ms=int(metadata.get('durationMs', 0)),
+                description=metadata.get('description', ''),
+                view_count=int(metadata.get('viewCount', 0)),
+                width=int(metadata.get('width', 540)),
+                height=int(metadata.get('height', 960))
+            )
+            collection.snaps.append(snap)
+
+        return collection
+
+    def get_highlight_metadata(self, url: str) -> Optional[SnapCollection]:
+        """Extract full metadata from a highlight URL via __NEXT_DATA__."""
+        html = self._fetch_page(url)
+        if not html:
+            return None
+
+        data = self._extract_next_data(html)
+        if not data:
+            return None
+
+        props = (data.get('props') or {}).get('pageProps') or {}
+        highlight = props.get('highlight') or {}
+
+        if not highlight:
+            return None
+
+        highlight_id = highlight.get('highlightId') or {}
+        if isinstance(highlight_id, dict):
+            highlight_id = highlight_id.get('value', '')
+
+        username_match = re.search(r'@([^/]+)', url)
+        username = username_match.group(1) if username_match else ''
+
+        title = highlight.get('storyTitle') or {}
+        if isinstance(title, dict):
+            title = title.get('value', '')
+
+        collection = SnapCollection(
+            collection_id=highlight_id,
+            collection_type='highlight',
+            title=title or 'Untitled Highlight',
+            username=username,
+            url=url
+        )
+
+        for snap_data in highlight.get('snapList') or []:
+            snap_urls = snap_data.get('snapUrls') or {}
+            media_url = snap_urls.get('mediaUrl', '')
+
+            media_id = ''
+            if '/d/' in media_url:
+                media_id = media_url.split('/d/')[1].split('.')[0]
+
+            ts_str = (snap_data.get('timestampInSec') or {}).get('value', '0')
+            timestamp = datetime.fromtimestamp(int(ts_str)) if ts_str else datetime.now()
+
+            lat = snap_data.get('lat')
+            lng = snap_data.get('lng')
+
+            snap = SnapMedia(
+                media_id=media_id,
+                media_type='video' if snap_data.get('snapMediaType') == 1 else 'image',
+                media_url=media_url,
+                timestamp=timestamp,
+                index=snap_data.get('snapIndex', 0),
+                thumbnail_url=(snap_urls.get('mediaPreviewUrl') or {}).get('value', ''),
+                lat=float(lat) if lat else None,
+                lng=float(lng) if lng else None
+            )
+            collection.snaps.append(snap)
+
+        return collection
+
+    def _download_media_file(self, snap: SnapMedia, output_path: str) -> bool:
+        """Download a single media file via curl_cffi."""
+        try:
+            url = snap.media_url.replace('&amp;', '&')
+            session = self._get_session()
+
+            resp = session.get(url, timeout=60)
+            if resp.status_code == 200 and len(resp.content) > 0:
+                os.makedirs(os.path.dirname(output_path), exist_ok=True)
+                with open(output_path, 'wb') as f:
+                    f.write(resp.content)
+                self._set_metadata(output_path, snap)
+                return True
+
+            self.log(f"Download failed: HTTP {resp.status_code}", "debug")
+            return False
+
+        except Exception as e:
+            self.log(f"Error downloading media: {e}", "error")
+            return False
+
+    def _set_metadata(self, file_path: str, snap: SnapMedia, description: str = None):
+        """Set EXIF metadata and file timestamp."""
+        try:
+            date_str = snap.timestamp.strftime('%Y:%m:%d %H:%M:%S')
+            desc = description or snap.description or ""
+            if snap.view_count:
+                desc += f" [Views: {snap.view_count}]"
+            desc = desc.strip()
+
+            ext = os.path.splitext(file_path)[1].lower()
+            is_video = ext in ['.mp4', '.mov', '.avi', '.webm']
+            is_image = ext in ['.jpg', '.jpeg', '.png', '.webp']
+
+            exif_args = [
+                'exiftool', '-overwrite_original', '-ignoreMinorErrors',
+                f'-FileModifyDate={date_str}',
+            ]
+
+            if is_image:
+                exif_args.extend([
+                    f'-DateTimeOriginal={date_str}',
+                    f'-CreateDate={date_str}',
+                    f'-ModifyDate={date_str}',
+                    f'-MetadataDate={date_str}',
+                ])
+                if desc:
+                    exif_args.extend([
+                        f'-ImageDescription={desc}',
+                        f'-XPComment={desc}',
+                        f'-UserComment={desc}',
+                    ])
+                if snap.lat and snap.lng:
+                    lat_ref = 'N' if snap.lat >= 0 else 'S'
+                    lng_ref = 'E' if snap.lng >= 0 else 'W'
+                    exif_args.extend([
+                        f'-GPSLatitude={abs(snap.lat)}',
+                        f'-GPSLatitudeRef={lat_ref}',
+                        f'-GPSLongitude={abs(snap.lng)}',
+                        f'-GPSLongitudeRef={lng_ref}',
+                    ])
+
+            elif is_video:
+                exif_args.extend([
+                    f'-CreateDate={date_str}',
+                    f'-ModifyDate={date_str}',
+                    f'-MediaCreateDate={date_str}',
+                    f'-MediaModifyDate={date_str}',
+                    f'-TrackCreateDate={date_str}',
+                    f'-TrackModifyDate={date_str}',
+                ])
+                if desc:
+                    exif_args.extend([
+                        f'-Description={desc}',
+                        f'-Comment={desc}',
+                    ])
+
+            exif_args.append(file_path)
+            subprocess.run(exif_args, capture_output=True, timeout=30)
+
+            # Set filesystem modification time
+            ts = snap.timestamp.timestamp()
+            os.utime(file_path, (ts, ts))
+
+        except Exception as e:
+            self.log(f"Warning: Could not set metadata for {file_path}: {e}", "debug")
+
+    def _generate_filename(self, username: str, snap: SnapMedia, ext: str) -> str:
+        """Generate filename with timestamp and media ID."""
+        date_str = snap.timestamp.strftime('%Y%m%d_%H%M%S')
+        return f"{username}_{date_str}_{snap.media_id}.{ext}"
+
+    def _get_processed_posts(self, username: str) -> Set[str]:
+        """Get set of media IDs that have been processed."""
+        processed = set()
+        if not self.db:
+            return processed
+
+        try:
+            with self.db.get_connection() as conn:
+                cursor = conn.cursor()
+                cursor.execute('''
+                    SELECT filename, metadata FROM downloads
+                    WHERE platform = 'snapchat'
+                    AND source = ?
+                ''', (username,))
+
+                for row in cursor.fetchall():
+                    filename, metadata_str = row
+                    if filename:
+                        parts = filename.split('_')
+                        if len(parts) >= 4:
+                            media_id = '_'.join(parts[3:]).split('.')[0]
+                            processed.add(media_id)
+
+                    if metadata_str:
+                        try:
+                            metadata = json.loads(metadata_str)
+                            if 'media_id' in metadata:
+                                processed.add(metadata['media_id'])
+                        except (json.JSONDecodeError, TypeError, KeyError):
+                            pass
+
+        except Exception as e:
+            self.log(f"Error loading processed posts: {e}", "debug")
+
+        return processed
+
+    def _record_download(self, username: str, url: str, filename: str,
+                         post_date=None, metadata: dict = None, file_path: str = None,
+                         deferred: bool = False):
+        """Record a download in the database."""
+        if deferred:
+            self.pending_downloads.append({
+                'username': username,
+                'url': url,
+                'filename': filename,
+                'post_date': post_date.isoformat() if hasattr(post_date, 'isoformat') else post_date,
+                'file_path': file_path,
+                'metadata': metadata
+            })
+            return True
+
+        if not self.db:
+            return
+
+        try:
+            self.db.mark_downloaded(
+                username=username,
+                url=url,
+                filename=filename,
+                post_date=post_date,
+                metadata=metadata,
+                file_path=file_path
+            )
+        except Exception as e:
+            self.log(f"Failed to record download: {e}", "debug")
+
+    def get_pending_downloads(self) -> list:
+        """Get list of pending downloads for deferred recording."""
+        return self.pending_downloads
+
+    def clear_pending_downloads(self):
+        """Clear pending downloads list."""
+        self.pending_downloads = []
+
+    def download(self, username: str, content_type: str = "all", days_back: int = 14,
+                 max_downloads: int = 50, output_dir: str = None,
+                 spotlight_dir: str = None, stories_dir: str = None,
+                 stitch_highlights: bool = True, defer_database: bool = False,
+                 phrase_config: dict = None) -> int:
+        """Download content from a user - compatible with media-downloader interface.
+
+        Args:
+            username: Snapchat username
+            content_type: "spotlight", "stories", "highlights", or "all"
+            days_back: How many days back to download (filters by post date)
+            max_downloads: Maximum items to download per content type
+            output_dir: Default output directory (used if specific dirs not set)
+            spotlight_dir: Output directory for spotlights
+            stories_dir: Output directory for stories/highlights
+            stitch_highlights: Ignored (kept for backwards compatibility)
+            defer_database: If True, defer database recording
+            phrase_config: Not used (for interface compatibility)
+
+        Returns:
+            Number of files downloaded
+        """
+        self.defer_database = defer_database
+        self.downloaded_files.clear()
+
+        # Set output directories
+        if spotlight_dir:
+            spotlight_output = Path(spotlight_dir)
+        elif output_dir:
+            spotlight_output = Path(output_dir)
+        else:
+            spotlight_output = Path(f"/opt/media-downloader/downloads/snapchat_client/spotlight/{username}")
+
+        if stories_dir:
+            stories_output = Path(stories_dir)
+        elif output_dir:
+            stories_output = Path(output_dir)
+        else:
+            stories_output = Path(f"/opt/media-downloader/downloads/snapchat_client/stories/{username}")
+
+        spotlight_output.mkdir(parents=True, exist_ok=True)
+        stories_output.mkdir(parents=True, exist_ok=True)
+
+        # Update activity status
+        if self.activity_manager:
+            self.activity_manager.update_status("Checking Snapchat")
+
+        # Get processed posts (shared with snapchat module - both use platform='snapchat')
+        processed = self._get_processed_posts(username)
+        self.log(f"Loaded {len(processed)} processed posts from database", "debug")
+
+        cutoff_date = datetime.now() - timedelta(days=days_back)
+        downloaded_count = 0
+
+        # Crash recovery checkpoint
+        from modules.task_checkpoint import TaskCheckpoint
+        checkpoint = TaskCheckpoint(f'snapchat_client:{username}', 'scraping')
+
+        try:
+            # Get profile content via HTTP
+            content = self.get_profile_content(username)
+
+            # Count total items for checkpoint
+            total_items = 0
+            if content_type in ['spotlight', 'all'] and content['spotlights']:
+                total_items += min(len(content['spotlights']), max_downloads)
+            if content_type in ['stories', 'highlights', 'all'] and content['highlights']:
+                total_items += min(len(content['highlights']), max_downloads)
+            checkpoint.start(total_items=total_items)
+            if checkpoint.is_recovering():
+                self.log(f"Snapchat Client @{username}: recovering — skipping already-processed URLs", "info")
+
+            # Download spotlights
+            if content_type in ['spotlight', 'all'] and content['spotlights']:
+                spotlight_items = content['spotlights'][:max_downloads]
+                self.log(f"Processing {len(spotlight_items)} spotlights...", "info")
+
+                if self.activity_manager:
+                    self.activity_manager.update_status(
+                        "Downloading spotlights",
+                        progress_current=0,
+                        progress_total=len(spotlight_items)
+                    )
+
+                for spot_idx, url in enumerate(spotlight_items):
+                    if self.activity_manager:
+                        self.activity_manager.update_status(
+                            "Downloading spotlights",
+                            progress_current=spot_idx + 1,
+                            progress_total=len(spotlight_items)
+                        )
+
+                    if checkpoint.is_completed(url):
+                        continue
+
+                    checkpoint.set_current(url)
+
+                    try:
+                        # Rate limit between page fetches
+                        if spot_idx > 0:
+                            time.sleep(random.uniform(1.5, 2.5))
+
+                        spotlight = self.get_spotlight_metadata(url)
+                        if not spotlight or not spotlight.snaps:
+                            continue
+
+                        snap = spotlight.snaps[0]
+
+                        # Check date filter
+                        if snap.timestamp < cutoff_date:
+                            self.log(f"Spotlight {snap.media_id} is older than {days_back} days, skipping", "debug")
+                            continue
+
+                        # Check if already processed
+                        if snap.media_id in processed or snap.media_id in self.downloaded_files:
+                            self.log(f"Spotlight {snap.media_id} already processed, skipping", "debug")
+                            continue
+
+                        # Download
+                        ext = 'mp4' if snap.media_type == 'video' else 'jpg'
+                        filename = self._generate_filename(username, snap, ext)
+                        output_path = str(spotlight_output / filename)
+
+                        # Rate limit between CDN downloads
+                        time.sleep(random.uniform(0.3, 0.5))
+
+                        if self._download_media_file(snap, output_path):
+                            self.downloaded_files.add(snap.media_id)
+                            downloaded_count += 1
+                            self.log(f"Downloaded spotlight: {filename}", "info")
+
+                            self._record_download(
+                                username=username,
+                                url=url,
+                                filename=filename,
+                                post_date=snap.timestamp,
+                                metadata={
+                                    'media_id': snap.media_id,
+                                    'description': snap.description,
+                                    'view_count': snap.view_count,
+                                    'content_type': 'spotlight'
+                                },
+                                file_path=output_path,
+                                deferred=defer_database
+                            )
+
+                    except Exception as e:
+                        self.log(f"Error processing spotlight: {e}", "error")
+
+                    checkpoint.mark_completed(url)
+
+            # Rate limit between content types
+            if content_type == 'all' and content['spotlights'] and content['highlights']:
+                time.sleep(random.uniform(2, 3))
+
+            # Download highlights (stories)
+            if content_type in ['stories', 'highlights', 'all'] and content['highlights']:
+                highlight_items = content['highlights'][:max_downloads]
+                self.log(f"Processing {len(highlight_items)} highlights...", "info")
+
+                if self.activity_manager:
+                    self.activity_manager.update_status(
+                        "Downloading highlights",
+                        progress_current=0,
+                        progress_total=len(highlight_items)
+                    )
+
+                for hi_idx, url in enumerate(highlight_items):
+                    if self.activity_manager:
+                        self.activity_manager.update_status(
+                            "Downloading highlights",
+                            progress_current=hi_idx + 1,
+                            progress_total=len(highlight_items)
+                        )
+
+                    if checkpoint.is_completed(url):
+                        continue
+
+                    checkpoint.set_current(url)
+
+                    try:
+                        # Rate limit between page fetches
+                        if hi_idx > 0:
+                            time.sleep(random.uniform(1.5, 2.5))
+
+                        highlight = self.get_highlight_metadata(url)
+                        if not highlight or not highlight.snaps:
+                            continue
+
+                        # Check if any snap is within date range
+                        newest_snap = max(highlight.snaps, key=lambda s: s.timestamp)
+                        if newest_snap.timestamp < cutoff_date:
+                            self.log(f"Highlight {highlight.collection_id} is older than {days_back} days, skipping", "debug")
+                            continue
+
+                        # Check if already processed
+                        if highlight.collection_id in processed or highlight.collection_id in self.downloaded_files:
+                            self.log(f"Highlight {highlight.collection_id} already processed, skipping", "debug")
+                            continue
+
+                        # Separate videos and images
+                        videos = [s for s in highlight.snaps if s.media_type == 'video']
+                        images = [s for s in highlight.snaps if s.media_type == 'image']
+
+                        # Download images individually
+                        for snap in images:
+                            if snap.timestamp < cutoff_date:
+                                continue
+                            if snap.media_id in processed or snap.media_id in self.downloaded_files:
+                                continue
+
+                            time.sleep(random.uniform(0.3, 0.5))
+
+                            filename = self._generate_filename(username, snap, 'jpg')
+                            output_path = str(stories_output / filename)
+
+                            if self._download_media_file(snap, output_path):
+                                self.downloaded_files.add(snap.media_id)
+                                downloaded_count += 1
+                                self.log(f"Downloaded image: {filename}", "info")
+
+                                self._record_download(
+                                    username=username,
+                                    url=highlight.url,
+                                    filename=filename,
+                                    post_date=snap.timestamp,
+                                    metadata={
+                                        'media_id': snap.media_id,
+                                        'highlight_id': highlight.collection_id,
+                                        'content_type': 'highlight_image'
+                                    },
+                                    file_path=output_path,
+                                    deferred=defer_database
+                                )
+
+                        # Download videos individually
+                        for snap in videos:
+                            if snap.timestamp < cutoff_date:
+                                continue
+                            if snap.media_id in processed or snap.media_id in self.downloaded_files:
+                                continue
+
+                            time.sleep(random.uniform(0.3, 0.5))
+
+                            filename = self._generate_filename(username, snap, 'mp4')
+                            output_path = str(stories_output / filename)
+
+                            if self._download_media_file(snap, output_path):
+                                self._set_metadata(output_path, snap)
+                                self.downloaded_files.add(snap.media_id)
+                                downloaded_count += 1
+                                self.log(f"Downloaded video: {filename}", "info")
+
+                                self._record_download(
+                                    username=username,
+                                    url=highlight.url,
+                                    filename=filename,
+                                    post_date=snap.timestamp,
+                                    metadata={
+                                        'media_id': snap.media_id,
+                                        'highlight_id': highlight.collection_id,
+                                        'content_type': 'highlight_video'
+                                    },
+                                    file_path=output_path,
+                                    deferred=defer_database
+                                )
+
+                    except Exception as e:
+                        self.log(f"Error processing highlight: {e}", "error")
+
+                    checkpoint.mark_completed(url)
+
+        except Exception as e:
+            self.log(f"Error during download: {e}", "error")
+
+        checkpoint.finish()
+        self.log(f"Downloaded {downloaded_count} files for @{username}", "info")
+        return downloaded_count