# media-downloader/modules/paid_content/snapchat_client.py

"""
Snapchat Client for Paid Content - Wraps SnapchatClientDownloader for paid content system.

Maps stories, highlights and spotlights to the Post/Attachment model used by the paid content scraper.
"""

from datetime import datetime
from typing import Dict, List, Optional

from modules.base_module import LoggingMixin
from .models import Creator, Post, Attachment


class SnapchatPaidContentClient(LoggingMixin):
    """
    Client for fetching Snapchat creator content via the existing SnapchatClientDownloader.

    Each story/highlight/spotlight collection maps to one Post with its snaps
    as Attachments. The underlying downloader is created lazily on first use.
    """

    # Passed as ``service_id`` on every Creator and Post this client builds.
    SERVICE_ID = 'snapchat'
    # Passed as ``platform`` on every Creator and Post this client builds.
    PLATFORM = 'snapchat'

    def __init__(self, unified_db=None, log_callback=None):
        """Set up logging and remember the database handle.

        Args:
            unified_db: Stored as-is and later handed to the
                SnapchatClientDownloader when it is lazily created.
            log_callback: Forwarded to ``_init_logger`` (from LoggingMixin).
        """
        # presumably _init_logger also stores log_callback as self.log_callback,
        # which _get_downloader reads later — confirm in LoggingMixin
        self._init_logger('PaidContent', log_callback, default_module='Snapchat')
        self.unified_db = unified_db
        # Created on first call to _get_downloader()
        self._downloader = None

    def _get_downloader(self):
        """Lazy-init the underlying SnapchatClientDownloader."""
        if self._downloader is None:
            from modules.snapchat_client_module import SnapchatClientDownloader
            self._downloader = SnapchatClientDownloader(
                show_progress=False,
                use_database=False,
                log_callback=self.log_callback,
                unified_db=self.unified_db,
            )
        return self._downloader

    def get_creator_info(self, username: str) -> Optional[Dict]:
        """Get creator information from profile page __NEXT_DATA__.

        Returns dict with display_name and avatar_url if found.
        """
        downloader = self._get_downloader()

        profile_url = f"https://story.snapchat.com/@{username}"
        html = downloader._fetch_page(profile_url)
        if not html:
            return {'creator_id': username, 'creator_name': username}

        data = downloader._extract_next_data(html)
        display_name = username
        avatar_url = None

        if data:
            props = data.get('props', {}).get('pageProps', {})

            # userProfile uses a $case/userInfo wrapper
            user_profile = props.get('userProfile', {})
            user_info = user_profile.get('userInfo', {})
            if user_info:
                name = user_info.get('displayName', '').strip()
                if name:
                    display_name = name

                # Bitmoji 3D avatar URL (best quality)
                bitmoji = user_info.get('bitmoji3d') or {}
                if isinstance(bitmoji, dict):
                    avatar_url = bitmoji.get('avatarUrl') or bitmoji.get('url')

            # linkPreview OG images as avatar (preview/square.jpeg — good quality)
            if not avatar_url:
                link_preview = props.get('linkPreview', {})
                for img_key in ('facebookImage', 'twitterImage'):
                    img = link_preview.get(img_key, {})
                    if isinstance(img, dict) and img.get('url'):
                        avatar_url = img['url']
                        break

            # pageMetadata.pageTitle sometimes has the display name
            if display_name == username:
                page_meta = props.get('pageMetadata', {})
                page_title = page_meta.get('pageTitle', '')
                # Format: "DisplayName (@username) | Snapchat..."
                if page_title and '(@' in page_title:
                    name_part = page_title.split('(@')[0].strip()
                    if name_part:
                        display_name = name_part

        return {
            'creator_id': username,
            'creator_name': display_name,
            'profile_image_url': avatar_url,
        }

    def get_creator(self, username: str) -> Optional[Creator]:
        """Get Creator model for a Snapchat user."""
        info = self.get_creator_info(username)
        if not info:
            return None

        return Creator(
            creator_id=username,
            service_id=self.SERVICE_ID,
            platform=self.PLATFORM,
            username=info.get('creator_name', username),
            display_name=info.get('creator_name'),
            profile_image_url=info.get('profile_image_url'),
        )

    def get_posts(self, username: str, since_date: str = None) -> List[Post]:
        """Fetch spotlights and highlights as Post objects.

        Args:
            username: Snapchat username (without @)
            since_date: ISO date string; skip snaps older than this

        Returns:
            List of Post objects (one per spotlight/highlight collection)
        """
        downloader = self._get_downloader()

        # Parse cutoff date
        cutoff_dt = None
        if since_date:
            try:
                if 'T' in since_date:
                    cutoff_dt = datetime.fromisoformat(since_date.replace('Z', '+00:00').replace('+00:00', ''))
                else:
                    cutoff_dt = datetime.strptime(since_date[:10], '%Y-%m-%d')
            except (ValueError, IndexError):
                pass

        # Discover content from profile (spotlights, highlights, stories)
        profile_content = downloader.get_profile_content(username)
        self.log(f"Found {len(profile_content.get('spotlights', []))} spotlights, "
                 f"{len(profile_content.get('highlight_collections', []))} highlights, "
                 f"{'stories' if profile_content.get('story_collection') else 'no stories'} "
                 f"for @{username}", 'info')

        posts = []

        # Process story snaps (inline from profile page — no extra HTTP requests)
        story_collection = profile_content.get('story_collection')
        if story_collection and story_collection.snaps:
            post = self._collection_to_post(story_collection, username, cutoff_dt)
            if post and post.attachments:
                posts.append(post)

        # Process highlights (inline from profile page — no extra HTTP requests)
        for collection in profile_content.get('highlight_collections', []):
            post = self._collection_to_post(collection, username, cutoff_dt)
            if post and post.attachments:
                posts.append(post)

        # Process spotlights (still requires per-URL fetch for full metadata)
        for url in profile_content.get('spotlights', []):
            collection = downloader.get_spotlight_metadata(url)
            if not collection:
                continue
            post = self._collection_to_post(collection, username, cutoff_dt)
            if post and post.attachments:
                posts.append(post)

        self.log(f"Mapped {len(posts)} posts with attachments for @{username}", 'info')
        return posts

    def _collection_to_post(self, collection, username: str, cutoff_dt=None) -> Optional[Post]:
        """Convert a SnapCollection to a Post with Attachments."""
        if not collection.snaps:
            return None

        # Use the earliest snap timestamp as the post date
        timestamps = [s.timestamp for s in collection.snaps if s.timestamp]
        if timestamps:
            earliest = min(timestamps)
            published_at = earliest.strftime('%Y-%m-%d')
        else:
            published_at = None

        # Skip if all snaps are older than cutoff
        if cutoff_dt and timestamps:
            latest = max(timestamps)
            if latest < cutoff_dt:
                return None

        attachments = []
        for snap in collection.snaps:
            if not snap.media_url:
                continue

            # Determine extension from media type
            ext = '.mp4' if snap.media_type == 'video' else '.jpg'
            name = f"{snap.media_id}{ext}" if snap.media_id else f"snap_{snap.index}{ext}"

            attachment = Attachment(
                name=name,
                file_type=snap.media_type,
                extension=ext,
                server_path=snap.media_url,
                download_url=snap.media_url,
                width=snap.width if snap.width else None,
                height=snap.height if snap.height else None,
                duration=snap.duration_ms // 1000 if snap.duration_ms else None,
            )
            attachments.append(attachment)

        if not attachments:
            return None

        # Build content/title from collection metadata
        title = collection.title or None
        content = collection.title if collection.title else None

        # Tag as spotlight or highlight
        tag_name = collection.collection_type.title()  # "Spotlight" or "Highlight"

        return Post(
            post_id=collection.collection_id,
            service_id=self.SERVICE_ID,
            platform=self.PLATFORM,
            creator_id=username,
            title=title,
            content=content,
            published_at=published_at,
            attachments=attachments,
            auto_tags=[tag_name],
        )

    def download_snap(self, media_url: str, output_path: str) -> bool:
        """Download a single snap file via curl_cffi.

        Args:
            media_url: Direct URL to the media file
            output_path: Local path to save the file

        Returns:
            True if download succeeded
        """
        import os
        downloader = self._get_downloader()
        session = downloader._get_session()

        try:
            url = media_url.replace('&amp;', '&')
            resp = session.get(url, timeout=60)
            if resp.status_code == 200 and len(resp.content) > 0:
                os.makedirs(os.path.dirname(output_path), exist_ok=True)
                with open(output_path, 'wb') as f:
                    f.write(resp.content)
                return True
            else:
                self.log(f"Download failed: HTTP {resp.status_code}, size={len(resp.content)}", 'warning')
                return False
        except Exception as e:
            self.log(f"Download error: {e}", 'error')
            return False