Initial commit

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-29 22:42:55 -04:00
commit 0d7b2b1aab
389 changed files with 280296 additions and 0 deletions
--- a/modules/paid_content/snapchat_client.py
+++ b/modules/paid_content/snapchat_client.py
@@ -0,0 +1,259 @@
+"""
+Snapchat Client for Paid Content - Wraps SnapchatClientDownloader for paid content system.
+
+Maps stories, highlights and spotlights to the Post/Attachment model used by the paid content scraper.
+"""
+
+from datetime import datetime
+from typing import Dict, List, Optional
+
+from modules.base_module import LoggingMixin
+from .models import Creator, Post, Attachment
+
+
+class SnapchatPaidContentClient(LoggingMixin):
+    """
+    Client for fetching Snapchat creator content via the existing SnapchatClientDownloader.
+
+    Each story/highlight/spotlight collection maps to one Post with snaps as Attachments.
+    """
+
+    SERVICE_ID = 'snapchat'
+    PLATFORM = 'snapchat'
+
+    def __init__(self, unified_db=None, log_callback=None):
+        """Set up logging and remember the database handle.
+
+        No downloader is built here; it is created on first use by
+        ``_get_downloader``.
+
+        Args:
+            unified_db: Stored as-is and later handed to the
+                SnapchatClientDownloader when it is constructed.
+            log_callback: Forwarded to the logging mixin's ``_init_logger``.
+        """
+        self._init_logger('PaidContent', log_callback, default_module='Snapchat')
+        # Placeholder for the lazily created downloader instance.
+        self._downloader = None
+        self.unified_db = unified_db
+
+    def _get_downloader(self):
+        """Return the shared SnapchatClientDownloader, creating it on first call.
+
+        The instance is cached on ``self._downloader`` so later calls reuse it.
+        """
+        if self._downloader is not None:
+            return self._downloader
+
+        # Deferred import: the module is only loaded once a downloader is needed.
+        from modules.snapchat_client_module import SnapchatClientDownloader
+
+        # self.log_callback is presumably set by _init_logger — confirm in LoggingMixin.
+        downloader_options = dict(
+            show_progress=False,
+            use_database=False,
+            log_callback=self.log_callback,
+            unified_db=self.unified_db,
+        )
+        self._downloader = SnapchatClientDownloader(**downloader_options)
+        return self._downloader
+
+    def get_creator_info(self, username: str) -> Optional[Dict]:
+        """Look up a creator's display name and avatar from the profile page.
+
+        Fetches ``https://story.snapchat.com/@<username>`` through the
+        underlying downloader and reads the page's ``__NEXT_DATA__`` payload.
+        Missing, ``null`` or unexpectedly typed nodes are treated as absent
+        rather than raising.
+
+        Display name sources, in order: ``userProfile.userInfo.displayName``,
+        then the part of ``pageMetadata.pageTitle`` before ``(@``, then the
+        username itself.
+
+        Avatar sources, in order: ``userInfo.bitmoji3d`` (``avatarUrl`` or
+        ``url``), then ``linkPreview.facebookImage.url``, then
+        ``linkPreview.twitterImage.url``.
+
+        Args:
+            username: Snapchat username (without @)
+
+        Returns:
+            Dict with ``creator_id``, ``creator_name`` and
+            ``profile_image_url`` (``None`` when no avatar was found). The
+            same three keys are returned when the page could not be fetched
+            or parsed, with ``creator_name`` falling back to ``username``.
+        """
+
+        def as_dict(value):
+            # JSON nulls and wrong-typed nodes collapse to an empty dict so
+            # the chained lookups below never hit ``None.get``.
+            return value if isinstance(value, dict) else {}
+
+        display_name = username
+        avatar_url = None
+
+        downloader = self._get_downloader()
+        profile_url = f"https://story.snapchat.com/@{username}"
+        html = downloader._fetch_page(profile_url)
+        data = downloader._extract_next_data(html) if html else None
+
+        if isinstance(data, dict) and data:
+            props = as_dict(as_dict(data.get('props')).get('pageProps'))
+
+            # userProfile uses a $case/userInfo wrapper
+            user_info = as_dict(as_dict(props.get('userProfile')).get('userInfo'))
+
+            name = user_info.get('displayName')
+            if isinstance(name, str) and name.strip():
+                display_name = name.strip()
+
+            # Bitmoji 3D avatar URL (best quality)
+            bitmoji = as_dict(user_info.get('bitmoji3d'))
+            avatar_url = bitmoji.get('avatarUrl') or bitmoji.get('url')
+
+            # linkPreview OG images as avatar (preview/square.jpeg — good quality)
+            if not avatar_url:
+                link_preview = as_dict(props.get('linkPreview'))
+                for img_key in ('facebookImage', 'twitterImage'):
+                    img = link_preview.get(img_key)
+                    if isinstance(img, dict) and img.get('url'):
+                        avatar_url = img['url']
+                        break
+
+            # pageMetadata.pageTitle sometimes has the display name
+            if display_name == username:
+                page_title = as_dict(props.get('pageMetadata')).get('pageTitle')
+                # Format: "DisplayName (@username) | Snapchat..."
+                if isinstance(page_title, str) and '(@' in page_title:
+                    name_part = page_title.split('(@')[0].strip()
+                    if name_part:
+                        display_name = name_part
+
+        return {
+            'creator_id': username,
+            'creator_name': display_name,
+            'profile_image_url': avatar_url,
+        }
+
+    def get_creator(self, username: str) -> Optional[Creator]:
+        """Build a Creator record for a Snapchat user.
+
+        Args:
+            username: Snapchat username; used verbatim as ``creator_id``.
+
+        Returns:
+            A Creator populated from ``get_creator_info``, or ``None`` when
+            that lookup returns nothing.
+        """
+        profile = self.get_creator_info(username)
+        if profile:
+            # NOTE(review): ``username`` is filled from the resolved display
+            # name (falling back to the handle), not from the handle itself —
+            # confirm this is what the Creator model expects.
+            creator_fields = dict(
+                creator_id=username,
+                service_id=self.SERVICE_ID,
+                platform=self.PLATFORM,
+                username=profile.get('creator_name', username),
+                display_name=profile.get('creator_name'),
+                profile_image_url=profile.get('profile_image_url'),
+            )
+            return Creator(**creator_fields)
+
+        return None
+
+    def get_posts(self, username: str, since_date: str = None) -> List[Post]:
+        """Fetch a creator's stories, highlights and spotlights as Post objects.
+
+        Args:
+            username: Snapchat username (without @)
+            since_date: ISO date or datetime string used as a cutoff. The
+                cutoff is passed to ``_collection_to_post`` for every
+                collection; a value that cannot be parsed is logged and
+                ignored (no cutoff is applied).
+
+        Returns:
+            List of Post objects, one per collection that produced at least
+            one attachment. Order: story collection, highlights, spotlights.
+        """
+        downloader = self._get_downloader()
+        cutoff_dt = self._parse_cutoff(since_date)
+
+        # Discover content from profile (spotlights, highlights, stories)
+        profile_content = downloader.get_profile_content(username) or {}
+        spotlight_urls = profile_content.get('spotlights') or []
+        highlight_collections = profile_content.get('highlight_collections') or []
+        story_collection = profile_content.get('story_collection')
+        self.log(f"Found {len(spotlight_urls)} spotlights, "
+                 f"{len(highlight_collections)} highlights, "
+                 f"{'stories' if story_collection else 'no stories'} "
+                 f"for @{username}", 'info')
+
+        posts = []
+
+        def add_post(collection):
+            # Shared by all three content kinds: convert, keep only posts
+            # that ended up with attachments.
+            post = self._collection_to_post(collection, username, cutoff_dt)
+            if post and post.attachments:
+                posts.append(post)
+
+        # Process story snaps (inline from profile page — no extra HTTP requests)
+        if story_collection and story_collection.snaps:
+            add_post(story_collection)
+
+        # Process highlights (inline from profile page — no extra HTTP requests)
+        for collection in highlight_collections:
+            add_post(collection)
+
+        # Process spotlights (still requires per-URL fetch for full metadata)
+        for url in spotlight_urls:
+            collection = downloader.get_spotlight_metadata(url)
+            if collection:
+                add_post(collection)
+
+        self.log(f"Mapped {len(posts)} posts with attachments for @{username}", 'info')
+        return posts
+
+    def _parse_cutoff(self, since_date):
+        """Parse ``since_date`` into a naive datetime, or None if absent/invalid.
+
+        Strings containing ``T`` are parsed as ISO datetimes; anything else
+        is parsed from its first ten characters as ``YYYY-MM-DD``. A value
+        carrying a UTC offset is converted to UTC and then stripped of its
+        tzinfo, so ``...Z`` and ``...+00:00`` keep their wall-clock value and
+        other offsets no longer produce an aware datetime.
+        """
+        if not since_date:
+            return None
+
+        # Local import: the module-level import only brings in ``datetime``.
+        from datetime import timezone
+
+        try:
+            if 'T' in since_date:
+                # 'Z' is rewritten because older fromisoformat() rejects it.
+                parsed = datetime.fromisoformat(since_date.replace('Z', '+00:00'))
+            else:
+                parsed = datetime.strptime(since_date[:10], '%Y-%m-%d')
+        except ValueError:
+            self.log(f"Ignoring unparseable since_date {since_date!r}", 'warning')
+            return None
+
+        if parsed.tzinfo is not None:
+            # NOTE(review): assumes snap timestamps are naive UTC, matching
+            # how a trailing 'Z' has always been handled here — confirm.
+            parsed = parsed.astimezone(timezone.utc).replace(tzinfo=None)
+        return parsed
+
+    def _collection_to_post(self, collection, username: str, cutoff_dt=None) -> Optional[Post]:
+        """Build a Post with one Attachment per downloadable snap.
+
+        Args:
+            collection: Object exposing ``snaps``, ``title``,
+                ``collection_type`` and ``collection_id``.
+            username: Stored as the Post's ``creator_id``.
+            cutoff_dt: Optional datetime. The whole collection is skipped
+                when its newest timestamped snap is older than this;
+                individual snaps are never filtered out.
+
+        Returns:
+            The Post, or ``None`` when the collection has no snaps, is
+            entirely older than the cutoff, or no snap has a media URL.
+        """
+        snaps = collection.snaps
+        if not snaps:
+            return None
+
+        stamps = [snap.timestamp for snap in snaps if snap.timestamp]
+
+        # Cutoff check: only applies when at least one snap carries a timestamp.
+        if cutoff_dt and stamps and max(stamps) < cutoff_dt:
+            return None
+
+        # The post is dated by its earliest snap, at day precision.
+        published_at = min(stamps).strftime('%Y-%m-%d') if stamps else None
+
+        attachments = []
+        for snap in snaps:
+            media_url = snap.media_url
+            if not media_url:
+                continue
+
+            # Anything that is not a video is saved with a .jpg extension.
+            ext = '.mp4' if snap.media_type == 'video' else '.jpg'
+
+            if snap.media_id:
+                name = f"{snap.media_id}{ext}"
+            else:
+                name = f"snap_{snap.index}{ext}"
+
+            # Milliseconds -> whole seconds (floor division).
+            if snap.duration_ms:
+                duration = snap.duration_ms // 1000
+            else:
+                duration = None
+
+            attachments.append(Attachment(
+                name=name,
+                file_type=snap.media_type,
+                extension=ext,
+                server_path=media_url,
+                download_url=media_url,
+                width=snap.width or None,
+                height=snap.height or None,
+                duration=duration,
+            ))
+
+        if not attachments:
+            return None
+
+        # Title and content both come from the collection title (empty -> None).
+        heading = collection.title or None
+        body_text = collection.title or None
+
+        # Tag with the capitalised collection type, e.g. "Spotlight" or "Highlight".
+        tag_name = collection.collection_type.title()
+
+        return Post(
+            post_id=collection.collection_id,
+            service_id=self.SERVICE_ID,
+            platform=self.PLATFORM,
+            creator_id=username,
+            title=heading,
+            content=body_text,
+            published_at=published_at,
+            attachments=attachments,
+            auto_tags=[tag_name],
+        )
+
+    def download_snap(self, media_url: str, output_path: str) -> bool:
+        """Download a single snap file using the downloader's HTTP session.
+
+        The response body is read fully into memory and then written to
+        ``output_path``. Missing parent directories are created; a bare
+        filename (no directory part) is written to the current directory.
+
+        Args:
+            media_url: Direct URL to the media file; ``&amp;`` sequences are
+                converted back to ``&`` before the request.
+            output_path: Local path to save the file
+
+        Returns:
+            True if the file was written. False on a non-200 status, an empty
+            body, or any exception raised while requesting or writing (the
+            failure is logged rather than propagated).
+        """
+        import os
+        downloader = self._get_downloader()
+        session = downloader._get_session()
+
+        try:
+            url = media_url.replace('&amp;', '&')
+            resp = session.get(url, timeout=60)
+            if resp.status_code != 200 or len(resp.content) == 0:
+                self.log(f"Download failed: HTTP {resp.status_code}, size={len(resp.content)}", 'warning')
+                return False
+
+            # os.makedirs('') raises, so only create a directory when the
+            # path actually has one.
+            parent_dir = os.path.dirname(output_path)
+            if parent_dir:
+                os.makedirs(parent_dir, exist_ok=True)
+
+            with open(output_path, 'wb') as f:
+                f.write(resp.content)
+            return True
+        except Exception as e:
+            # Best-effort boundary: callers only get a success flag.
+            self.log(f"Download error: {e}", 'error')
+            return False