Initial commit

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Todd
2026-03-29 22:42:55 -04:00
commit 0d7b2b1aab
389 changed files with 280296 additions and 0 deletions

View File

@@ -0,0 +1,259 @@
"""
Snapchat Client for Paid Content - Wraps SnapchatClientDownloader for paid content system.
Maps stories, highlights and spotlights to the Post/Attachment model used by the paid content scraper.
"""
from datetime import datetime
from typing import Dict, List, Optional
from modules.base_module import LoggingMixin
from .models import Creator, Post, Attachment
class SnapchatPaidContentClient(LoggingMixin):
    """
    Client for fetching Snapchat creator content via the existing SnapchatClientDownloader.

    Each story, highlight or spotlight collection maps to one Post, with the
    collection's snaps as that Post's Attachments.
    """
    # Passed as ``service_id`` to the Creator and Post objects this client builds.
    SERVICE_ID = 'snapchat'
    # Passed as ``platform`` to the same Creator and Post objects.
    PLATFORM = 'snapchat'
def __init__(self, unified_db=None, log_callback=None):
    """Set up logging and remember the database handle.

    Args:
        unified_db: Stored as-is and later forwarded to the downloader when
            it is created.
        log_callback: Forwarded to ``_init_logger``.
    """
    self._init_logger('PaidContent', log_callback, default_module='Snapchat')
    # Filled in on first use by _get_downloader().
    self._downloader = None
    self.unified_db = unified_db
def _get_downloader(self):
    """Return the SnapchatClientDownloader, building it on the first call."""
    if self._downloader is not None:
        return self._downloader

    # Deferred import: the downloader module is only loaded when first needed.
    from modules.snapchat_client_module import SnapchatClientDownloader

    options = {
        'show_progress': False,
        'use_database': False,
        'log_callback': self.log_callback,
        'unified_db': self.unified_db,
    }
    self._downloader = SnapchatClientDownloader(**options)
    return self._downloader
def get_creator_info(self, username: str) -> Optional[Dict]:
    """Look up a creator's display name and avatar from the profile page.

    Fetches ``https://story.snapchat.com/@<username>`` through the underlying
    downloader and reads the embedded ``__NEXT_DATA__`` payload. Every lookup
    is best-effort: a missing, ``null`` or unexpectedly typed field falls
    through to the next source instead of raising.

    Args:
        username: Snapchat username used to build the profile URL.

    Returns:
        A dict with ``creator_id`` (the username) and ``creator_name`` (the
        display name, or the username when none is found). When the page was
        fetched, ``profile_image_url`` is also present and may be ``None``;
        when the page could not be fetched only the first two keys are set.
    """
    def as_dict(value):
        # JSON ``null`` (or any non-object) must not break the chained lookups.
        return value if isinstance(value, dict) else {}

    def as_text(value):
        return value.strip() if isinstance(value, str) else ''

    downloader = self._get_downloader()
    profile_url = f"https://story.snapchat.com/@{username}"
    html = downloader._fetch_page(profile_url)
    if not html:
        return {'creator_id': username, 'creator_name': username}

    data = downloader._extract_next_data(html)
    props = as_dict(as_dict(as_dict(data).get('props')).get('pageProps'))

    display_name = username

    # userProfile uses a $case/userInfo wrapper
    user_info = as_dict(as_dict(props.get('userProfile')).get('userInfo'))
    name = as_text(user_info.get('displayName'))
    if name:
        display_name = name

    # Bitmoji 3D avatar URL (best quality)
    bitmoji = as_dict(user_info.get('bitmoji3d'))
    avatar_url = bitmoji.get('avatarUrl') or bitmoji.get('url')

    # linkPreview OG images as avatar (preview/square.jpeg — good quality)
    if not avatar_url:
        link_preview = as_dict(props.get('linkPreview'))
        for img_key in ('facebookImage', 'twitterImage'):
            img = link_preview.get(img_key)
            if isinstance(img, dict) and img.get('url'):
                avatar_url = img['url']
                break

    # pageMetadata.pageTitle sometimes has the display name
    if display_name == username:
        page_title = as_dict(props.get('pageMetadata')).get('pageTitle')
        # Format: "DisplayName (@username) | Snapchat..."
        if isinstance(page_title, str) and '(@' in page_title:
            name_part = page_title.split('(@')[0].strip()
            if name_part:
                display_name = name_part

    return {
        'creator_id': username,
        'creator_name': display_name,
        'profile_image_url': avatar_url,
    }
def get_creator(self, username: str) -> Optional[Creator]:
    """Build the Creator model for a Snapchat user.

    Args:
        username: Snapchat username; used as the ``creator_id``.

    Returns:
        A Creator populated from ``get_creator_info``, or None when that
        lookup returns nothing.
    """
    info = self.get_creator_info(username)
    if info:
        # NOTE(review): ``username`` is filled from the resolved creator_name
        # rather than the handle itself -- confirm this is intended.
        resolved_name = info.get('creator_name', username)
        return Creator(
            creator_id=username,
            service_id=self.SERVICE_ID,
            platform=self.PLATFORM,
            username=resolved_name,
            display_name=info.get('creator_name'),
            profile_image_url=info.get('profile_image_url'),
        )
    return None
def get_posts(self, username: str, since_date: Optional[str] = None) -> List[Post]:
    """Fetch a creator's stories, highlights and spotlights as Post objects.

    Args:
        username: Snapchat username (without @).
        since_date: Optional ISO date (``YYYY-MM-DD``) or datetime string
            used as a cutoff. It is parsed into a naive datetime (a UTC
            offset, if present, is applied and then dropped) and handed to
            ``_collection_to_post``, which drops a collection only when all
            of its snaps are older than the cutoff. A value that cannot be
            parsed is logged and ignored, so nothing is filtered.

    Returns:
        List of Post objects, one per collection that produced at least one
        attachment, in the order: story, highlights, spotlights.
    """
    downloader = self._get_downloader()

    # Parse cutoff date
    cutoff_dt = None
    if since_date:
        try:
            if 'T' in since_date:
                iso_text = since_date
                if iso_text.endswith(('Z', 'z')):
                    # fromisoformat() only accepts a literal "Z" on newer Pythons
                    iso_text = iso_text[:-1] + '+00:00'
                parsed = datetime.fromisoformat(iso_text)
                offset = parsed.utcoffset()
                if offset is not None:
                    # Normalise to naive UTC so the cutoff can be compared with
                    # snap timestamps. NOTE(review): assumes snap timestamps
                    # are naive, as the original "+00:00" stripping implied.
                    parsed = (parsed - offset).replace(tzinfo=None)
                cutoff_dt = parsed
            else:
                cutoff_dt = datetime.strptime(since_date[:10], '%Y-%m-%d')
        except (ValueError, IndexError) as exc:
            self.log(f"Ignoring unparseable since_date {since_date!r}: {exc}", 'warning')

    # Discover content from profile (spotlights, highlights, stories)
    profile_content = downloader.get_profile_content(username) or {}
    spotlight_urls = profile_content.get('spotlights') or []
    highlight_collections = profile_content.get('highlight_collections') or []
    story_collection = profile_content.get('story_collection')

    self.log(f"Found {len(spotlight_urls)} spotlights, "
             f"{len(highlight_collections)} highlights, "
             f"{'stories' if story_collection else 'no stories'} "
             f"for @{username}", 'info')

    posts = []

    def add_collection(collection):
        # Keep only collections that yield a post with at least one attachment.
        post = self._collection_to_post(collection, username, cutoff_dt)
        if post and post.attachments:
            posts.append(post)

    # Process story snaps (inline from profile page — no extra HTTP requests)
    if story_collection and story_collection.snaps:
        add_collection(story_collection)

    # Process highlights (inline from profile page — no extra HTTP requests)
    for collection in highlight_collections:
        add_collection(collection)

    # Process spotlights (still requires per-URL fetch for full metadata)
    for url in spotlight_urls:
        collection = downloader.get_spotlight_metadata(url)
        if collection:
            add_collection(collection)

    self.log(f"Mapped {len(posts)} posts with attachments for @{username}", 'info')
    return posts
def _collection_to_post(self, collection, username: str, cutoff_dt=None) -> Optional[Post]:
    """Turn one snap collection into a Post whose attachments are its snaps.

    Args:
        collection: Object exposing ``snaps``, ``title``, ``collection_type``
            and ``collection_id``.
        username: Stored as the post's ``creator_id``.
        cutoff_dt: Optional datetime; when even the newest snap timestamp is
            older than it, the whole collection is dropped.

    Returns:
        The Post, or None when the collection has no snaps, lies entirely
        before the cutoff, or has no snap with a media URL.
    """
    snaps = collection.snaps
    if not snaps:
        return None

    # The post is dated (day precision) by its oldest timestamped snap.
    stamps = [snap.timestamp for snap in snaps if snap.timestamp]
    published_at = min(stamps).strftime('%Y-%m-%d') if stamps else None

    # Drop the collection only if nothing in it is as new as the cutoff.
    if cutoff_dt and stamps and max(stamps) < cutoff_dt:
        return None

    def build_attachment(snap):
        # Extension follows the media type: videos are .mp4, the rest .jpg.
        suffix = '.mp4' if snap.media_type == 'video' else '.jpg'
        stem = snap.media_id if snap.media_id else f"snap_{snap.index}"
        seconds = None
        if snap.duration_ms:
            seconds = snap.duration_ms // 1000
        return Attachment(
            name=f"{stem}{suffix}",
            file_type=snap.media_type,
            extension=suffix,
            server_path=snap.media_url,
            download_url=snap.media_url,
            width=snap.width or None,
            height=snap.height or None,
            duration=seconds,
        )

    # Snaps without a media URL cannot be downloaded, so they are left out.
    attachments = [build_attachment(snap) for snap in snaps if snap.media_url]
    if not attachments:
        return None

    # The collection title doubles as both post title and post content.
    heading = collection.title or None
    # Tag with the capitalised collection type, e.g. "Spotlight" or "Highlight".
    tag_name = collection.collection_type.title()

    return Post(
        post_id=collection.collection_id,
        service_id=self.SERVICE_ID,
        platform=self.PLATFORM,
        creator_id=username,
        title=heading,
        content=heading,
        published_at=published_at,
        attachments=attachments,
        auto_tags=[tag_name],
    )
def download_snap(self, media_url: str, output_path: str) -> bool:
    """Download a single snap file using the downloader's HTTP session.

    Args:
        media_url: Direct URL to the media file; ``&amp;`` entities are
            replaced with ``&`` before the request.
        output_path: Local path to save the file. Missing parent directories
            are created; a bare filename is written to the current directory.

    Returns:
        True if the response was HTTP 200 with a non-empty body and the file
        was written. False otherwise; the failure is logged, not raised.
    """
    import os

    downloader = self._get_downloader()
    session = downloader._get_session()
    try:
        url = media_url.replace('&amp;', '&')
        resp = session.get(url, timeout=60)
        payload = resp.content
        if resp.status_code != 200 or len(payload) == 0:
            self.log(f"Download failed: HTTP {resp.status_code}, size={len(payload)}", 'warning')
            return False

        # dirname() is '' for a bare filename and os.makedirs('') raises,
        # so only create the directory when there is one to create.
        target_dir = os.path.dirname(output_path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
        with open(output_path, 'wb') as f:
            f.write(payload)
        return True
    except Exception as e:
        # Best-effort boundary: report any failure to the caller as False.
        self.log(f"Download error: {e}", 'error')
        return False