media-downloader/modules/paid_content/models.py

"""
Pydantic models for Paid Content feature
"""

from dataclasses import dataclass, field
from datetime import datetime
from typing import Dict, List, Optional, Any


@dataclass
class Attachment:
    """Represents a file attachment from a post.

    Only ``name`` and ``server_path`` are required; the remaining metadata
    fields default to ``None``/``False`` and are filled in by callers.
    """
    name: str
    server_path: str
    file_type: Optional[str] = None
    extension: Optional[str] = None
    download_url: Optional[str] = None
    file_size: Optional[int] = None
    width: Optional[int] = None
    height: Optional[int] = None
    duration: Optional[int] = None
    needs_quality_recheck: bool = False
    is_preview: bool = False

    @classmethod
    def from_api(cls, data: Dict, base_url: str = '') -> 'Attachment':
        """Create Attachment from API response.

        Args:
            data: Mapping with optional ``name`` and ``path`` entries. Missing
                or null values are treated as empty strings.
            base_url: Prefix used to build ``download_url``; when empty (or
                when there is no path) ``download_url`` is left as ``None``.

        Returns:
            An ``Attachment`` whose ``file_type`` is one of ``'image'``,
            ``'video'``, ``'archive'``, ``'document'`` or ``'unknown'``,
            derived from the extension of ``name``.
        """
        # dict.get() only applies its default when the key is absent, so an
        # explicit null in the payload would arrive as None and break the
        # string handling below. Coerce it to '' instead.
        name = data.get('name') or ''
        path = data.get('path') or ''

        # Detect file type from extension (text after the last dot, lowercased)
        ext = ''
        if '.' in name:
            ext = name.rsplit('.', 1)[-1].lower()

        file_type = 'unknown'
        image_exts = {'jpg', 'jpeg', 'png', 'gif', 'webp', 'bmp', 'tiff', 'heic'}
        video_exts = {'mp4', 'mov', 'avi', 'mkv', 'webm', 'm4v', 'wmv', 'flv'}
        archive_exts = {'zip', 'rar', '7z', 'tar', 'gz'}

        if ext in image_exts:
            file_type = 'image'
        elif ext in video_exts:
            file_type = 'video'
        elif ext in archive_exts:
            file_type = 'archive'
        elif ext in {'pdf', 'doc', 'docx', 'txt'}:
            file_type = 'document'

        return cls(
            name=name,
            server_path=path,
            file_type=file_type,
            extension=ext if ext else None,
            download_url=f"{base_url}/data{path}" if base_url and path else None
        )

    def to_dict(self) -> Dict:
        """Convert to dictionary for database storage.

        ``needs_quality_recheck`` is only emitted (as ``1``) when it is set,
        and ``is_preview`` is not part of the returned dictionary.
        """
        d = {
            'name': self.name,
            'server_path': self.server_path,
            'file_type': self.file_type,
            'extension': self.extension,
            'download_url': self.download_url,
            'file_size': self.file_size,
            'width': self.width,
            'height': self.height,
            'duration': self.duration
        }
        if self.needs_quality_recheck:
            d['needs_quality_recheck'] = 1
        return d


@dataclass
class Post:
    """Represents a post from a creator"""
    post_id: str
    service_id: str
    platform: str
    creator_id: str
    title: Optional[str] = None
    content: Optional[str] = None
    published_at: Optional[str] = None
    added_at: Optional[str] = None
    edited_at: Optional[str] = None
    attachments: List[Attachment] = field(default_factory=list)
    embed_urls: List[str] = field(default_factory=list)
    is_pinned: bool = False
    pinned_at: Optional[str] = None
    auto_tags: List[str] = field(default_factory=list)  # Tag names to auto-apply on sync
    tagged_users: List[str] = field(default_factory=list)  # Instagram users tagged in the post

    @classmethod
    def from_api(cls, data: Dict, service_id: str, platform: str, creator_id: str, base_url: str = '') -> 'Post':
        """Create Post from API response.

        Args:
            data: Raw post mapping. Reads ``id``, ``title``, ``content`` /
                ``substring``, ``published``, ``added``, ``edited``,
                ``attachments``, ``file`` and ``embed``.
            service_id: Stored unchanged on the post.
            platform: Stored unchanged on the post.
            creator_id: Stored unchanged on the post.
            base_url: Forwarded to ``Attachment.from_api`` for download URLs.

        Returns:
            A ``Post`` with HTML tags stripped from its content. Pin state,
            ``auto_tags`` and ``tagged_users`` are left at their defaults.
        """
        # Parse attachments. `or []` (rather than a .get() default) also covers
        # an explicit null value, matching how 'embed' is handled below.
        attachments = [
            Attachment.from_api(att_data, base_url)
            for att_data in (data.get('attachments') or [])
        ]

        # Also check file field (some APIs use this instead of attachments).
        # Falsy values (missing, null, empty dict/string) are ignored.
        file_data = data.get('file')
        if file_data:
            if isinstance(file_data, dict):
                attachments.append(Attachment.from_api(file_data, base_url))
            elif isinstance(file_data, str):
                # A bare path: the last path segment doubles as the file name
                attachments.append(Attachment(
                    name=file_data.rsplit('/', 1)[-1],
                    server_path=file_data
                ))

        # Dates are passed through as received (no parsing is done here)
        published = data.get('published')
        added = data.get('added')
        edited = data.get('edited')

        # Content: use 'content' if available, fallback to 'substring' (list endpoint returns truncated)
        content = data.get('content') or data.get('substring') or ''

        # Single post endpoint returns HTML content (e.g. <p>text</p>), strip tags
        if content and '<' in content:
            import re
            content = re.sub(r'<br\s*/?>', '\n', content)
            content = re.sub(r'</p>\s*<p>', '\n\n', content)
            content = re.sub(r'<[^>]+>', '', content)
            content = content.strip()

        title = data.get('title')

        # OnlyFans posts on Coomer have the post text in 'title' and empty 'content'.
        # Copy title to content and clear title (OF posts don't have real titles).
        if not content and title:
            content = title
            title = None

        # A null id must not be stringified into the literal text 'None'
        raw_id = data.get('id')
        post_id = '' if raw_id is None else str(raw_id)

        return cls(
            post_id=post_id,
            service_id=service_id,
            platform=platform,
            creator_id=creator_id,
            title=title,
            content=content,
            published_at=published,
            added_at=added,
            edited_at=edited,
            attachments=attachments,
            embed_urls=data.get('embed', []) or []
        )

    def to_dict(self) -> Dict:
        """Convert to dictionary for database storage.

        Attachments and embeds are reduced to counts/flags; ``service_id``,
        ``platform``, ``creator_id``, ``auto_tags`` and ``tagged_users`` are
        not included in the returned dictionary.
        """
        return {
            'post_id': self.post_id,
            'title': self.title,
            'content': self.content,
            'published_at': self.published_at,
            'added_at': self.added_at,
            'edited_at': self.edited_at,
            'has_attachments': 1 if self.attachments else 0,
            'attachment_count': len(self.attachments),
            'embed_count': len(self.embed_urls),
            'is_pinned': 1 if self.is_pinned else 0,
            'pinned_at': self.pinned_at
        }


@dataclass
class Message:
    """A single chat message exchanged with a creator."""
    message_id: str
    platform: str
    service_id: str
    creator_id: str  # Creator ID as used by the platform itself
    text: Optional[str] = None
    sent_at: Optional[str] = None
    is_from_creator: bool = True
    is_tip: bool = False
    tip_amount: Optional[float] = None
    price: Optional[float] = None
    is_free: bool = True
    is_purchased: bool = False
    reply_to_message_id: Optional[str] = None
    attachments: List[Attachment] = field(default_factory=list)

    def to_dict(self) -> Dict:
        """Build the flat mapping used for database storage.

        Boolean fields are written as 0/1 integers, and the attachment list
        is reduced to a presence flag plus a count.
        """
        row: Dict = {}
        row['message_id'] = self.message_id
        row['text'] = self.text
        row['sent_at'] = self.sent_at
        row['is_from_creator'] = int(bool(self.is_from_creator))
        row['is_tip'] = int(bool(self.is_tip))
        row['tip_amount'] = self.tip_amount
        row['price'] = self.price
        row['is_free'] = int(bool(self.is_free))
        row['is_purchased'] = int(bool(self.is_purchased))
        row['has_attachments'] = int(bool(self.attachments))
        row['attachment_count'] = len(self.attachments)
        row['reply_to_message_id'] = self.reply_to_message_id
        return row


@dataclass
class Creator:
    """Represents a creator from Coomer/Kemono.

    ``bio`` is not populated by ``from_api``; it keeps its default unless a
    caller sets it.
    """
    creator_id: str
    service_id: str
    platform: str
    username: str
    display_name: Optional[str] = None
    profile_image_url: Optional[str] = None
    banner_image_url: Optional[str] = None
    bio: Optional[str] = None
    post_count: int = 0

    @classmethod
    def from_api(cls, data: Dict, service_id: str, platform: str, base_url: Optional[str] = None) -> 'Creator':
        """Create Creator from API response.

        Args:
            data: Raw creator mapping. Reads ``id``, ``name``, ``post_count``
                and the optional ``profile_image`` / ``banner_image`` URLs.
            service_id: Stored unchanged on the creator.
            platform: Stored unchanged and used in generated image URLs.
            base_url: Site URL used to derive the image host. May be given
                with or without a scheme. When omitted, image URLs come only
                from ``data``.

        Returns:
            A ``Creator``; image URLs explicitly present in ``data`` take
            precedence over the generated ones.
        """
        # A null id must not be stringified into the literal text 'None',
        # which would otherwise end up inside the generated image URLs.
        raw_id = data.get('id')
        creator_id = '' if raw_id is None else str(raw_id)

        # Construct image domain - use .st instead of .party (coomer.party redirects to coomer.st)
        img_domain = None
        if base_url and creator_id:
            from urllib.parse import urlparse
            parsed = urlparse(base_url)
            if not parsed.netloc:
                # urlparse only recognises a host after '//'; a bare
                # 'coomer.st' would otherwise yield an empty netloc and a
                # broken 'https://img./...' URL.
                parsed = urlparse(f"//{base_url}")
            # Convert .party to .st for image URLs (coomer.party/kemono.party images are at .st)
            netloc = parsed.netloc.replace('.party', '.st')
            if netloc:
                img_domain = f"img.{netloc}"

        # Construct profile image URL from icon endpoint
        profile_image_url = data.get('profile_image')
        if not profile_image_url and img_domain:
            # Icon URLs are at img.{domain}/icons/{platform}/{creator_id}
            profile_image_url = f"https://{img_domain}/icons/{platform}/{creator_id}"

        # Construct banner image URL
        banner_image_url = data.get('banner_image')
        if not banner_image_url and img_domain:
            # Banner URLs are at img.{domain}/banners/{platform}/{creator_id}
            banner_image_url = f"https://{img_domain}/banners/{platform}/{creator_id}"

        return cls(
            creator_id=creator_id,
            service_id=service_id,
            platform=platform,
            # `or` (rather than a .get() default) also covers explicit nulls,
            # keeping username a str and post_count an int as declared.
            username=data.get('name') or '',
            display_name=data.get('name'),
            profile_image_url=profile_image_url,
            banner_image_url=banner_image_url,
            post_count=data.get('post_count') or 0
        )

    def to_dict(self) -> Dict:
        """Convert to dictionary for database storage"""
        return {
            'service_id': self.service_id,
            'platform': self.platform,
            'creator_id': self.creator_id,
            'username': self.username,
            'display_name': self.display_name,
            'profile_image_url': self.profile_image_url,
            'banner_image_url': self.banner_image_url,
            'bio': self.bio,
            'post_count': self.post_count
        }


@dataclass
class SyncResult:
    """Outcome counters for one creator sync run."""
    success: bool
    new_posts: int = 0
    new_attachments: int = 0
    downloaded_files: int = 0
    failed_files: int = 0
    skipped_files: int = 0
    error: Optional[str] = None
    downloaded_file_info: Optional[List[Dict]] = None  # Entries shaped like {file_path, filename, source, content_type}

    def to_dict(self) -> Dict:
        """Return the summary fields as a plain dict.

        ``downloaded_file_info`` is not part of the returned mapping.
        """
        exported = (
            'success',
            'new_posts',
            'new_attachments',
            'downloaded_files',
            'failed_files',
            'skipped_files',
            'error',
        )
        return {key: getattr(self, key) for key in exported}


@dataclass
class DownloadResult:
    """Outcome of a single download attempt."""
    success: bool
    file_path: Optional[str] = None
    file_hash: Optional[str] = None
    file_size: Optional[int] = None
    error: Optional[str] = None
    is_duplicate: bool = False

    def to_dict(self) -> Dict:
        """Return every field of the result as a plain dict."""
        exported = (
            'success',
            'file_path',
            'file_hash',
            'file_size',
            'error',
            'is_duplicate',
        )
        return {key: getattr(self, key) for key in exported}