"""
Filename parser for extracting dates and metadata from Fansly/paid content filenames.

Supports:
1. Fansly snowflake IDs: 871257582885416960.mp4
2. Embedded date format: 2023-05-11_at_15-51_id_513099759796367360-zRvVUZeP.mp4
3. Date-prefixed files: 2022-07-08.mp4 or 2022-07-08_video.mp4
"""

import re
from datetime import datetime, timezone
from typing import Optional, Dict, Tuple
from pathlib import Path

# Fansly epoch calibrated from known files
# Based on: 513099759796367360 = 2023-05-11 15:51 UTC
# NOTE(review): with this epoch the reference ID appears to decode to the same
# calendar day but not to exactly 15:51 UTC -- confirm the calibration.
FANSLY_EPOCH_MS = 1561483337101

# Decoded snowflake dates outside this range (inclusive) are rejected as noise.
_MIN_PLAUSIBLE_YEAR = 2020
_MAX_PLAUSIBLE_YEAR = 2030

# 2023-05-11_at_15-51_id_513099759796367360-zRvVUZeP-YcNs55W9
# 2026-01-24_at_06-22_id_871257582885416960_hash2_4547ab5367c6d7ea3a28ac4fc79df018
# Also handles spaces: 2023 05 11_at_15 51_id_513099759796367360
_EMBEDDED_RE = re.compile(
    r'(\d{4})[-_ ](\d{2})[-_ ](\d{2})[-_ ]?at[-_ ](\d{2})[-_ ](\d{2})[-_ ]?id[-_ ](\d{15,20})',
    re.IGNORECASE,
)
# 2022-07-08 or 2022-07-08_video (YYYY-MM-DD or YYYY_MM_DD at the start)
_DATE_PREFIX_RE = re.compile(r'^(\d{4})[-_](\d{2})[-_](\d{2})(?:[_\-\s]|$)')
# 871257582885416960 (15-20 digits), optionally followed by _<number>
_PURE_SNOWFLAKE_RE = re.compile(r'^(\d{15,20})(?:_\d+)?$')
# Any 15-20 digit run, e.g. video_871257582885416960_hd
_ANY_SNOWFLAKE_RE = re.compile(r'(\d{15,20})')


def decode_fansly_snowflake(snowflake_id: str) -> Optional[datetime]:
    """
    Decode a Fansly snowflake ID to a datetime.

    Fansly uses Twitter-style snowflake IDs where the timestamp is
    encoded in the upper bits (shifted right by 22).

    Args:
        snowflake_id: The ID as a decimal string.

    Returns:
        A timezone-aware UTC datetime, or None when the value cannot be
        converted to an integer or the resulting timestamp is out of range.
    """
    try:
        sid = int(snowflake_id)
        # Timestamp is in upper bits
        timestamp_ms = (sid >> 22) + FANSLY_EPOCH_MS
        return datetime.fromtimestamp(timestamp_ms / 1000, tz=timezone.utc)
    except (TypeError, ValueError, OverflowError, OSError):
        # TypeError covers None / non-numeric objects so the "None on failure"
        # contract holds for every kind of bad input.
        return None


def _decode_plausible_snowflake(candidate_id: str) -> Optional[datetime]:
    """Decode *candidate_id* and return the date only if its year is plausible."""
    decoded = decode_fansly_snowflake(candidate_id)
    if decoded and _MIN_PLAUSIBLE_YEAR <= decoded.year <= _MAX_PLAUSIBLE_YEAR:
        return decoded
    return None


def _utc_datetime_or_none(*parts: int) -> Optional[datetime]:
    """Build a UTC datetime from numeric parts, or None if they form no valid date."""
    try:
        return datetime(*parts, tzinfo=timezone.utc)
    except ValueError:
        return None


def parse_filename(filename: str) -> Dict:
    """
    Parse a filename and extract any date/metadata information.

    Patterns are tried in order; the first one that yields a valid date wins:
    embedded date + ID, date prefix, pure snowflake ID, then any snowflake-like
    digit run anywhere in the name.

    Returns:
        {
            'original_filename': str,
            'detected_date': datetime or None,
            'fansly_id': str or None,
            'date_source': str or None,  # 'snowflake', 'embedded', 'prefix', None
            'confidence': str,  # 'high', 'medium', 'low'
        }
    """
    result = {
        'original_filename': filename,
        'detected_date': None,
        'fansly_id': None,
        'date_source': None,
        'confidence': 'low',
    }

    # Get the base name without extension
    name = Path(filename).stem

    # Pattern 1: Embedded date format (date, time and ID all in the name)
    match = _EMBEDDED_RE.search(name)
    if match:
        year, month, day, hour, minute, fansly_id = match.groups()
        detected = _utc_datetime_or_none(
            int(year), int(month), int(day), int(hour), int(minute), 0
        )
        # An impossible date (e.g. month 13) falls through to the next patterns.
        if detected is not None:
            result['detected_date'] = detected
            result['fansly_id'] = fansly_id
            result['date_source'] = 'embedded'
            result['confidence'] = 'high'
            return result

    # Pattern 2: Date prefix (YYYY-MM-DD or YYYY_MM_DD)
    match = _DATE_PREFIX_RE.match(name)
    if match:
        year, month, day = match.groups()
        # No time of day is available, so default to noon.
        detected = _utc_datetime_or_none(int(year), int(month), int(day), 12, 0, 0)
        if detected is not None:
            result['detected_date'] = detected
            result['date_source'] = 'prefix'
            result['confidence'] = 'high'
            return result

    # Pattern 3: Pure Fansly snowflake ID (the whole name is the ID)
    match = _PURE_SNOWFLAKE_RE.match(name)
    if match:
        fansly_id = match.group(1)
        detected = _decode_plausible_snowflake(fansly_id)
        if detected is not None:
            result['detected_date'] = detected
            result['fansly_id'] = fansly_id
            result['date_source'] = 'snowflake'
            result['confidence'] = 'high'
            return result

    # Pattern 4: Fansly ID embedded anywhere in filename; the first digit run
    # that decodes to a plausible date is used, with reduced confidence.
    for potential_id in _ANY_SNOWFLAKE_RE.findall(name):
        detected = _decode_plausible_snowflake(potential_id)
        if detected is not None:
            result['detected_date'] = detected
            result['fansly_id'] = potential_id
            result['date_source'] = 'snowflake'
            result['confidence'] = 'medium'
            return result

    return result


def parse_filenames(filenames: list) -> Dict:
    """
    Parse multiple filenames and return analysis.

    Returns:
        {
            'files': [parsed result for each file],
            'earliest_date': datetime or None,
            'latest_date': datetime or None,
            'suggested_date': datetime or None,  # Currently the earliest date
            'has_dates': bool,
        }
    """
    results = [parse_filename(f) for f in filenames]
    dates = [r['detected_date'] for r in results if r['detected_date']]

    earliest = min(dates) if dates else None
    latest = max(dates) if dates else None

    return {
        'files': results,
        'earliest_date': earliest,
        'latest_date': latest,
        'suggested_date': earliest,  # Use earliest as default
        'has_dates': bool(dates),
    }


def format_date_for_display(dt: Optional[datetime]) -> str:
    """
    Format datetime for display: 'May 11, 2023 at 3:51 PM'.

    The day of month is zero-padded ('May 08, ...'); the hour is not.
    Returns an empty string when *dt* is None.
    """
    if dt is None:
        return ''
    # '%-I' (unpadded 12-hour clock) is a platform-specific strftime extension
    # that is not available on Windows, so derive the hour by hand instead.
    hour_12 = dt.hour % 12 or 12
    return f"{dt.strftime('%b %d, %Y')} at {hour_12}:{dt.strftime('%M %p')}"


def format_date_for_input(dt: Optional[datetime]) -> Tuple[str, str]:
    """
    Format datetime for HTML inputs: (date_str, time_str).

    Returns ('YYYY-MM-DD', 'HH:MM'), or ('', '') when *dt* is None.
    """
    if dt is None:
        return ('', '')
    return (dt.strftime('%Y-%m-%d'), dt.strftime('%H:%M'))