"""
Filename parser for extracting dates and metadata from Fansly/paid content filenames.

Supports:
1. Fansly snowflake IDs: 871257582885416960.mp4
2. Embedded date format: 2023-05-11_at_15-51_id_513099759796367360-zRvVUZeP.mp4
3. Date-prefixed files: 2022-07-08.mp4 or 2022-07-08_video.mp4
"""
|
|
|
|
import re
|
|
from datetime import datetime, timezone
|
|
from typing import Optional, Dict, Tuple
|
|
from pathlib import Path
|
|
|
|
|
|
# Fansly epoch calibrated from known files.
# The value is in milliseconds and is added to the upper bits of a snowflake
# ID (ID >> 22) to obtain a Unix timestamp in milliseconds.
# Based on: 513099759796367360 = 2023-05-11 15:51 UTC
# NOTE(review): with this epoch, (513099759796367360 >> 22) + FANSLY_EPOCH_MS
# is 1683815852769 ms, i.e. 2023-05-11 14:37:32 UTC -- about 74 minutes before
# the 15:51 quoted above. The calibration is therefore approximate, or the
# time in the filename is not the ID's creation time; confirm before relying
# on minute-level accuracy.
FANSLY_EPOCH_MS = 1561483337101
|
|
|
|
|
|
def decode_fansly_snowflake(
    snowflake_id: str, *, epoch_ms: Optional[int] = None
) -> Optional[datetime]:
    """
    Decode a Fansly snowflake ID to a timezone-aware UTC datetime.

    Fansly uses Twitter-style snowflake IDs where the timestamp
    is encoded in the upper bits: shifting the ID right by 22 bits yields
    the number of milliseconds elapsed since the service epoch.

    Args:
        snowflake_id: The ID as a decimal string (anything ``int()`` accepts).
        epoch_ms: Optional epoch override, in Unix milliseconds. When omitted,
            the module-level ``FANSLY_EPOCH_MS`` is used.

    Returns:
        The decoded datetime (tzinfo=UTC), or None when the input is not a
        non-negative integer or the resulting timestamp is out of range.
    """
    try:
        sid = int(snowflake_id)
    except (TypeError, ValueError):
        # TypeError covers None / non-string input; ValueError covers
        # strings that are not integers.
        return None

    # Snowflake IDs are unsigned; a negative value cannot be a real ID.
    if sid < 0:
        return None

    if epoch_ms is None:
        epoch_ms = FANSLY_EPOCH_MS

    # Timestamp is in the upper bits; the low 22 bits are discarded.
    timestamp_ms = (sid >> 22) + epoch_ms

    try:
        return datetime.fromtimestamp(timestamp_ms / 1000, tz=timezone.utc)
    except (ValueError, OverflowError, OSError):
        # Timestamp outside the range the platform / datetime can represent.
        return None
|
|
|
|
|
|
# Precompiled filename patterns.
#
# The (?<!\d) / (?!\d) guards stop a match from starting or ending in the
# middle of a longer digit run, so an over-long number is never truncated or
# split into pieces that merely look like 15-20 digit snowflake IDs.

# 2023-05-11_at_15-51_id_513099759796367360-zRvVUZeP-YcNs55W9.mp4
# 2026-01-24_at_06-22_id_871257582885416960_hash2_4547ab5367c6d7ea3a28ac4fc79df018.mp4
# Also handles spaces: 2023 05 11_at_15 51_id_513099759796367360
_EMBEDDED_DATE_RE = re.compile(
    r'(?<!\d)(\d{4})[-_ ](\d{2})[-_ ](\d{2})'
    r'[-_ ]?at[-_ ](\d{2})[-_ ](\d{2})'
    r'[-_ ]?id[-_ ](\d{15,20})(?!\d)',
    re.IGNORECASE,
)
# 2022-07-08.mp4 or 2022-07-08_video.mp4 (YYYY-MM-DD or YYYY_MM_DD)
_DATE_PREFIX_RE = re.compile(r'^(\d{4})[-_](\d{2})[-_](\d{2})(?:[_\-\s]|$)')
# 871257582885416960.mp4, optionally followed by a numeric suffix such as _1
_PURE_SNOWFLAKE_RE = re.compile(r'^(\d{15,20})(?:_\d+)?$')
# video_871257582885416960_hd.mp4
_ANY_SNOWFLAKE_RE = re.compile(r'(?<!\d)\d{15,20}(?!\d)')

# Decoded snowflake dates outside this year range are treated as false hits.
_MIN_PLAUSIBLE_YEAR = 2020
_MAX_PLAUSIBLE_YEAR = 2030


def parse_filename(filename: str) -> Dict:
    """
    Parse a filename and extract any date/metadata information.

    The extension is stripped and the remaining name is tried against four
    patterns, in order; the first one that yields a valid date wins:

    1. Embedded date + ID (``..._at_HH-MM_id_<id>``)  -> 'embedded', high
    2. Date prefix (``YYYY-MM-DD``, time set to noon)  -> 'prefix', high
    3. Name is only a snowflake ID (``<id>`` / ``<id>_N``) -> 'snowflake', high
    4. Snowflake ID anywhere in the name               -> 'snowflake', medium

    Snowflake IDs are decoded with ``decode_fansly_snowflake`` and accepted
    only when the decoded year is between 2020 and 2030. All returned
    datetimes are timezone-aware (UTC).

    Returns:
        {
            'original_filename': str,
            'detected_date': datetime or None,
            'fansly_id': str or None,
            'date_source': str or None,  # 'snowflake', 'embedded', 'prefix', None
            'confidence': str,  # 'high', 'medium', 'low'
        }
    """
    result = {
        'original_filename': filename,
        'detected_date': None,
        'fansly_id': None,
        'date_source': None,
        'confidence': 'low',
    }

    # Get the base name without extension
    name = Path(filename).stem

    # Pattern 1: Embedded date format
    match = _EMBEDDED_DATE_RE.search(name)
    if match:
        *date_fields, fansly_id = match.groups()
        try:
            year, month, day, hour, minute = (int(part) for part in date_fields)
            embedded_date = datetime(
                year, month, day, hour, minute, 0, tzinfo=timezone.utc
            )
        except ValueError:
            # Digits matched but are not a real date (e.g. month 13);
            # fall through to the remaining patterns.
            embedded_date = None
        if embedded_date is not None:
            result.update(
                detected_date=embedded_date,
                fansly_id=fansly_id,
                date_source='embedded',
                confidence='high',
            )
            return result

    # Pattern 2: Date prefix (YYYY-MM-DD or YYYY_MM_DD)
    match = _DATE_PREFIX_RE.match(name)
    if match:
        try:
            year, month, day = (int(part) for part in match.groups())
            # No time of day in the name: default to noon.
            prefix_date = datetime(year, month, day, 12, 0, 0, tzinfo=timezone.utc)
        except ValueError:
            prefix_date = None
        if prefix_date is not None:
            result.update(
                detected_date=prefix_date,
                date_source='prefix',
                confidence='high',
            )
            return result

    # Pattern 3: Pure Fansly snowflake ID
    match = _PURE_SNOWFLAKE_RE.match(name)
    if match:
        fansly_id = match.group(1)
        decoded_date = decode_fansly_snowflake(fansly_id)
        if (
            decoded_date is not None
            and _MIN_PLAUSIBLE_YEAR <= decoded_date.year <= _MAX_PLAUSIBLE_YEAR
        ):
            result.update(
                detected_date=decoded_date,
                fansly_id=fansly_id,
                date_source='snowflake',
                confidence='high',
            )
            return result

    # Pattern 4: Fansly ID embedded anywhere in filename.
    # Lower confidence: any standalone 15-20 digit number is a candidate.
    for potential_id in _ANY_SNOWFLAKE_RE.findall(name):
        decoded_date = decode_fansly_snowflake(potential_id)
        if (
            decoded_date is not None
            and _MIN_PLAUSIBLE_YEAR <= decoded_date.year <= _MAX_PLAUSIBLE_YEAR
        ):
            result.update(
                detected_date=decoded_date,
                fansly_id=potential_id,
                date_source='snowflake',
                confidence='medium',
            )
            return result

    return result
|
|
|
|
|
|
def parse_filenames(filenames: list) -> Dict:
    """
    Run parse_filename over several filenames and summarise the dates found.

    Returns:
        {
            'files': [parsed result for each file],
            'earliest_date': datetime or None,
            'latest_date': datetime or None,
            'suggested_date': datetime or None,  # currently the earliest date
            'has_dates': bool,
        }
    """
    parsed_files = []
    found_dates = []
    for entry in filenames:
        info = parse_filename(entry)
        parsed_files.append(info)
        # Only files for which a date was detected feed the summary.
        if info['detected_date']:
            found_dates.append(info['detected_date'])

    if found_dates:
        first_seen = min(found_dates)
        last_seen = max(found_dates)
    else:
        first_seen = None
        last_seen = None

    return {
        'files': parsed_files,
        'earliest_date': first_seen,
        'latest_date': last_seen,
        'suggested_date': first_seen,  # Use earliest as default
        'has_dates': bool(found_dates),
    }
|
|
|
|
|
|
def format_date_for_display(dt: Optional[datetime]) -> str:
    """
    Format datetime for display: 'May 11, 2023 at 3:51 PM'.

    The hour is shown on a 12-hour clock without a leading zero. Returns an
    empty string when ``dt`` is None.
    """
    if dt is None:
        return ''
    # '%-I' (hour without zero padding) is a glibc/BSD extension and raises
    # ValueError on Windows, so the 12-hour value is computed by hand.
    hour_12 = dt.hour % 12 or 12
    return f"{dt.strftime('%b %d, %Y')} at {hour_12}:{dt.strftime('%M %p')}"
|
|
|
|
|
|
def format_date_for_input(dt: datetime) -> Tuple[str, str]:
    """
    Split a datetime into the two strings used by HTML date and time inputs.

    Returns (date_str, time_str) as ('YYYY-MM-DD', 'HH:MM'), or two empty
    strings when ``dt`` is None.
    """
    if dt is not None:
        date_part = dt.strftime('%Y-%m-%d')
        time_part = dt.strftime('%H:%M')
        return (date_part, time_part)
    return ('', '')
|