171
modules/paid_content/filename_parser.py
Normal file
171
modules/paid_content/filename_parser.py
Normal file
@@ -0,0 +1,171 @@
|
||||
"""
|
||||
Filename parser for extracting dates and metadata from Fansly/paid content filenames.
|
||||
|
||||
Supports:
|
||||
1. Fansly snowflake IDs: 871257582885416960.mp4
|
||||
2. Embedded date format: 2023-05-11_at_15-51_id_513099759796367360-zRvVUZeP.mp4
|
||||
3. Date-prefixed files: 2022-07-08.mp4 or 2022-07-08_video.mp4
|
||||
"""
|
||||
|
||||
import re
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional, Dict, Tuple
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Fansly epoch calibrated from known files
# Based on: 513099759796367360 = 2023-05-11 15:51 UTC
FANSLY_EPOCH_MS = 1561483337101


def decode_fansly_snowflake(snowflake_id: str) -> Optional[datetime]:
    """
    Decode a Fansly snowflake ID into the UTC datetime it encodes.

    Fansly IDs follow the Twitter snowflake layout: the millisecond
    timestamp (relative to FANSLY_EPOCH_MS) occupies the bits above the
    low 22, so a right shift by 22 recovers it.

    Returns None when the ID is not numeric, or when the recovered
    timestamp is out of range for the platform clock.
    """
    try:
        numeric_id = int(snowflake_id)
    except ValueError:
        return None

    millis = (numeric_id >> 22) + FANSLY_EPOCH_MS
    try:
        return datetime.fromtimestamp(millis / 1000, tz=timezone.utc)
    except (OverflowError, OSError, ValueError):
        return None
|
||||
|
||||
|
||||
def parse_filename(filename: str) -> Dict:
    """
    Extract date and Fansly-ID metadata from a single filename.

    Patterns are tried in priority order; the first hit wins:
      1. embedded "<date>_at_<time>_id_<snowflake>"  -> confidence 'high'
      2. YYYY-MM-DD prefix                           -> confidence 'high'
      3. stem that is a bare snowflake ID            -> confidence 'high'
      4. snowflake ID anywhere in the stem           -> confidence 'medium'

    Returns:
        {
            'original_filename': str,
            'detected_date': datetime or None,
            'fansly_id': str or None,
            'date_source': str or None,  # 'snowflake', 'embedded', 'prefix', None
            'confidence': str,  # 'high', 'medium', 'low'
        }
    """
    parsed = {
        'original_filename': filename,
        'detected_date': None,
        'fansly_id': None,
        'date_source': None,
        'confidence': 'low',
    }

    # Work on the stem so extensions never interfere with the patterns.
    stem = Path(filename).stem

    # Pattern 1: embedded date + id, separators may be -, _ or space.
    # e.g. 2023-05-11_at_15-51_id_513099759796367360-zRvVUZeP
    embedded = re.search(
        r'(\d{4})[-_ ](\d{2})[-_ ](\d{2})[-_ ]?at[-_ ](\d{2})[-_ ](\d{2})[-_ ]?id[-_ ](\d{15,20})',
        stem,
        re.IGNORECASE,
    )
    if embedded:
        y, mo, d, h, mi, sid = embedded.groups()
        try:
            when = datetime(int(y), int(mo), int(d), int(h), int(mi), 0,
                            tzinfo=timezone.utc)
        except ValueError:
            when = None  # impossible calendar date; fall through
        if when is not None:
            parsed.update(detected_date=when, fansly_id=sid,
                          date_source='embedded', confidence='high')
            return parsed

    # Pattern 2: date prefix, e.g. 2022-07-08.mp4 or 2022_07_08_video.mp4
    prefix = re.match(r'^(\d{4})[-_](\d{2})[-_](\d{2})(?:[_\-\s]|$)', stem)
    if prefix:
        y, mo, d = prefix.groups()
        try:
            # No time component in the name; default to noon UTC.
            when = datetime(int(y), int(mo), int(d), 12, 0, 0,
                            tzinfo=timezone.utc)
        except ValueError:
            when = None
        if when is not None:
            parsed.update(detected_date=when, date_source='prefix',
                          confidence='high')
            return parsed

    # Pattern 3: the stem is nothing but a 15-20 digit snowflake
    # (optionally followed by an _N suffix), e.g. 871257582885416960.mp4
    bare = re.match(r'^(\d{15,20})(?:_\d+)?$', stem)
    if bare:
        sid = bare.group(1)
        when = decode_fansly_snowflake(sid)
        # Sanity window: reject decodes outside 2020-2030.
        if when is not None and 2020 <= when.year <= 2030:
            parsed.update(detected_date=when, fansly_id=sid,
                          date_source='snowflake', confidence='high')
            return parsed

    # Pattern 4: snowflake embedded anywhere, e.g. video_871257582885416960_hd
    for candidate in re.findall(r'(\d{15,20})', stem):
        when = decode_fansly_snowflake(candidate)
        if when is not None and 2020 <= when.year <= 2030:
            parsed.update(detected_date=when, fansly_id=candidate,
                          date_source='snowflake', confidence='medium')
            return parsed

    return parsed
|
||||
|
||||
|
||||
def parse_filenames(filenames: list) -> Dict:
    """
    Run parse_filename over a batch and summarise the detected dates.

    Returns:
        {
            'files': [parsed result for each file],
            'earliest_date': datetime or None,
            'latest_date': datetime or None,
            'suggested_date': datetime or None,  # earliest by default
            'has_dates': bool,
        }
    """
    parsed_files = [parse_filename(name) for name in filenames]
    found_dates = [p['detected_date'] for p in parsed_files if p['detected_date']]

    # The earliest detection doubles as the suggested default.
    earliest = min(found_dates) if found_dates else None
    return {
        'files': parsed_files,
        'earliest_date': earliest,
        'latest_date': max(found_dates) if found_dates else None,
        'suggested_date': earliest,
        'has_dates': bool(found_dates),
    }
|
||||
|
||||
|
||||
def format_date_for_display(dt: datetime) -> str:
    """
    Format a datetime for display: 'May 11, 2023 at 3:51 PM'.

    Returns '' when dt is None.

    Note: the previous implementation used '%-I', a glibc-only strftime
    extension that raises ValueError on Windows. The 12-hour clock value
    is now computed directly so the output is portable.
    """
    if dt is None:
        return ''
    # 12-hour clock without a leading zero: 0 and 12 both map to 12.
    hour_12 = dt.hour % 12 or 12
    return f"{dt.strftime('%b %d, %Y')} at {hour_12}:{dt.strftime('%M %p')}"
|
||||
|
||||
|
||||
def format_date_for_input(dt: datetime) -> Tuple[str, str]:
    """
    Split a datetime into HTML <input> values: ('YYYY-MM-DD', 'HH:MM').

    A None input yields a pair of empty strings.
    """
    if dt is None:
        return ('', '')
    date_part = dt.strftime('%Y-%m-%d')
    time_part = dt.strftime('%H:%M')
    return (date_part, time_part)
|
||||
Reference in New Issue
Block a user