1513 lines
66 KiB
Python
1513 lines
66 KiB
Python
"""TMDb API client for celebrity appearances tracking"""
|
|
import asyncio
|
|
from datetime import datetime, timedelta
|
|
from typing import Dict, List, Optional
|
|
from web.backend.core.http_client import http_client
|
|
from modules.universal_logger import get_logger
|
|
|
|
logger = get_logger('TMDb')
|
|
|
|
class TMDbClient:
|
|
"""Client for interacting with The Movie Database API"""
|
|
|
|
BASE_URL = "https://api.themoviedb.org/3"
|
|
|
|
# Shows where celebrity guests are featured in episode titles
|
|
# These shows don't always have proper TMDB credits, so we scan episode titles
|
|
CELEBRITY_TITLE_SHOWS = [
|
|
{"id": 100347, "name": "Celebrity IOU"},
|
|
{"id": 3956, "name": "Who Do You Think You Are?"},
|
|
{"id": 43221, "name": "Finding Your Roots"},
|
|
{"id": 59717, "name": "Comedians in Cars Getting Coffee"},
|
|
{"id": 61120, "name": "Running Wild with Bear Grylls"},
|
|
{"id": 72610, "name": "Carpool Karaoke: The Series"},
|
|
{"id": 72649, "name": "Hot Ones"},
|
|
{"id": 82108, "name": "Celebrity Family Feud"},
|
|
]
|
|
|
|
def __init__(self, api_key: str):
|
|
self.api_key = api_key
|
|
|
|
async def search_person(self, name: str) -> Optional[Dict]:
|
|
"""Search for person by name, returns first result"""
|
|
try:
|
|
url = f"{self.BASE_URL}/search/person"
|
|
params = {
|
|
"api_key": self.api_key,
|
|
"query": name,
|
|
"page": 1
|
|
}
|
|
|
|
response = await http_client.get(url, params=params)
|
|
data = response.json()
|
|
|
|
if data.get("results"):
|
|
return data["results"][0]
|
|
return None
|
|
|
|
except Exception as e:
|
|
logger.error(f"TMDb search person error for '{name}': {e}")
|
|
return None
|
|
|
|
async def get_person_combined_credits(self, person_id: int) -> Dict:
|
|
"""Get all TV/movie credits for a person"""
|
|
try:
|
|
url = f"{self.BASE_URL}/person/{person_id}/combined_credits"
|
|
params = {"api_key": self.api_key}
|
|
|
|
response = await http_client.get(url, params=params)
|
|
return response.json()
|
|
|
|
except Exception as e:
|
|
logger.error(f"TMDb get credits error for person_id {person_id}: {e}")
|
|
return {"cast": [], "crew": []}
|
|
|
|
async def get_tv_show_details(self, tv_id: int) -> Optional[Dict]:
|
|
"""Get TV show details including next episode air date"""
|
|
try:
|
|
url = f"{self.BASE_URL}/tv/{tv_id}"
|
|
params = {"api_key": self.api_key}
|
|
|
|
response = await http_client.get(url, params=params)
|
|
return response.json()
|
|
|
|
except Exception as e:
|
|
logger.error(f"TMDb get TV show error for tv_id {tv_id}: {e}")
|
|
return None
|
|
|
|
async def get_tv_season_details(self, tv_id: int, season_number: int) -> Optional[Dict]:
|
|
"""Get season details with episode list and air dates"""
|
|
try:
|
|
url = f"{self.BASE_URL}/tv/{tv_id}/season/{season_number}"
|
|
params = {"api_key": self.api_key}
|
|
|
|
response = await http_client.get(url, params=params)
|
|
return response.json()
|
|
|
|
except Exception as e:
|
|
logger.error(f"TMDb get season error for tv_id {tv_id} season {season_number}: {e}")
|
|
return None
|
|
|
|
async def get_tv_aggregate_credits(self, tv_id: int) -> Optional[Dict]:
|
|
"""Get aggregate credits for a TV show - includes all cast/guest appearances with episode details"""
|
|
try:
|
|
url = f"{self.BASE_URL}/tv/{tv_id}/aggregate_credits"
|
|
params = {"api_key": self.api_key}
|
|
|
|
response = await http_client.get(url, params=params)
|
|
return response.json()
|
|
|
|
except Exception as e:
|
|
logger.error(f"TMDb get aggregate credits error for tv_id {tv_id}: {e}")
|
|
return None
|
|
|
|
async def get_episode_credits(self, tv_id: int, season_number: int, episode_number: int) -> Optional[Dict]:
|
|
"""Get credits for a specific TV episode including cast and guest_stars"""
|
|
try:
|
|
url = f"{self.BASE_URL}/tv/{tv_id}/season/{season_number}/episode/{episode_number}/credits"
|
|
params = {"api_key": self.api_key}
|
|
|
|
response = await http_client.get(url, params=params)
|
|
return response.json()
|
|
|
|
except Exception as e:
|
|
logger.error(f"TMDb get episode credits error for tv_id {tv_id} S{season_number}E{episode_number}: {e}")
|
|
return None
|
|
|
|
async def is_person_in_episode(self, person_id: int, tv_id: int, season_number: int, episode_number: int) -> bool:
|
|
"""Check if a person is credited in a specific episode's cast or guest_stars"""
|
|
try:
|
|
credits = await self.get_episode_credits(tv_id, season_number, episode_number)
|
|
if not credits:
|
|
return False
|
|
|
|
# Check cast
|
|
for cast_member in credits.get('cast', []):
|
|
if cast_member.get('id') == person_id:
|
|
return True
|
|
|
|
# Check guest_stars
|
|
for guest in credits.get('guest_stars', []):
|
|
if guest.get('id') == person_id:
|
|
return True
|
|
|
|
return False
|
|
|
|
except Exception as e:
|
|
logger.error(f"TMDb check person in episode error: {e}")
|
|
return False
|
|
|
|
async def find_appearances_by_episode_title(self, celebrity_name: str, lookback_days: int = 30, lookahead_days: int = 90) -> List[Dict]:
|
|
"""
|
|
Scan CELEBRITY_TITLE_SHOWS for episodes where the celebrity's name appears in the title.
|
|
|
|
This catches appearances on shows like Celebrity IOU where TMDB doesn't have proper credits
|
|
but the celebrity's name appears in the episode title.
|
|
|
|
Args:
|
|
celebrity_name: Name of the celebrity to search for
|
|
lookback_days: How many days back to check for aired episodes
|
|
lookahead_days: How many days forward to check for upcoming episodes
|
|
|
|
Returns:
|
|
List of appearance dicts ready for database insertion
|
|
"""
|
|
appearances = []
|
|
today = datetime.now().date()
|
|
start_date = today - timedelta(days=lookback_days)
|
|
end_date = today + timedelta(days=lookahead_days)
|
|
|
|
# Split name for matching (first name, last name, full name)
|
|
name_lower = celebrity_name.lower()
|
|
name_parts = name_lower.split()
|
|
# Use last name for primary matching (more unique), require 3+ chars
|
|
search_terms = [name_lower] # Full name
|
|
if len(name_parts) >= 2:
|
|
last_name = name_parts[-1]
|
|
if len(last_name) >= 3:
|
|
search_terms.append(last_name)
|
|
|
|
for show_info in self.CELEBRITY_TITLE_SHOWS:
|
|
show_id = show_info["id"]
|
|
show_name = show_info["name"]
|
|
|
|
try:
|
|
# Get show details to find current/recent seasons
|
|
show_details = await self.get_tv_show_details(show_id)
|
|
if not show_details:
|
|
continue
|
|
|
|
# Check the most recent seasons (up to 3)
|
|
num_seasons = show_details.get("number_of_seasons", 0)
|
|
if num_seasons == 0:
|
|
continue
|
|
|
|
seasons_to_check = range(max(1, num_seasons - 2), num_seasons + 1)
|
|
|
|
for season_num in seasons_to_check:
|
|
season_data = await self.get_tv_season_details(show_id, season_num)
|
|
if not season_data or not season_data.get("episodes"):
|
|
continue
|
|
|
|
for episode in season_data.get("episodes", []):
|
|
episode_title = episode.get("name", "")
|
|
episode_title_lower = episode_title.lower()
|
|
air_date_str = episode.get("air_date")
|
|
|
|
if not air_date_str:
|
|
continue
|
|
|
|
try:
|
|
air_date = datetime.strptime(air_date_str, "%Y-%m-%d").date()
|
|
except ValueError:
|
|
continue
|
|
|
|
# Check if within date range
|
|
if not (start_date <= air_date <= end_date):
|
|
continue
|
|
|
|
# Check if celebrity name appears in episode title
|
|
name_found = False
|
|
for term in search_terms:
|
|
if term in episode_title_lower:
|
|
name_found = True
|
|
break
|
|
|
|
if not name_found:
|
|
continue
|
|
|
|
# Found a match! Build appearance dict
|
|
logger.info(f"Found episode title match: '{celebrity_name}' in '{show_name}' - {episode_title}")
|
|
|
|
status = "upcoming" if air_date >= today else "aired"
|
|
|
|
appearances.append({
|
|
"appearance_type": "TV",
|
|
"show_name": show_name,
|
|
"episode_title": episode_title,
|
|
"season_number": season_num,
|
|
"episode_number": episode.get("episode_number"),
|
|
"appearance_date": air_date_str,
|
|
"status": status,
|
|
"tmdb_show_id": show_id,
|
|
"tmdb_episode_id": episode.get("id"),
|
|
"description": episode.get("overview"),
|
|
"poster_url": show_details.get("poster_path"),
|
|
"credit_type": "guest", # These are always guest appearances
|
|
"character_name": "Self",
|
|
"network": show_details.get("networks", [{}])[0].get("name") if show_details.get("networks") else None,
|
|
})
|
|
|
|
# Small delay between season requests
|
|
await asyncio.sleep(0.2)
|
|
|
|
# Small delay between shows
|
|
await asyncio.sleep(0.3)
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error scanning {show_name} for {celebrity_name}: {e}")
|
|
continue
|
|
|
|
return appearances
|
|
|
|
async def get_person_tv_episodes(self, person_id: int, tv_id: int, person_name: str, credit_type: str = "acting") -> List[Dict]:
|
|
"""
|
|
Get specific episode appearances for a person on a TV show.
|
|
Uses aggregate_credits to find which episodes they appeared in.
|
|
|
|
Returns list of episodes with air dates.
|
|
"""
|
|
episodes = []
|
|
try:
|
|
# Get show details first
|
|
show_details = await self.get_tv_show_details(tv_id)
|
|
if not show_details:
|
|
return []
|
|
|
|
show_name = show_details.get("name", "")
|
|
networks = show_details.get("networks", [])
|
|
network = networks[0].get("name", "") if networks else ""
|
|
poster_url = show_details.get("poster_path")
|
|
num_seasons = show_details.get("number_of_seasons", 0)
|
|
|
|
# Get aggregate credits to find if person is in the show
|
|
agg_credits = await self.get_tv_aggregate_credits(tv_id)
|
|
if not agg_credits:
|
|
return []
|
|
|
|
# Check if person is in cast (regular or recurring)
|
|
person_in_cast = False
|
|
person_character = None
|
|
person_episode_count = 0
|
|
|
|
for cast_member in agg_credits.get("cast", []):
|
|
# Match by ID or name
|
|
if cast_member.get("id") == person_id or cast_member.get("name", "").lower() == person_name.lower():
|
|
person_in_cast = True
|
|
# Get primary character name and total episode count
|
|
roles = cast_member.get("roles", [])
|
|
if roles:
|
|
# Use the role with most episodes as primary
|
|
primary_role = max(roles, key=lambda r: r.get("episode_count", 0))
|
|
person_character = primary_role.get("character", "Self")
|
|
person_episode_count = sum(r.get("episode_count", 0) for r in roles)
|
|
break
|
|
|
|
# Calculate total episodes in the show
|
|
total_episodes = show_details.get("number_of_episodes", 0)
|
|
|
|
# Determine if person is "main cast" (in most episodes) vs occasional appearance
|
|
# Main cast criteria:
|
|
# - For short series (<20 eps): appeared in at least 50% of episodes
|
|
# - For medium series (20-100 eps): appeared in at least 30% of episodes
|
|
# - For long series (100+ eps): appeared in at least 20% of episodes AND at least 10 eps
|
|
is_main_cast = False
|
|
if person_in_cast and total_episodes > 0:
|
|
appearance_ratio = person_episode_count / total_episodes
|
|
if total_episodes < 20:
|
|
# Short series: need 50%+
|
|
is_main_cast = appearance_ratio >= 0.5
|
|
elif total_episodes < 100:
|
|
# Medium series: need 30%+
|
|
is_main_cast = appearance_ratio >= 0.3
|
|
else:
|
|
# Long series: need 20%+ AND at least 10 eps
|
|
is_main_cast = appearance_ratio >= 0.2 and person_episode_count >= 10
|
|
|
|
# Iterate through seasons to get episode-level details
|
|
for season_num in range(1, num_seasons + 1):
|
|
await asyncio.sleep(0.1) # Rate limiting
|
|
season = await self.get_tv_season_details(tv_id, season_num)
|
|
if not season:
|
|
continue
|
|
|
|
for episode in season.get("episodes", []):
|
|
ep_air_date = episode.get("air_date")
|
|
if not ep_air_date:
|
|
continue
|
|
|
|
# Check if person is in guest_stars for this episode
|
|
guest_stars = episode.get("guest_stars", [])
|
|
person_is_guest = False
|
|
guest_character = None
|
|
|
|
for guest in guest_stars:
|
|
if guest.get("id") == person_id or guest.get("name", "").lower() == person_name.lower():
|
|
person_is_guest = True
|
|
guest_character = guest.get("character")
|
|
break
|
|
|
|
# Include episode if:
|
|
# 1. Person is listed as guest in this episode, OR
|
|
# 2. Person is main cast (in most episodes of the show)
|
|
if person_is_guest or is_main_cast:
|
|
# Determine credit type and character
|
|
ep_credit_type = credit_type
|
|
ep_character = None
|
|
|
|
if person_is_guest:
|
|
ep_credit_type = "guest"
|
|
ep_character = guest_character or "Self"
|
|
elif is_main_cast:
|
|
ep_character = person_character or "Self"
|
|
|
|
episodes.append({
|
|
"tmdb_show_id": tv_id,
|
|
"show_name": show_name,
|
|
"network": network,
|
|
"episode_title": episode.get("name"),
|
|
"season_number": episode.get("season_number"),
|
|
"episode_number": episode.get("episode_number"),
|
|
"appearance_date": ep_air_date,
|
|
"description": episode.get("overview"),
|
|
"poster_url": poster_url,
|
|
"tmdb_episode_id": episode.get("id"),
|
|
"credit_type": ep_credit_type,
|
|
"character_name": ep_character,
|
|
"job_title": None,
|
|
})
|
|
|
|
logger.info(f"Found {len(episodes)} episodes for person {person_id} on {show_name} (in_cast={person_in_cast}, is_main={is_main_cast}, agg_count={person_episode_count}/{total_episodes})")
|
|
return episodes
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error getting person episodes for {person_id} on tv {tv_id}: {e}")
|
|
return []
|
|
|
|
async def get_person_tv_crew_episodes(self, person_id: int, tv_id: int, person_name: str, credit_type: str, job_title: str = None) -> List[Dict]:
|
|
"""
|
|
Get specific episodes where a person worked as crew (director, producer, writer, etc.)
|
|
Iterates through all episodes and checks the crew array for each.
|
|
|
|
Args:
|
|
person_id: TMDB person ID
|
|
tv_id: TMDB TV show ID
|
|
person_name: Person's name for matching
|
|
credit_type: The crew credit type (directing, producing, writing, creator)
|
|
job_title: Specific job title to match (e.g., "Director", "Executive Producer")
|
|
|
|
Returns list of episodes with air dates.
|
|
"""
|
|
episodes = []
|
|
try:
|
|
# Get show details first
|
|
show_details = await self.get_tv_show_details(tv_id)
|
|
if not show_details:
|
|
return []
|
|
|
|
show_name = show_details.get("name", "")
|
|
networks = show_details.get("networks", [])
|
|
network = networks[0].get("name", "") if networks else ""
|
|
poster_url = show_details.get("poster_path")
|
|
num_seasons = show_details.get("number_of_seasons", 0)
|
|
|
|
# Map credit_type to TMDB department
|
|
department_map = {
|
|
'directing': 'Directing',
|
|
'producing': 'Production',
|
|
'writing': 'Writing',
|
|
'creator': 'Production', # Creators are often listed under Production
|
|
}
|
|
target_department = department_map.get(credit_type, credit_type.capitalize())
|
|
|
|
# Iterate through seasons to get episode-level details
|
|
for season_num in range(1, num_seasons + 1):
|
|
await asyncio.sleep(0.1) # Rate limiting
|
|
season = await self.get_tv_season_details(tv_id, season_num)
|
|
if not season:
|
|
continue
|
|
|
|
for episode in season.get("episodes", []):
|
|
ep_air_date = episode.get("air_date")
|
|
if not ep_air_date:
|
|
continue
|
|
|
|
# Check if person is in crew for this episode
|
|
crew = episode.get("crew", [])
|
|
person_in_crew = False
|
|
found_job = None
|
|
|
|
for crew_member in crew:
|
|
# Match by ID or name
|
|
if crew_member.get("id") == person_id or crew_member.get("name", "").lower() == person_name.lower():
|
|
# Check if department/job matches
|
|
dept = crew_member.get("department", "")
|
|
job = crew_member.get("job", "")
|
|
|
|
# Match by department or specific job
|
|
if dept == target_department or (job_title and job_title.lower() in job.lower()):
|
|
person_in_crew = True
|
|
found_job = job
|
|
break
|
|
# Also check for Creator in Writing department
|
|
if credit_type == 'creator' and 'creator' in job.lower():
|
|
person_in_crew = True
|
|
found_job = job
|
|
break
|
|
|
|
if person_in_crew:
|
|
episodes.append({
|
|
"tmdb_show_id": tv_id,
|
|
"show_name": show_name,
|
|
"network": network,
|
|
"episode_title": episode.get("name"),
|
|
"season_number": episode.get("season_number"),
|
|
"episode_number": episode.get("episode_number"),
|
|
"appearance_date": ep_air_date,
|
|
"description": episode.get("overview"),
|
|
"poster_url": poster_url,
|
|
"tmdb_episode_id": episode.get("id"),
|
|
"credit_type": credit_type,
|
|
"character_name": None,
|
|
"job_title": found_job or job_title,
|
|
})
|
|
|
|
logger.info(f"Found {len(episodes)} crew episodes for person {person_id} on {show_name} ({credit_type})")
|
|
return episodes
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error getting crew episodes for {person_id} on tv {tv_id}: {e}")
|
|
return []
|
|
|
|
async def find_upcoming_tv_appearances(self, person_id: int) -> List[Dict]:
|
|
"""
|
|
Find all upcoming TV show appearances for a person.
|
|
Includes both cast (acting/guest) and crew (directing/producing/writing) credits.
|
|
Returns list of upcoming episodes with air dates.
|
|
"""
|
|
appearances = []
|
|
today = datetime.now().date()
|
|
|
|
try:
|
|
# Get all TV credits (cast AND crew)
|
|
credits = await self.get_person_combined_credits(person_id)
|
|
cast_shows = [show for show in credits.get("cast", []) if show.get("media_type") == "tv"]
|
|
crew_shows = [show for show in credits.get("crew", []) if show.get("media_type") == "tv"]
|
|
|
|
# Process and dedupe by show_id + credit_type
|
|
processed_shows = {}
|
|
|
|
# Process cast (acting or guest credits)
|
|
for show in cast_shows:
|
|
tv_id = show.get("id")
|
|
if not tv_id:
|
|
continue
|
|
|
|
character = show.get('character')
|
|
episode_count = show.get('episode_count', 0)
|
|
genre_ids = show.get('genre_ids', [])
|
|
show_name = show.get('name') or show.get('original_name', '')
|
|
|
|
# Determine credit type (acting, guest, host, etc.)
|
|
# Use _determine_credit_type directly to properly detect hosts (e.g., SNL hosts)
|
|
credit_type = self._determine_credit_type(character, episode_count, genre_ids, show_name)
|
|
|
|
key = (tv_id, credit_type)
|
|
if key not in processed_shows:
|
|
processed_shows[key] = {
|
|
'show': show,
|
|
'credit_type': credit_type,
|
|
'character_name': character,
|
|
'job_title': None,
|
|
}
|
|
|
|
# Process crew (directing, producing, writing credits)
|
|
for show in crew_shows:
|
|
tv_id = show.get("id")
|
|
if not tv_id:
|
|
continue
|
|
|
|
credit_type = self._map_department_to_credit_type(
|
|
show.get('department'),
|
|
show.get('job')
|
|
)
|
|
key = (tv_id, credit_type)
|
|
if key not in processed_shows:
|
|
processed_shows[key] = {
|
|
'show': show,
|
|
'credit_type': credit_type,
|
|
'character_name': None,
|
|
'job_title': show.get('job'),
|
|
}
|
|
|
|
# Check each TV show for upcoming episodes
|
|
for (tv_id, credit_type), show_data in processed_shows.items():
|
|
# Rate limiting
|
|
await asyncio.sleep(0.25)
|
|
|
|
# Get show details
|
|
show_details = await self.get_tv_show_details(tv_id)
|
|
if not show_details:
|
|
continue
|
|
|
|
# Check if show has upcoming episodes
|
|
next_episode = show_details.get("next_episode_to_air")
|
|
if next_episode:
|
|
air_date_str = next_episode.get("air_date")
|
|
if air_date_str:
|
|
air_date = datetime.strptime(air_date_str, "%Y-%m-%d").date()
|
|
|
|
if air_date >= today:
|
|
season_num = next_episode.get("season_number")
|
|
episode_num = next_episode.get("episode_number")
|
|
|
|
# For guest/host/cameo credits, verify the person is in the episode credits
|
|
# These are one-time appearances and may not continue to future episodes
|
|
episode_count = show_data['show'].get('episode_count', 0)
|
|
requires_verification = (
|
|
credit_type in ('guest', 'host', 'cameo', 'self') or
|
|
episode_count <= 5 # Few episodes suggests guest appearances
|
|
)
|
|
|
|
if requires_verification and season_num and episode_num:
|
|
# Rate limiting for episode credits check
|
|
await asyncio.sleep(0.25)
|
|
is_in_episode = await self.is_person_in_episode(
|
|
person_id, tv_id, season_num, episode_num
|
|
)
|
|
if not is_in_episode:
|
|
logger.debug(
|
|
f"Skipping {show_details.get('name')} - person not in episode credits"
|
|
)
|
|
continue
|
|
|
|
appearances.append({
|
|
"tmdb_show_id": tv_id,
|
|
"show_name": show_details.get("name"),
|
|
"network": show_details.get("networks", [{}])[0].get("name", ""),
|
|
"episode_title": next_episode.get("name"),
|
|
"season_number": season_num,
|
|
"episode_number": episode_num,
|
|
"appearance_date": air_date_str,
|
|
"description": next_episode.get("overview"),
|
|
"poster_url": show_details.get("poster_path"),
|
|
"tmdb_episode_id": next_episode.get("id"),
|
|
"credit_type": show_data['credit_type'],
|
|
"character_name": show_data['character_name'],
|
|
"job_title": show_data['job_title'],
|
|
})
|
|
|
|
return appearances
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error finding upcoming appearances for person {person_id}: {e}")
|
|
return []
|
|
|
|
async def find_recent_aired_tv_appearances(self, person_id: int, days_back: int = 90) -> List[Dict]:
|
|
"""
|
|
Find recent aired TV show appearances for a person
|
|
Goes back through recent seasons to find aired episodes
|
|
|
|
Args:
|
|
person_id: TMDb person ID
|
|
days_back: How many days back to look for aired episodes (default 90)
|
|
|
|
Returns:
|
|
List of aired episodes with air dates
|
|
"""
|
|
appearances = []
|
|
today = datetime.now().date()
|
|
cutoff_date = today - timedelta(days=days_back)
|
|
|
|
try:
|
|
# Get all TV credits
|
|
credits = await self.get_person_combined_credits(person_id)
|
|
tv_shows = credits.get("cast", [])
|
|
|
|
# Filter for TV shows only
|
|
tv_shows = [show for show in tv_shows if show.get("media_type") == "tv"]
|
|
|
|
logger.info(f"Checking {len(tv_shows)} TV shows for recent aired episodes (last {days_back} days)")
|
|
|
|
# Check each TV show for recent aired episodes
|
|
for show in tv_shows:
|
|
tv_id = show.get("id")
|
|
if not tv_id:
|
|
continue
|
|
|
|
# Rate limiting
|
|
await asyncio.sleep(0.25)
|
|
|
|
# Get show details
|
|
show_details = await self.get_tv_show_details(tv_id)
|
|
if not show_details:
|
|
continue
|
|
|
|
show_name = show_details.get("name")
|
|
network = show_details.get("networks", [{}])[0].get("name", "")
|
|
poster_url = show_details.get("poster_path")
|
|
|
|
# Get the last aired episode
|
|
last_episode = show_details.get("last_episode_to_air")
|
|
if not last_episode:
|
|
continue
|
|
|
|
# Check if last episode is within our window
|
|
last_air_date_str = last_episode.get("air_date")
|
|
if not last_air_date_str:
|
|
continue
|
|
|
|
try:
|
|
last_air_date = datetime.strptime(last_air_date_str, "%Y-%m-%d").date()
|
|
except ValueError:
|
|
continue
|
|
|
|
# If the last episode is too old, skip this show
|
|
if last_air_date < cutoff_date:
|
|
continue
|
|
|
|
# Get the season details for the last aired episode
|
|
last_season = last_episode.get("season_number")
|
|
if not last_season:
|
|
continue
|
|
|
|
# Rate limiting
|
|
await asyncio.sleep(0.25)
|
|
|
|
# Get all episodes from this season
|
|
season_details = await self.get_tv_season_details(tv_id, last_season)
|
|
if not season_details:
|
|
continue
|
|
|
|
episodes = season_details.get("episodes", [])
|
|
|
|
# Filter for episodes within our date range that have aired
|
|
for episode in episodes:
|
|
ep_air_date_str = episode.get("air_date")
|
|
if not ep_air_date_str:
|
|
continue
|
|
|
|
try:
|
|
ep_air_date = datetime.strptime(ep_air_date_str, "%Y-%m-%d").date()
|
|
except ValueError:
|
|
continue
|
|
|
|
# Only include episodes that:
|
|
# 1. Have already aired (< today)
|
|
# 2. Are within our lookback window (>= cutoff_date)
|
|
if cutoff_date <= ep_air_date < today:
|
|
appearances.append({
|
|
"tmdb_show_id": tv_id,
|
|
"show_name": show_name,
|
|
"network": network,
|
|
"episode_title": episode.get("name"),
|
|
"season_number": last_season,
|
|
"episode_number": episode.get("episode_number"),
|
|
"appearance_date": ep_air_date_str,
|
|
"description": episode.get("overview"),
|
|
"poster_url": poster_url,
|
|
"tmdb_episode_id": episode.get("id"),
|
|
"status": "aired"
|
|
})
|
|
|
|
logger.info(f"Found {len(appearances)} aired episodes for person {person_id}")
|
|
return appearances
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error finding aired appearances for person {person_id}: {e}")
|
|
return []
|
|
|
|
async def find_all_aired_tv_appearances(self, person_id: int) -> List[Dict]:
|
|
"""
|
|
Find ALL aired TV show appearances for a person across all seasons
|
|
This is a complete historical sync - can be slow for people with many shows
|
|
|
|
Args:
|
|
person_id: TMDb person ID
|
|
|
|
Returns:
|
|
List of ALL aired episodes with air dates
|
|
"""
|
|
appearances = []
|
|
today = datetime.now().date()
|
|
|
|
try:
|
|
# Get all TV credits
|
|
credits = await self.get_person_combined_credits(person_id)
|
|
tv_shows = credits.get("cast", [])
|
|
|
|
# Filter for TV shows only
|
|
tv_shows = [show for show in tv_shows if show.get("media_type") == "tv"]
|
|
|
|
logger.info(f"Starting FULL historical sync for {len(tv_shows)} TV shows")
|
|
|
|
# Check each TV show for ALL aired episodes
|
|
for show in tv_shows:
|
|
tv_id = show.get("id")
|
|
if not tv_id:
|
|
continue
|
|
|
|
# Rate limiting
|
|
await asyncio.sleep(0.25)
|
|
|
|
# Get show details
|
|
show_details = await self.get_tv_show_details(tv_id)
|
|
if not show_details:
|
|
continue
|
|
|
|
show_name = show_details.get("name")
|
|
networks = show_details.get("networks", [])
|
|
network = networks[0].get("name", "") if networks else ""
|
|
poster_url = show_details.get("poster_path")
|
|
num_seasons = show_details.get("number_of_seasons", 0)
|
|
|
|
if num_seasons == 0:
|
|
continue
|
|
|
|
logger.info(f" Processing '{show_name}' ({num_seasons} seasons)")
|
|
|
|
# Get all episodes from ALL seasons
|
|
for season_num in range(1, num_seasons + 1):
|
|
# Rate limiting
|
|
await asyncio.sleep(0.25)
|
|
|
|
season_details = await self.get_tv_season_details(tv_id, season_num)
|
|
if not season_details:
|
|
continue
|
|
|
|
episodes = season_details.get("episodes", [])
|
|
|
|
# Get all aired episodes from this season
|
|
for episode in episodes:
|
|
ep_air_date_str = episode.get("air_date")
|
|
if not ep_air_date_str:
|
|
continue
|
|
|
|
try:
|
|
ep_air_date = datetime.strptime(ep_air_date_str, "%Y-%m-%d").date()
|
|
except ValueError:
|
|
continue
|
|
|
|
# Only include episodes that have already aired
|
|
if ep_air_date < today:
|
|
appearances.append({
|
|
"tmdb_show_id": tv_id,
|
|
"show_name": show_name,
|
|
"network": network,
|
|
"episode_title": episode.get("name"),
|
|
"season_number": season_num,
|
|
"episode_number": episode.get("episode_number"),
|
|
"appearance_date": ep_air_date_str,
|
|
"description": episode.get("overview"),
|
|
"poster_url": poster_url,
|
|
"tmdb_episode_id": episode.get("id"),
|
|
"status": "aired"
|
|
})
|
|
|
|
logger.info(f" Found {sum(1 for a in appearances if a['show_name'] == show_name)} aired episodes")
|
|
|
|
logger.info(f"FULL historical sync complete: {len(appearances)} total aired episodes for person {person_id}")
|
|
return appearances
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error in full historical sync for person {person_id}: {e}")
|
|
return []
|
|
|
|
async def get_movie_details(self, movie_id: int) -> Optional[Dict]:
|
|
"""Get movie details including release date"""
|
|
try:
|
|
url = f"{self.BASE_URL}/movie/{movie_id}"
|
|
params = {"api_key": self.api_key}
|
|
|
|
response = await http_client.get(url, params=params)
|
|
return response.json()
|
|
|
|
except Exception as e:
|
|
logger.error(f"TMDb get movie error for movie_id {movie_id}: {e}")
|
|
return None
|
|
|
|
async def find_all_movie_appearances(self, person_id: int) -> List[Dict]:
|
|
"""
|
|
Find ALL movie appearances for a person (past, present, future).
|
|
Includes both cast (acting/guest) and crew (directing/producing/writing) credits.
|
|
Returns complete filmography with credit types.
|
|
"""
|
|
appearances = []
|
|
today = datetime.now().date()
|
|
|
|
try:
|
|
# Get all movie credits (cast AND crew)
|
|
credits = await self.get_person_combined_credits(person_id)
|
|
cast_movies = [movie for movie in credits.get("cast", []) if movie.get("media_type") == "movie"]
|
|
crew_movies = [movie for movie in credits.get("crew", []) if movie.get("media_type") == "movie"]
|
|
|
|
# Process and dedupe by movie_id + credit_type
|
|
processed_movies = {}
|
|
|
|
# Process cast (acting or guest credits)
|
|
for movie in cast_movies:
|
|
movie_id = movie.get("id")
|
|
if not movie_id:
|
|
continue
|
|
|
|
character = movie.get('character')
|
|
# For movies, guest = playing themselves (documentaries, etc.)
|
|
is_guest = self._is_guest_appearance(character, episode_count=1, genre_ids=None)
|
|
credit_type = 'guest' if is_guest else 'acting'
|
|
|
|
key = (movie_id, credit_type)
|
|
if key not in processed_movies:
|
|
processed_movies[key] = {
|
|
'movie': movie,
|
|
'credit_type': credit_type,
|
|
'character_name': character,
|
|
'job_title': None,
|
|
}
|
|
|
|
# Process crew (directing, producing, writing credits)
|
|
for movie in crew_movies:
|
|
movie_id = movie.get("id")
|
|
if not movie_id:
|
|
continue
|
|
|
|
credit_type = self._map_department_to_credit_type(
|
|
movie.get('department'),
|
|
movie.get('job')
|
|
)
|
|
key = (movie_id, credit_type)
|
|
if key not in processed_movies:
|
|
processed_movies[key] = {
|
|
'movie': movie,
|
|
'credit_type': credit_type,
|
|
'character_name': None,
|
|
'job_title': movie.get('job'),
|
|
}
|
|
|
|
logger.info(f"Processing {len(processed_movies)} movies for complete filmography (cast + crew)")
|
|
|
|
# Check each movie
|
|
for (movie_id, credit_type), movie_data in processed_movies.items():
|
|
# Rate limiting
|
|
await asyncio.sleep(0.25)
|
|
|
|
# Get movie details
|
|
movie_details = await self.get_movie_details(movie_id)
|
|
if not movie_details:
|
|
continue
|
|
|
|
# Check release date
|
|
release_date_str = movie_details.get("release_date")
|
|
if release_date_str:
|
|
try:
|
|
release_date = datetime.strptime(release_date_str, "%Y-%m-%d").date()
|
|
except ValueError:
|
|
continue
|
|
|
|
# Get production companies/studios
|
|
studios = movie_details.get("production_companies", [])
|
|
studio_name = studios[0].get("name") if studios else ""
|
|
|
|
# Determine status
|
|
status = "aired" if release_date < today else "upcoming"
|
|
|
|
appearances.append({
|
|
"tmdb_movie_id": movie_id,
|
|
"movie_name": movie_details.get("title"),
|
|
"studio": studio_name,
|
|
"release_date": release_date_str,
|
|
"description": movie_details.get("overview"),
|
|
"poster_url": movie_details.get("poster_path"),
|
|
"runtime": movie_details.get("runtime"),
|
|
"status": status,
|
|
"credit_type": movie_data['credit_type'],
|
|
"character_name": movie_data['character_name'],
|
|
"job_title": movie_data['job_title'],
|
|
})
|
|
|
|
logger.info(f"Found {len(appearances)} total movies (complete filmography)")
|
|
return appearances
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error finding complete filmography for person {person_id}: {e}")
|
|
return []
|
|
|
|
async def find_upcoming_movie_appearances(self, person_id: int) -> List[Dict]:
|
|
"""
|
|
Find all upcoming movie releases for a person.
|
|
Includes both cast (acting/guest) and crew (directing/producing/writing) credits.
|
|
Returns list of upcoming/recent movies with release dates.
|
|
"""
|
|
appearances = []
|
|
today = datetime.now().date()
|
|
# Look back 30 days and forward 365 days
|
|
cutoff_past = today - timedelta(days=30)
|
|
cutoff_future = today + timedelta(days=365)
|
|
|
|
try:
|
|
# Get all movie credits (cast AND crew)
|
|
credits = await self.get_person_combined_credits(person_id)
|
|
cast_movies = [movie for movie in credits.get("cast", []) if movie.get("media_type") == "movie"]
|
|
crew_movies = [movie for movie in credits.get("crew", []) if movie.get("media_type") == "movie"]
|
|
|
|
# Process and dedupe by movie_id + credit_type
|
|
processed_movies = {}
|
|
|
|
# Process cast (acting or guest credits)
|
|
for movie in cast_movies:
|
|
movie_id = movie.get("id")
|
|
if not movie_id:
|
|
continue
|
|
|
|
character = movie.get('character')
|
|
# For movies, guest = playing themselves (documentaries, etc.)
|
|
is_guest = self._is_guest_appearance(character, episode_count=1, genre_ids=None)
|
|
credit_type = 'guest' if is_guest else 'acting'
|
|
|
|
key = (movie_id, credit_type)
|
|
if key not in processed_movies:
|
|
processed_movies[key] = {
|
|
'movie': movie,
|
|
'credit_type': credit_type,
|
|
'character_name': character,
|
|
'job_title': None,
|
|
}
|
|
|
|
# Process crew (directing, producing, writing credits)
|
|
for movie in crew_movies:
|
|
movie_id = movie.get("id")
|
|
if not movie_id:
|
|
continue
|
|
|
|
credit_type = self._map_department_to_credit_type(
|
|
movie.get('department'),
|
|
movie.get('job')
|
|
)
|
|
key = (movie_id, credit_type)
|
|
if key not in processed_movies:
|
|
processed_movies[key] = {
|
|
'movie': movie,
|
|
'credit_type': credit_type,
|
|
'character_name': None,
|
|
'job_title': movie.get('job'),
|
|
}
|
|
|
|
# Check each movie for upcoming/recent releases
|
|
for (movie_id, credit_type), movie_data in processed_movies.items():
|
|
# Rate limiting
|
|
await asyncio.sleep(0.25)
|
|
|
|
# Get movie details
|
|
movie_details = await self.get_movie_details(movie_id)
|
|
if not movie_details:
|
|
continue
|
|
|
|
# Check release date
|
|
release_date_str = movie_details.get("release_date")
|
|
if release_date_str:
|
|
try:
|
|
release_date = datetime.strptime(release_date_str, "%Y-%m-%d").date()
|
|
|
|
# Include if within window (30 days past to 365 days future)
|
|
if cutoff_past <= release_date <= cutoff_future:
|
|
# Get production companies/studios
|
|
studios = movie_details.get("production_companies", [])
|
|
studio_name = studios[0].get("name") if studios else ""
|
|
|
|
appearances.append({
|
|
"tmdb_movie_id": movie_id,
|
|
"movie_name": movie_details.get("title"),
|
|
"studio": studio_name,
|
|
"release_date": release_date_str,
|
|
"description": movie_details.get("overview"),
|
|
"poster_url": movie_details.get("poster_path"),
|
|
"runtime": movie_details.get("runtime"),
|
|
"credit_type": movie_data['credit_type'],
|
|
"character_name": movie_data['character_name'],
|
|
"job_title": movie_data['job_title'],
|
|
})
|
|
except ValueError:
|
|
continue
|
|
|
|
return appearances
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error finding upcoming movie appearances for person {person_id}: {e}")
|
|
return []
|
|
|
|
def _determine_credit_type(self, character: str, episode_count: int, genre_ids: List[int] = None,
|
|
show_name: str = None, person_name: str = None, total_episodes: int = None) -> str:
|
|
"""
|
|
Determine the credit type for a TV appearance.
|
|
|
|
Args:
|
|
character: Character name from TMDB (e.g., 'Self', 'Gabrielle Solis')
|
|
episode_count: Number of episodes appeared in
|
|
genre_ids: List of TMDB genre IDs for the show
|
|
show_name: Name of the TV show
|
|
person_name: Name of the person (to check if they're the host/star of the show)
|
|
total_episodes: Total episodes in the show (to determine if they're main cast)
|
|
|
|
Returns:
|
|
Credit type: 'acting', 'host', or 'guest'
|
|
"""
|
|
character_lower = (character or '').lower().strip()
|
|
|
|
# Explicit guest indicators - "Self - Guest", "Herself - Interview", etc.
|
|
if 'guest' in character_lower or 'interview' in character_lower:
|
|
return 'guest'
|
|
|
|
# Voice roles in animated shows (e.g., "Isabel Gutiérrez (voice)") are guest appearances
|
|
# unless they're main cast (many episodes). Single/few episode voice roles = guest.
|
|
if '(voice)' in character_lower and episode_count <= 3:
|
|
return 'guest'
|
|
|
|
# Check if playing themselves (but not explicitly as guest)
|
|
is_playing_self = character_lower in ['self', 'herself', 'himself', 'themselves', 'themself']
|
|
|
|
# "Self - Host" = they're the host
|
|
if 'host' in character_lower or 'presenter' in character_lower:
|
|
return 'host'
|
|
|
|
# Plain "Self - " with other descriptions (not guest/interview) = check further
|
|
if character_lower.startswith(('self ', 'self-', 'herself ', 'himself ')):
|
|
is_playing_self = True
|
|
|
|
# Check if this is the person's own show (their name in the title)
|
|
is_their_show = False
|
|
if person_name and show_name:
|
|
person_parts = person_name.lower().split()
|
|
show_lower = show_name.lower()
|
|
for part in person_parts:
|
|
if len(part) > 2 and part in show_lower:
|
|
is_their_show = True
|
|
break
|
|
|
|
# If their name is in the show title and they play themselves, they're the HOST
|
|
# (e.g., "Ellen DeGeneres" on "The Ellen DeGeneres Show")
|
|
if is_their_show and is_playing_self:
|
|
return 'host'
|
|
|
|
# If they appear in most episodes, they're main cast
|
|
is_main_cast = False
|
|
if total_episodes and total_episodes > 0 and episode_count > 0:
|
|
appearance_ratio = episode_count / total_episodes
|
|
if episode_count >= 5 or appearance_ratio >= 0.3:
|
|
is_main_cast = True
|
|
|
|
# Main cast playing a CHARACTER = acting (not playing themselves)
|
|
# Note: We do NOT automatically mark main cast playing themselves as "host"
|
|
# because that would incorrectly label recurring guests as hosts.
|
|
# Only the show's actual host (name in title or explicit "Host" label) gets 'host'.
|
|
if is_main_cast and not is_playing_self:
|
|
return 'acting'
|
|
|
|
# Saturday Night Live special handling - only repertory players are 'acting'
|
|
# Everyone else (hosts, musical guests, cameos) should be 'host' or 'guest'
|
|
# SNL has ~20+ episodes per season, so main cast would have episode_count >= 5
|
|
if show_name and 'saturday night live' in show_name.lower():
|
|
if episode_count <= 3: # Hosts/guests appear in 1-2 episodes typically
|
|
# If they played themselves, they were a host (or cameo)
|
|
if is_playing_self or 'host' in character_lower:
|
|
return 'host'
|
|
return 'guest'
|
|
|
|
# Entertainment news/talk shows = always guest (unless it's their own show)
|
|
GUEST_SHOW_PATTERNS = [
|
|
'entertainment tonight',
|
|
'e! true hollywood story',
|
|
'true hollywood story',
|
|
'access hollywood',
|
|
'extra',
|
|
'inside edition',
|
|
'e! news',
|
|
'et canada',
|
|
'e.t. canada',
|
|
'hollywood access',
|
|
'the insider',
|
|
'omg! insider',
|
|
'celebrity page',
|
|
'dish nation',
|
|
'hollywood today',
|
|
]
|
|
|
|
if show_name:
|
|
show_name_lower = show_name.lower().strip()
|
|
for pattern in GUEST_SHOW_PATTERNS:
|
|
if pattern in show_name_lower:
|
|
return 'guest'
|
|
|
|
# Talk show / variety / game show genre IDs from TMDB with limited episodes = guest
|
|
# 10767 = Talk, 10763 = News, 10764 = Reality (includes game shows)
|
|
guest_show_genres = {10767, 10763, 10764}
|
|
if genre_ids and guest_show_genres.intersection(set(genre_ids)):
|
|
if episode_count <= 2:
|
|
return 'guest'
|
|
|
|
# Playing themselves on a show that's NOT theirs = guest (regardless of episode count)
|
|
# This covers recurring talk show guests who appear multiple times
|
|
if is_playing_self:
|
|
return 'guest'
|
|
|
|
# Default: acting (playing a character in a show)
|
|
return 'acting'
|
|
|
|
def _is_guest_appearance(self, character: str, episode_count: int, genre_ids: List[int] = None,
|
|
show_name: str = None, person_name: str = None, total_episodes: int = None) -> bool:
|
|
"""Legacy wrapper - returns True if credit_type is 'guest'"""
|
|
credit_type = self._determine_credit_type(character, episode_count, genre_ids, show_name, person_name, total_episodes)
|
|
return credit_type == 'guest'
|
|
|
|
def _map_department_to_credit_type(self, department: str, job: str) -> str:
|
|
"""
|
|
Map TMDB department/job to our credit_type values.
|
|
|
|
Args:
|
|
department: TMDB department (e.g., 'Directing', 'Production', 'Writing')
|
|
job: TMDB job title (e.g., 'Director', 'Executive Producer', 'Screenplay')
|
|
|
|
Returns:
|
|
Credit type: 'acting', 'directing', 'producing', 'writing', 'creator', 'guest'
|
|
"""
|
|
if not department:
|
|
return 'acting' # Default for cast entries
|
|
|
|
department_lower = department.lower()
|
|
job_lower = (job or '').lower()
|
|
|
|
# Check for creator role first
|
|
if 'creator' in job_lower or 'created by' in job_lower:
|
|
return 'creator'
|
|
|
|
# Map by department
|
|
if department_lower == 'directing':
|
|
return 'directing'
|
|
elif department_lower == 'production':
|
|
return 'producing'
|
|
elif department_lower == 'writing':
|
|
return 'writing'
|
|
elif department_lower == 'acting':
|
|
return 'acting'
|
|
else:
|
|
# Other crew roles map to producing as a catch-all
|
|
# (Sound, Art, Camera, Costume, Crew, Editing, Visual Effects, Lighting)
|
|
return 'producing'
|
|
|
|
async def find_all_tv_appearances_with_credits(self, person_id: int, person_name: str = "", progress_callback=None) -> List[Dict]:
|
|
"""
|
|
Find ALL TV show appearances for a person with credit type information.
|
|
Includes both cast (acting) and crew (directing, producing, writing, creator) credits.
|
|
|
|
For all cast roles (acting and guest), fetches specific episode appearances.
|
|
For crew credits, creates show-level entries.
|
|
|
|
Args:
|
|
person_id: TMDb person ID
|
|
person_name: Person's name for matching in episode credits
|
|
progress_callback: Optional callback(current_show, shows_processed, total_shows)
|
|
|
|
Returns:
|
|
List of TV appearances with credit_type, character_name, and job_title
|
|
"""
|
|
appearances = []
|
|
today = datetime.now().date()
|
|
|
|
# Threshold: if credited for more episodes, treat as regular cast and sync all episodes
|
|
REGULAR_CAST_THRESHOLD = 10
|
|
|
|
try:
|
|
# Get all combined credits
|
|
credits = await self.get_person_combined_credits(person_id)
|
|
|
|
# Process cast credits
|
|
cast_shows = [show for show in credits.get("cast", []) if show.get("media_type") == "tv"]
|
|
# Process crew credits
|
|
crew_shows = [show for show in credits.get("crew", []) if show.get("media_type") == "tv"]
|
|
|
|
# Combine and dedupe by tv_id + credit_type
|
|
processed_shows = {}
|
|
|
|
# Process cast (acting or guest credits)
|
|
for show in cast_shows:
|
|
tv_id = show.get("id")
|
|
if not tv_id:
|
|
continue
|
|
|
|
character = show.get('character')
|
|
episode_count = show.get('episode_count', 0)
|
|
genre_ids = show.get('genre_ids', [])
|
|
show_name = show.get('name') or show.get('original_name', '')
|
|
# Get total episodes from the show data if available
|
|
total_episodes = show.get('number_of_episodes') or show.get('episode_count', 0)
|
|
|
|
# Determine credit type (acting, host, or guest)
|
|
# Pass person_name to check if they're the star/host of the show
|
|
credit_type = self._determine_credit_type(
|
|
character, episode_count, genre_ids, show_name,
|
|
person_name=person_name, total_episodes=total_episodes
|
|
)
|
|
|
|
# Determine if we should sync all episodes or just show-level
|
|
# Regular cast/host (many episodes, not a guest) = sync all episodes
|
|
# Guest/few episodes = show-level only
|
|
sync_all_episodes = (credit_type in ('acting', 'host') and episode_count >= REGULAR_CAST_THRESHOLD)
|
|
|
|
key = (tv_id, credit_type)
|
|
if key not in processed_shows:
|
|
processed_shows[key] = {
|
|
'show': show,
|
|
'credit_type': credit_type,
|
|
'character_name': character,
|
|
'job_title': None,
|
|
'episode_count': episode_count,
|
|
'sync_all_episodes': sync_all_episodes
|
|
}
|
|
|
|
# Process crew (other credit types) - always show-level only
|
|
for show in crew_shows:
|
|
tv_id = show.get("id")
|
|
if not tv_id:
|
|
continue
|
|
credit_type = self._map_department_to_credit_type(
|
|
show.get('department'),
|
|
show.get('job')
|
|
)
|
|
key = (tv_id, credit_type)
|
|
if key not in processed_shows:
|
|
processed_shows[key] = {
|
|
'show': show,
|
|
'credit_type': credit_type,
|
|
'character_name': None,
|
|
'job_title': show.get('job'),
|
|
'episode_count': show.get('episode_count', 0),
|
|
'sync_all_episodes': False # Crew credits are always show-level
|
|
}
|
|
|
|
# Count by type for logging
|
|
total_shows = len(processed_shows)
|
|
logger.info(f"Processing {total_shows} TV shows for episode-level data")
|
|
|
|
# Process each show/credit combo
|
|
shows_processed = 0
|
|
for (tv_id, credit_type), show_data in processed_shows.items():
|
|
show = show_data['show']
|
|
preview_name = show.get('name') or show.get('original_name', 'Unknown')
|
|
|
|
# Update progress callback
|
|
if progress_callback:
|
|
progress_callback(preview_name, shows_processed, total_shows)
|
|
|
|
# Rate limiting
|
|
await asyncio.sleep(0.25)
|
|
|
|
# Get show details
|
|
show_details = await self.get_tv_show_details(tv_id)
|
|
if not show_details:
|
|
shows_processed += 1
|
|
continue
|
|
|
|
show_name = show_details.get("name")
|
|
networks = show_details.get("networks", [])
|
|
network = networks[0].get("name", "") if networks else ""
|
|
poster_url = show_details.get("poster_path")
|
|
num_seasons = show_details.get("number_of_seasons", 0)
|
|
first_air_date = show_details.get("first_air_date")
|
|
|
|
# For ALL cast appearances (acting, host, guest), fetch specific episode appearances
|
|
# This ensures we only get episodes where the person actually appeared
|
|
if show_data['credit_type'] in ('guest', 'acting', 'host') and person_name:
|
|
cast_episodes = await self.get_person_tv_episodes(person_id, tv_id, person_name, show_data['credit_type'])
|
|
if cast_episodes:
|
|
# Update credit_type and add status to each episode
|
|
for ep in cast_episodes:
|
|
# Use the show's credit_type for consistent labeling
|
|
# (host stays host, acting stays acting, guest can be overridden)
|
|
if show_data['credit_type'] in ('host', 'acting'):
|
|
ep['credit_type'] = show_data['credit_type']
|
|
ep['character_name'] = show_data['character_name'] or ep.get('character_name')
|
|
try:
|
|
ep_date = datetime.strptime(ep['appearance_date'], "%Y-%m-%d").date()
|
|
ep['status'] = "aired" if ep_date < today else "upcoming"
|
|
except (ValueError, TypeError):
|
|
ep['status'] = "aired"
|
|
appearances.extend(cast_episodes)
|
|
shows_processed += 1
|
|
continue
|
|
# Fall back to show-level if no episodes found
|
|
|
|
# For crew credits (directing, producing, writing), fetch specific episode appearances
|
|
if show_data['credit_type'] in ('directing', 'producing', 'writing', 'creator') and person_name:
|
|
crew_episodes = await self.get_person_tv_crew_episodes(
|
|
person_id, tv_id, person_name,
|
|
show_data['credit_type'], show_data['job_title']
|
|
)
|
|
if crew_episodes:
|
|
# Add status to each episode
|
|
for ep in crew_episodes:
|
|
try:
|
|
ep_date = datetime.strptime(ep['appearance_date'], "%Y-%m-%d").date()
|
|
ep['status'] = "aired" if ep_date < today else "upcoming"
|
|
except (ValueError, TypeError):
|
|
ep['status'] = "aired"
|
|
appearances.extend(crew_episodes)
|
|
shows_processed += 1
|
|
continue
|
|
# Fall back to show-level if no episodes found
|
|
|
|
# For show-level entries (fallback), create single entry
|
|
# Use first air date as appearance date for show-level entries
|
|
# If no first_air_date, use 1900-01-01 as placeholder (means "in production")
|
|
air_date_str = first_air_date or "1900-01-01"
|
|
try:
|
|
air_date = datetime.strptime(air_date_str, "%Y-%m-%d").date()
|
|
# 1900-01-01 means no release date = upcoming/in production
|
|
if air_date_str == "1900-01-01":
|
|
status = "upcoming"
|
|
else:
|
|
status = "aired" if air_date < today else "upcoming"
|
|
except ValueError:
|
|
status = "upcoming" # Unknown date = upcoming
|
|
|
|
appearances.append({
|
|
"tmdb_show_id": tv_id,
|
|
"show_name": show_name,
|
|
"network": network,
|
|
"episode_title": None,
|
|
"season_number": 0, # 0 indicates show-level entry
|
|
"episode_number": 0,
|
|
"appearance_date": air_date_str,
|
|
"description": show_details.get("overview"),
|
|
"poster_url": poster_url,
|
|
"tmdb_episode_id": None,
|
|
"status": status,
|
|
"credit_type": show_data['credit_type'],
|
|
"character_name": show_data['character_name'],
|
|
"job_title": show_data['job_title'],
|
|
})
|
|
shows_processed += 1
|
|
|
|
logger.info(f"FULL historical sync complete: {len(appearances)} total TV appearances with credits")
|
|
return appearances
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error in full TV historical sync with credits for person {person_id}: {e}")
|
|
return []
|
|
|
|
async def find_all_movie_appearances_with_credits(self, person_id: int) -> List[Dict]:
|
|
"""
|
|
Find ALL movie appearances for a person with credit type information.
|
|
Includes both cast (acting) and crew (directing, producing, writing) credits.
|
|
|
|
Args:
|
|
person_id: TMDb person ID
|
|
|
|
Returns:
|
|
List of ALL movie appearances with credit_type, character_name, and job_title
|
|
"""
|
|
appearances = []
|
|
today = datetime.now().date()
|
|
|
|
try:
|
|
# Get all combined credits
|
|
credits = await self.get_person_combined_credits(person_id)
|
|
|
|
# Process cast credits
|
|
cast_movies = [movie for movie in credits.get("cast", []) if movie.get("media_type") == "movie"]
|
|
# Process crew credits
|
|
crew_movies = [movie for movie in credits.get("crew", []) if movie.get("media_type") == "movie"]
|
|
|
|
# Combine and dedupe by movie_id + credit_type
|
|
processed_movies = {}
|
|
|
|
# Process cast (acting or guest credits)
|
|
for movie in cast_movies:
|
|
movie_id = movie.get("id")
|
|
if not movie_id:
|
|
continue
|
|
|
|
character = movie.get('character')
|
|
# For movies, guest = playing themselves (documentaries, etc.)
|
|
is_guest = self._is_guest_appearance(character, episode_count=1, genre_ids=None)
|
|
credit_type = 'guest' if is_guest else 'acting'
|
|
|
|
key = (movie_id, credit_type)
|
|
if key not in processed_movies:
|
|
processed_movies[key] = {
|
|
'movie': movie,
|
|
'credit_type': credit_type,
|
|
'character_name': character,
|
|
'job_title': None,
|
|
}
|
|
|
|
# Process crew (other credit types)
|
|
for movie in crew_movies:
|
|
movie_id = movie.get("id")
|
|
if not movie_id:
|
|
continue
|
|
credit_type = self._map_department_to_credit_type(
|
|
movie.get('department'),
|
|
movie.get('job')
|
|
)
|
|
key = (movie_id, credit_type)
|
|
if key not in processed_movies:
|
|
processed_movies[key] = {
|
|
'movie': movie,
|
|
'credit_type': credit_type,
|
|
'character_name': None,
|
|
'job_title': movie.get('job'),
|
|
}
|
|
|
|
logger.info(f"Processing {len(processed_movies)} movie credits (cast + crew)")
|
|
|
|
# Process each movie/credit combo
|
|
for (movie_id, credit_type), movie_data in processed_movies.items():
|
|
movie = movie_data['movie']
|
|
|
|
# Rate limiting
|
|
await asyncio.sleep(0.25)
|
|
|
|
# Get movie details
|
|
movie_details = await self.get_movie_details(movie_id)
|
|
if not movie_details:
|
|
continue
|
|
|
|
# Check release date
|
|
release_date_str = movie_details.get("release_date")
|
|
if not release_date_str:
|
|
continue
|
|
|
|
try:
|
|
release_date = datetime.strptime(release_date_str, "%Y-%m-%d").date()
|
|
except ValueError:
|
|
continue
|
|
|
|
# Get production companies/studios
|
|
studios = movie_details.get("production_companies", [])
|
|
studio_name = studios[0].get("name") if studios else ""
|
|
|
|
# Determine status
|
|
status = "aired" if release_date < today else "upcoming"
|
|
|
|
appearances.append({
|
|
"tmdb_movie_id": movie_id,
|
|
"movie_name": movie_details.get("title"),
|
|
"studio": studio_name,
|
|
"release_date": release_date_str,
|
|
"description": movie_details.get("overview"),
|
|
"poster_url": movie_details.get("poster_path"),
|
|
"runtime": movie_details.get("runtime"),
|
|
"status": status,
|
|
"credit_type": movie_data['credit_type'],
|
|
"character_name": movie_data['character_name'],
|
|
"job_title": movie_data['job_title'],
|
|
})
|
|
|
|
logger.info(f"Found {len(appearances)} total movie credits (complete filmography with credits)")
|
|
return appearances
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error finding complete movie filmography with credits for person {person_id}: {e}")
|
|
return []
|
|
|
|
async def get_person_details(self, person_id: int) -> Optional[Dict]:
|
|
"""Get person details including credits where they are show creator"""
|
|
try:
|
|
url = f"{self.BASE_URL}/person/{person_id}"
|
|
params = {"api_key": self.api_key}
|
|
|
|
response = await http_client.get(url, params=params)
|
|
return response.json()
|
|
|
|
except Exception as e:
|
|
logger.error(f"TMDb get person error for person_id {person_id}: {e}")
|
|
return None
|