"""TMDb API client for celebrity appearances tracking""" import asyncio from datetime import datetime, timedelta from typing import Dict, List, Optional from web.backend.core.http_client import http_client from modules.universal_logger import get_logger logger = get_logger('TMDb') class TMDbClient: """Client for interacting with The Movie Database API""" BASE_URL = "https://api.themoviedb.org/3" # Shows where celebrity guests are featured in episode titles # These shows don't always have proper TMDB credits, so we scan episode titles CELEBRITY_TITLE_SHOWS = [ {"id": 100347, "name": "Celebrity IOU"}, {"id": 3956, "name": "Who Do You Think You Are?"}, {"id": 43221, "name": "Finding Your Roots"}, {"id": 59717, "name": "Comedians in Cars Getting Coffee"}, {"id": 61120, "name": "Running Wild with Bear Grylls"}, {"id": 72610, "name": "Carpool Karaoke: The Series"}, {"id": 72649, "name": "Hot Ones"}, {"id": 82108, "name": "Celebrity Family Feud"}, ] def __init__(self, api_key: str): self.api_key = api_key async def search_person(self, name: str) -> Optional[Dict]: """Search for person by name, returns first result""" try: url = f"{self.BASE_URL}/search/person" params = { "api_key": self.api_key, "query": name, "page": 1 } response = await http_client.get(url, params=params) data = response.json() if data.get("results"): return data["results"][0] return None except Exception as e: logger.error(f"TMDb search person error for '{name}': {e}") return None async def get_person_combined_credits(self, person_id: int) -> Dict: """Get all TV/movie credits for a person""" try: url = f"{self.BASE_URL}/person/{person_id}/combined_credits" params = {"api_key": self.api_key} response = await http_client.get(url, params=params) return response.json() except Exception as e: logger.error(f"TMDb get credits error for person_id {person_id}: {e}") return {"cast": [], "crew": []} async def get_tv_show_details(self, tv_id: int) -> Optional[Dict]: """Get TV show details including next episode air date""" try: url = f"{self.BASE_URL}/tv/{tv_id}" params = {"api_key": self.api_key} response = await http_client.get(url, params=params) return response.json() except Exception as e: logger.error(f"TMDb get TV show error for tv_id {tv_id}: {e}") return None async def get_tv_season_details(self, tv_id: int, season_number: int) -> Optional[Dict]: """Get season details with episode list and air dates""" try: url = f"{self.BASE_URL}/tv/{tv_id}/season/{season_number}" params = {"api_key": self.api_key} response = await http_client.get(url, params=params) return response.json() except Exception as e: logger.error(f"TMDb get season error for tv_id {tv_id} season {season_number}: {e}") return None async def get_tv_aggregate_credits(self, tv_id: int) -> Optional[Dict]: """Get aggregate credits for a TV show - includes all cast/guest appearances with episode details""" try: url = f"{self.BASE_URL}/tv/{tv_id}/aggregate_credits" params = {"api_key": self.api_key} response = await http_client.get(url, params=params) return response.json() except Exception as e: logger.error(f"TMDb get aggregate credits error for tv_id {tv_id}: {e}") return None async def get_episode_credits(self, tv_id: int, season_number: int, episode_number: int) -> Optional[Dict]: """Get credits for a specific TV episode including cast and guest_stars""" try: url = f"{self.BASE_URL}/tv/{tv_id}/season/{season_number}/episode/{episode_number}/credits" params = {"api_key": self.api_key} response = await http_client.get(url, params=params) return response.json() except Exception as e: logger.error(f"TMDb get episode credits error for tv_id {tv_id} S{season_number}E{episode_number}: {e}") return None async def is_person_in_episode(self, person_id: int, tv_id: int, season_number: int, episode_number: int) -> bool: """Check if a person is credited in a specific episode's cast or guest_stars""" try: credits = await self.get_episode_credits(tv_id, season_number, episode_number) if not credits: return False # Check cast for cast_member in credits.get('cast', []): if cast_member.get('id') == person_id: return True # Check guest_stars for guest in credits.get('guest_stars', []): if guest.get('id') == person_id: return True return False except Exception as e: logger.error(f"TMDb check person in episode error: {e}") return False async def find_appearances_by_episode_title(self, celebrity_name: str, lookback_days: int = 30, lookahead_days: int = 90) -> List[Dict]: """ Scan CELEBRITY_TITLE_SHOWS for episodes where the celebrity's name appears in the title. This catches appearances on shows like Celebrity IOU where TMDB doesn't have proper credits but the celebrity's name appears in the episode title. Args: celebrity_name: Name of the celebrity to search for lookback_days: How many days back to check for aired episodes lookahead_days: How many days forward to check for upcoming episodes Returns: List of appearance dicts ready for database insertion """ appearances = [] today = datetime.now().date() start_date = today - timedelta(days=lookback_days) end_date = today + timedelta(days=lookahead_days) # Split name for matching (first name, last name, full name) name_lower = celebrity_name.lower() name_parts = name_lower.split() # Use last name for primary matching (more unique), require 3+ chars search_terms = [name_lower] # Full name if len(name_parts) >= 2: last_name = name_parts[-1] if len(last_name) >= 3: search_terms.append(last_name) for show_info in self.CELEBRITY_TITLE_SHOWS: show_id = show_info["id"] show_name = show_info["name"] try: # Get show details to find current/recent seasons show_details = await self.get_tv_show_details(show_id) if not show_details: continue # Check the most recent seasons (up to 3) num_seasons = show_details.get("number_of_seasons", 0) if num_seasons == 0: continue seasons_to_check = range(max(1, num_seasons - 2), num_seasons + 1) for season_num in seasons_to_check: season_data = await self.get_tv_season_details(show_id, season_num) if not season_data or not season_data.get("episodes"): continue for episode in season_data.get("episodes", []): episode_title = episode.get("name", "") episode_title_lower = episode_title.lower() air_date_str = episode.get("air_date") if not air_date_str: continue try: air_date = datetime.strptime(air_date_str, "%Y-%m-%d").date() except ValueError: continue # Check if within date range if not (start_date <= air_date <= end_date): continue # Check if celebrity name appears in episode title name_found = False for term in search_terms: if term in episode_title_lower: name_found = True break if not name_found: continue # Found a match! Build appearance dict logger.info(f"Found episode title match: '{celebrity_name}' in '{show_name}' - {episode_title}") status = "upcoming" if air_date >= today else "aired" appearances.append({ "appearance_type": "TV", "show_name": show_name, "episode_title": episode_title, "season_number": season_num, "episode_number": episode.get("episode_number"), "appearance_date": air_date_str, "status": status, "tmdb_show_id": show_id, "tmdb_episode_id": episode.get("id"), "description": episode.get("overview"), "poster_url": show_details.get("poster_path"), "credit_type": "guest", # These are always guest appearances "character_name": "Self", "network": show_details.get("networks", [{}])[0].get("name") if show_details.get("networks") else None, }) # Small delay between season requests await asyncio.sleep(0.2) # Small delay between shows await asyncio.sleep(0.3) except Exception as e: logger.error(f"Error scanning {show_name} for {celebrity_name}: {e}") continue return appearances async def get_person_tv_episodes(self, person_id: int, tv_id: int, person_name: str, credit_type: str = "acting") -> List[Dict]: """ Get specific episode appearances for a person on a TV show. Uses aggregate_credits to find which episodes they appeared in. Returns list of episodes with air dates. """ episodes = [] try: # Get show details first show_details = await self.get_tv_show_details(tv_id) if not show_details: return [] show_name = show_details.get("name", "") networks = show_details.get("networks", []) network = networks[0].get("name", "") if networks else "" poster_url = show_details.get("poster_path") num_seasons = show_details.get("number_of_seasons", 0) # Get aggregate credits to find if person is in the show agg_credits = await self.get_tv_aggregate_credits(tv_id) if not agg_credits: return [] # Check if person is in cast (regular or recurring) person_in_cast = False person_character = None person_episode_count = 0 for cast_member in agg_credits.get("cast", []): # Match by ID or name if cast_member.get("id") == person_id or cast_member.get("name", "").lower() == person_name.lower(): person_in_cast = True # Get primary character name and total episode count roles = cast_member.get("roles", []) if roles: # Use the role with most episodes as primary primary_role = max(roles, key=lambda r: r.get("episode_count", 0)) person_character = primary_role.get("character", "Self") person_episode_count = sum(r.get("episode_count", 0) for r in roles) break # Calculate total episodes in the show total_episodes = show_details.get("number_of_episodes", 0) # Determine if person is "main cast" (in most episodes) vs occasional appearance # Main cast criteria: # - For short series (<20 eps): appeared in at least 50% of episodes # - For medium series (20-100 eps): appeared in at least 30% of episodes # - For long series (100+ eps): appeared in at least 20% of episodes AND at least 10 eps is_main_cast = False if person_in_cast and total_episodes > 0: appearance_ratio = person_episode_count / total_episodes if total_episodes < 20: # Short series: need 50%+ is_main_cast = appearance_ratio >= 0.5 elif total_episodes < 100: # Medium series: need 30%+ is_main_cast = appearance_ratio >= 0.3 else: # Long series: need 20%+ AND at least 10 eps is_main_cast = appearance_ratio >= 0.2 and person_episode_count >= 10 # Iterate through seasons to get episode-level details for season_num in range(1, num_seasons + 1): await asyncio.sleep(0.1) # Rate limiting season = await self.get_tv_season_details(tv_id, season_num) if not season: continue for episode in season.get("episodes", []): ep_air_date = episode.get("air_date") if not ep_air_date: continue # Check if person is in guest_stars for this episode guest_stars = episode.get("guest_stars", []) person_is_guest = False guest_character = None for guest in guest_stars: if guest.get("id") == person_id or guest.get("name", "").lower() == person_name.lower(): person_is_guest = True guest_character = guest.get("character") break # Include episode if: # 1. Person is listed as guest in this episode, OR # 2. Person is main cast (in most episodes of the show) if person_is_guest or is_main_cast: # Determine credit type and character ep_credit_type = credit_type ep_character = None if person_is_guest: ep_credit_type = "guest" ep_character = guest_character or "Self" elif is_main_cast: ep_character = person_character or "Self" episodes.append({ "tmdb_show_id": tv_id, "show_name": show_name, "network": network, "episode_title": episode.get("name"), "season_number": episode.get("season_number"), "episode_number": episode.get("episode_number"), "appearance_date": ep_air_date, "description": episode.get("overview"), "poster_url": poster_url, "tmdb_episode_id": episode.get("id"), "credit_type": ep_credit_type, "character_name": ep_character, "job_title": None, }) logger.info(f"Found {len(episodes)} episodes for person {person_id} on {show_name} (in_cast={person_in_cast}, is_main={is_main_cast}, agg_count={person_episode_count}/{total_episodes})") return episodes except Exception as e: logger.error(f"Error getting person episodes for {person_id} on tv {tv_id}: {e}") return [] async def get_person_tv_crew_episodes(self, person_id: int, tv_id: int, person_name: str, credit_type: str, job_title: str = None) -> List[Dict]: """ Get specific episodes where a person worked as crew (director, producer, writer, etc.) Iterates through all episodes and checks the crew array for each. Args: person_id: TMDB person ID tv_id: TMDB TV show ID person_name: Person's name for matching credit_type: The crew credit type (directing, producing, writing, creator) job_title: Specific job title to match (e.g., "Director", "Executive Producer") Returns list of episodes with air dates. """ episodes = [] try: # Get show details first show_details = await self.get_tv_show_details(tv_id) if not show_details: return [] show_name = show_details.get("name", "") networks = show_details.get("networks", []) network = networks[0].get("name", "") if networks else "" poster_url = show_details.get("poster_path") num_seasons = show_details.get("number_of_seasons", 0) # Map credit_type to TMDB department department_map = { 'directing': 'Directing', 'producing': 'Production', 'writing': 'Writing', 'creator': 'Production', # Creators are often listed under Production } target_department = department_map.get(credit_type, credit_type.capitalize()) # Iterate through seasons to get episode-level details for season_num in range(1, num_seasons + 1): await asyncio.sleep(0.1) # Rate limiting season = await self.get_tv_season_details(tv_id, season_num) if not season: continue for episode in season.get("episodes", []): ep_air_date = episode.get("air_date") if not ep_air_date: continue # Check if person is in crew for this episode crew = episode.get("crew", []) person_in_crew = False found_job = None for crew_member in crew: # Match by ID or name if crew_member.get("id") == person_id or crew_member.get("name", "").lower() == person_name.lower(): # Check if department/job matches dept = crew_member.get("department", "") job = crew_member.get("job", "") # Match by department or specific job if dept == target_department or (job_title and job_title.lower() in job.lower()): person_in_crew = True found_job = job break # Also check for Creator in Writing department if credit_type == 'creator' and 'creator' in job.lower(): person_in_crew = True found_job = job break if person_in_crew: episodes.append({ "tmdb_show_id": tv_id, "show_name": show_name, "network": network, "episode_title": episode.get("name"), "season_number": episode.get("season_number"), "episode_number": episode.get("episode_number"), "appearance_date": ep_air_date, "description": episode.get("overview"), "poster_url": poster_url, "tmdb_episode_id": episode.get("id"), "credit_type": credit_type, "character_name": None, "job_title": found_job or job_title, }) logger.info(f"Found {len(episodes)} crew episodes for person {person_id} on {show_name} ({credit_type})") return episodes except Exception as e: logger.error(f"Error getting crew episodes for {person_id} on tv {tv_id}: {e}") return [] async def find_upcoming_tv_appearances(self, person_id: int) -> List[Dict]: """ Find all upcoming TV show appearances for a person. Includes both cast (acting/guest) and crew (directing/producing/writing) credits. Returns list of upcoming episodes with air dates. """ appearances = [] today = datetime.now().date() try: # Get all TV credits (cast AND crew) credits = await self.get_person_combined_credits(person_id) cast_shows = [show for show in credits.get("cast", []) if show.get("media_type") == "tv"] crew_shows = [show for show in credits.get("crew", []) if show.get("media_type") == "tv"] # Process and dedupe by show_id + credit_type processed_shows = {} # Process cast (acting or guest credits) for show in cast_shows: tv_id = show.get("id") if not tv_id: continue character = show.get('character') episode_count = show.get('episode_count', 0) genre_ids = show.get('genre_ids', []) show_name = show.get('name') or show.get('original_name', '') # Determine credit type (acting, guest, host, etc.) # Use _determine_credit_type directly to properly detect hosts (e.g., SNL hosts) credit_type = self._determine_credit_type(character, episode_count, genre_ids, show_name) key = (tv_id, credit_type) if key not in processed_shows: processed_shows[key] = { 'show': show, 'credit_type': credit_type, 'character_name': character, 'job_title': None, } # Process crew (directing, producing, writing credits) for show in crew_shows: tv_id = show.get("id") if not tv_id: continue credit_type = self._map_department_to_credit_type( show.get('department'), show.get('job') ) key = (tv_id, credit_type) if key not in processed_shows: processed_shows[key] = { 'show': show, 'credit_type': credit_type, 'character_name': None, 'job_title': show.get('job'), } # Check each TV show for upcoming episodes for (tv_id, credit_type), show_data in processed_shows.items(): # Rate limiting await asyncio.sleep(0.25) # Get show details show_details = await self.get_tv_show_details(tv_id) if not show_details: continue # Check if show has upcoming episodes next_episode = show_details.get("next_episode_to_air") if next_episode: air_date_str = next_episode.get("air_date") if air_date_str: air_date = datetime.strptime(air_date_str, "%Y-%m-%d").date() if air_date >= today: season_num = next_episode.get("season_number") episode_num = next_episode.get("episode_number") # For guest/host/cameo credits, verify the person is in the episode credits # These are one-time appearances and may not continue to future episodes episode_count = show_data['show'].get('episode_count', 0) requires_verification = ( credit_type in ('guest', 'host', 'cameo', 'self') or episode_count <= 5 # Few episodes suggests guest appearances ) if requires_verification and season_num and episode_num: # Rate limiting for episode credits check await asyncio.sleep(0.25) is_in_episode = await self.is_person_in_episode( person_id, tv_id, season_num, episode_num ) if not is_in_episode: logger.debug( f"Skipping {show_details.get('name')} - person not in episode credits" ) continue appearances.append({ "tmdb_show_id": tv_id, "show_name": show_details.get("name"), "network": show_details.get("networks", [{}])[0].get("name", ""), "episode_title": next_episode.get("name"), "season_number": season_num, "episode_number": episode_num, "appearance_date": air_date_str, "description": next_episode.get("overview"), "poster_url": show_details.get("poster_path"), "tmdb_episode_id": next_episode.get("id"), "credit_type": show_data['credit_type'], "character_name": show_data['character_name'], "job_title": show_data['job_title'], }) return appearances except Exception as e: logger.error(f"Error finding upcoming appearances for person {person_id}: {e}") return [] async def find_recent_aired_tv_appearances(self, person_id: int, days_back: int = 90) -> List[Dict]: """ Find recent aired TV show appearances for a person Goes back through recent seasons to find aired episodes Args: person_id: TMDb person ID days_back: How many days back to look for aired episodes (default 90) Returns: List of aired episodes with air dates """ appearances = [] today = datetime.now().date() cutoff_date = today - timedelta(days=days_back) try: # Get all TV credits credits = await self.get_person_combined_credits(person_id) tv_shows = credits.get("cast", []) # Filter for TV shows only tv_shows = [show for show in tv_shows if show.get("media_type") == "tv"] logger.info(f"Checking {len(tv_shows)} TV shows for recent aired episodes (last {days_back} days)") # Check each TV show for recent aired episodes for show in tv_shows: tv_id = show.get("id") if not tv_id: continue # Rate limiting await asyncio.sleep(0.25) # Get show details show_details = await self.get_tv_show_details(tv_id) if not show_details: continue show_name = show_details.get("name") network = show_details.get("networks", [{}])[0].get("name", "") poster_url = show_details.get("poster_path") # Get the last aired episode last_episode = show_details.get("last_episode_to_air") if not last_episode: continue # Check if last episode is within our window last_air_date_str = last_episode.get("air_date") if not last_air_date_str: continue try: last_air_date = datetime.strptime(last_air_date_str, "%Y-%m-%d").date() except ValueError: continue # If the last episode is too old, skip this show if last_air_date < cutoff_date: continue # Get the season details for the last aired episode last_season = last_episode.get("season_number") if not last_season: continue # Rate limiting await asyncio.sleep(0.25) # Get all episodes from this season season_details = await self.get_tv_season_details(tv_id, last_season) if not season_details: continue episodes = season_details.get("episodes", []) # Filter for episodes within our date range that have aired for episode in episodes: ep_air_date_str = episode.get("air_date") if not ep_air_date_str: continue try: ep_air_date = datetime.strptime(ep_air_date_str, "%Y-%m-%d").date() except ValueError: continue # Only include episodes that: # 1. Have already aired (< today) # 2. Are within our lookback window (>= cutoff_date) if cutoff_date <= ep_air_date < today: appearances.append({ "tmdb_show_id": tv_id, "show_name": show_name, "network": network, "episode_title": episode.get("name"), "season_number": last_season, "episode_number": episode.get("episode_number"), "appearance_date": ep_air_date_str, "description": episode.get("overview"), "poster_url": poster_url, "tmdb_episode_id": episode.get("id"), "status": "aired" }) logger.info(f"Found {len(appearances)} aired episodes for person {person_id}") return appearances except Exception as e: logger.error(f"Error finding aired appearances for person {person_id}: {e}") return [] async def find_all_aired_tv_appearances(self, person_id: int) -> List[Dict]: """ Find ALL aired TV show appearances for a person across all seasons This is a complete historical sync - can be slow for people with many shows Args: person_id: TMDb person ID Returns: List of ALL aired episodes with air dates """ appearances = [] today = datetime.now().date() try: # Get all TV credits credits = await self.get_person_combined_credits(person_id) tv_shows = credits.get("cast", []) # Filter for TV shows only tv_shows = [show for show in tv_shows if show.get("media_type") == "tv"] logger.info(f"Starting FULL historical sync for {len(tv_shows)} TV shows") # Check each TV show for ALL aired episodes for show in tv_shows: tv_id = show.get("id") if not tv_id: continue # Rate limiting await asyncio.sleep(0.25) # Get show details show_details = await self.get_tv_show_details(tv_id) if not show_details: continue show_name = show_details.get("name") networks = show_details.get("networks", []) network = networks[0].get("name", "") if networks else "" poster_url = show_details.get("poster_path") num_seasons = show_details.get("number_of_seasons", 0) if num_seasons == 0: continue logger.info(f" Processing '{show_name}' ({num_seasons} seasons)") # Get all episodes from ALL seasons for season_num in range(1, num_seasons + 1): # Rate limiting await asyncio.sleep(0.25) season_details = await self.get_tv_season_details(tv_id, season_num) if not season_details: continue episodes = season_details.get("episodes", []) # Get all aired episodes from this season for episode in episodes: ep_air_date_str = episode.get("air_date") if not ep_air_date_str: continue try: ep_air_date = datetime.strptime(ep_air_date_str, "%Y-%m-%d").date() except ValueError: continue # Only include episodes that have already aired if ep_air_date < today: appearances.append({ "tmdb_show_id": tv_id, "show_name": show_name, "network": network, "episode_title": episode.get("name"), "season_number": season_num, "episode_number": episode.get("episode_number"), "appearance_date": ep_air_date_str, "description": episode.get("overview"), "poster_url": poster_url, "tmdb_episode_id": episode.get("id"), "status": "aired" }) logger.info(f" Found {sum(1 for a in appearances if a['show_name'] == show_name)} aired episodes") logger.info(f"FULL historical sync complete: {len(appearances)} total aired episodes for person {person_id}") return appearances except Exception as e: logger.error(f"Error in full historical sync for person {person_id}: {e}") return [] async def get_movie_details(self, movie_id: int) -> Optional[Dict]: """Get movie details including release date""" try: url = f"{self.BASE_URL}/movie/{movie_id}" params = {"api_key": self.api_key} response = await http_client.get(url, params=params) return response.json() except Exception as e: logger.error(f"TMDb get movie error for movie_id {movie_id}: {e}") return None async def find_all_movie_appearances(self, person_id: int) -> List[Dict]: """ Find ALL movie appearances for a person (past, present, future). Includes both cast (acting/guest) and crew (directing/producing/writing) credits. Returns complete filmography with credit types. """ appearances = [] today = datetime.now().date() try: # Get all movie credits (cast AND crew) credits = await self.get_person_combined_credits(person_id) cast_movies = [movie for movie in credits.get("cast", []) if movie.get("media_type") == "movie"] crew_movies = [movie for movie in credits.get("crew", []) if movie.get("media_type") == "movie"] # Process and dedupe by movie_id + credit_type processed_movies = {} # Process cast (acting or guest credits) for movie in cast_movies: movie_id = movie.get("id") if not movie_id: continue character = movie.get('character') # For movies, guest = playing themselves (documentaries, etc.) is_guest = self._is_guest_appearance(character, episode_count=1, genre_ids=None) credit_type = 'guest' if is_guest else 'acting' key = (movie_id, credit_type) if key not in processed_movies: processed_movies[key] = { 'movie': movie, 'credit_type': credit_type, 'character_name': character, 'job_title': None, } # Process crew (directing, producing, writing credits) for movie in crew_movies: movie_id = movie.get("id") if not movie_id: continue credit_type = self._map_department_to_credit_type( movie.get('department'), movie.get('job') ) key = (movie_id, credit_type) if key not in processed_movies: processed_movies[key] = { 'movie': movie, 'credit_type': credit_type, 'character_name': None, 'job_title': movie.get('job'), } logger.info(f"Processing {len(processed_movies)} movies for complete filmography (cast + crew)") # Check each movie for (movie_id, credit_type), movie_data in processed_movies.items(): # Rate limiting await asyncio.sleep(0.25) # Get movie details movie_details = await self.get_movie_details(movie_id) if not movie_details: continue # Check release date release_date_str = movie_details.get("release_date") if release_date_str: try: release_date = datetime.strptime(release_date_str, "%Y-%m-%d").date() except ValueError: continue # Get production companies/studios studios = movie_details.get("production_companies", []) studio_name = studios[0].get("name") if studios else "" # Determine status status = "aired" if release_date < today else "upcoming" appearances.append({ "tmdb_movie_id": movie_id, "movie_name": movie_details.get("title"), "studio": studio_name, "release_date": release_date_str, "description": movie_details.get("overview"), "poster_url": movie_details.get("poster_path"), "runtime": movie_details.get("runtime"), "status": status, "credit_type": movie_data['credit_type'], "character_name": movie_data['character_name'], "job_title": movie_data['job_title'], }) logger.info(f"Found {len(appearances)} total movies (complete filmography)") return appearances except Exception as e: logger.error(f"Error finding complete filmography for person {person_id}: {e}") return [] async def find_upcoming_movie_appearances(self, person_id: int) -> List[Dict]: """ Find all upcoming movie releases for a person. Includes both cast (acting/guest) and crew (directing/producing/writing) credits. Returns list of upcoming/recent movies with release dates. """ appearances = [] today = datetime.now().date() # Look back 30 days and forward 365 days cutoff_past = today - timedelta(days=30) cutoff_future = today + timedelta(days=365) try: # Get all movie credits (cast AND crew) credits = await self.get_person_combined_credits(person_id) cast_movies = [movie for movie in credits.get("cast", []) if movie.get("media_type") == "movie"] crew_movies = [movie for movie in credits.get("crew", []) if movie.get("media_type") == "movie"] # Process and dedupe by movie_id + credit_type processed_movies = {} # Process cast (acting or guest credits) for movie in cast_movies: movie_id = movie.get("id") if not movie_id: continue character = movie.get('character') # For movies, guest = playing themselves (documentaries, etc.) is_guest = self._is_guest_appearance(character, episode_count=1, genre_ids=None) credit_type = 'guest' if is_guest else 'acting' key = (movie_id, credit_type) if key not in processed_movies: processed_movies[key] = { 'movie': movie, 'credit_type': credit_type, 'character_name': character, 'job_title': None, } # Process crew (directing, producing, writing credits) for movie in crew_movies: movie_id = movie.get("id") if not movie_id: continue credit_type = self._map_department_to_credit_type( movie.get('department'), movie.get('job') ) key = (movie_id, credit_type) if key not in processed_movies: processed_movies[key] = { 'movie': movie, 'credit_type': credit_type, 'character_name': None, 'job_title': movie.get('job'), } # Check each movie for upcoming/recent releases for (movie_id, credit_type), movie_data in processed_movies.items(): # Rate limiting await asyncio.sleep(0.25) # Get movie details movie_details = await self.get_movie_details(movie_id) if not movie_details: continue # Check release date release_date_str = movie_details.get("release_date") if release_date_str: try: release_date = datetime.strptime(release_date_str, "%Y-%m-%d").date() # Include if within window (30 days past to 365 days future) if cutoff_past <= release_date <= cutoff_future: # Get production companies/studios studios = movie_details.get("production_companies", []) studio_name = studios[0].get("name") if studios else "" appearances.append({ "tmdb_movie_id": movie_id, "movie_name": movie_details.get("title"), "studio": studio_name, "release_date": release_date_str, "description": movie_details.get("overview"), "poster_url": movie_details.get("poster_path"), "runtime": movie_details.get("runtime"), "credit_type": movie_data['credit_type'], "character_name": movie_data['character_name'], "job_title": movie_data['job_title'], }) except ValueError: continue return appearances except Exception as e: logger.error(f"Error finding upcoming movie appearances for person {person_id}: {e}") return [] def _determine_credit_type(self, character: str, episode_count: int, genre_ids: List[int] = None, show_name: str = None, person_name: str = None, total_episodes: int = None) -> str: """ Determine the credit type for a TV appearance. Args: character: Character name from TMDB (e.g., 'Self', 'Gabrielle Solis') episode_count: Number of episodes appeared in genre_ids: List of TMDB genre IDs for the show show_name: Name of the TV show person_name: Name of the person (to check if they're the host/star of the show) total_episodes: Total episodes in the show (to determine if they're main cast) Returns: Credit type: 'acting', 'host', or 'guest' """ character_lower = (character or '').lower().strip() # Explicit guest indicators - "Self - Guest", "Herself - Interview", etc. if 'guest' in character_lower or 'interview' in character_lower: return 'guest' # Voice roles in animated shows (e.g., "Isabel GutiƩrrez (voice)") are guest appearances # unless they're main cast (many episodes). Single/few episode voice roles = guest. if '(voice)' in character_lower and episode_count <= 3: return 'guest' # Check if playing themselves (but not explicitly as guest) is_playing_self = character_lower in ['self', 'herself', 'himself', 'themselves', 'themself'] # "Self - Host" = they're the host if 'host' in character_lower or 'presenter' in character_lower: return 'host' # Plain "Self - " with other descriptions (not guest/interview) = check further if character_lower.startswith(('self ', 'self-', 'herself ', 'himself ')): is_playing_self = True # Check if this is the person's own show (their name in the title) is_their_show = False if person_name and show_name: person_parts = person_name.lower().split() show_lower = show_name.lower() for part in person_parts: if len(part) > 2 and part in show_lower: is_their_show = True break # If their name is in the show title and they play themselves, they're the HOST # (e.g., "Ellen DeGeneres" on "The Ellen DeGeneres Show") if is_their_show and is_playing_self: return 'host' # If they appear in most episodes, they're main cast is_main_cast = False if total_episodes and total_episodes > 0 and episode_count > 0: appearance_ratio = episode_count / total_episodes if episode_count >= 5 or appearance_ratio >= 0.3: is_main_cast = True # Main cast playing a CHARACTER = acting (not playing themselves) # Note: We do NOT automatically mark main cast playing themselves as "host" # because that would incorrectly label recurring guests as hosts. # Only the show's actual host (name in title or explicit "Host" label) gets 'host'. if is_main_cast and not is_playing_self: return 'acting' # Saturday Night Live special handling - only repertory players are 'acting' # Everyone else (hosts, musical guests, cameos) should be 'host' or 'guest' # SNL has ~20+ episodes per season, so main cast would have episode_count >= 5 if show_name and 'saturday night live' in show_name.lower(): if episode_count <= 3: # Hosts/guests appear in 1-2 episodes typically # If they played themselves, they were a host (or cameo) if is_playing_self or 'host' in character_lower: return 'host' return 'guest' # Entertainment news/talk shows = always guest (unless it's their own show) GUEST_SHOW_PATTERNS = [ 'entertainment tonight', 'e! true hollywood story', 'true hollywood story', 'access hollywood', 'extra', 'inside edition', 'e! news', 'et canada', 'e.t. canada', 'hollywood access', 'the insider', 'omg! insider', 'celebrity page', 'dish nation', 'hollywood today', ] if show_name: show_name_lower = show_name.lower().strip() for pattern in GUEST_SHOW_PATTERNS: if pattern in show_name_lower: return 'guest' # Talk show / variety / game show genre IDs from TMDB with limited episodes = guest # 10767 = Talk, 10763 = News, 10764 = Reality (includes game shows) guest_show_genres = {10767, 10763, 10764} if genre_ids and guest_show_genres.intersection(set(genre_ids)): if episode_count <= 2: return 'guest' # Playing themselves on a show that's NOT theirs = guest (regardless of episode count) # This covers recurring talk show guests who appear multiple times if is_playing_self: return 'guest' # Default: acting (playing a character in a show) return 'acting' def _is_guest_appearance(self, character: str, episode_count: int, genre_ids: List[int] = None, show_name: str = None, person_name: str = None, total_episodes: int = None) -> bool: """Legacy wrapper - returns True if credit_type is 'guest'""" credit_type = self._determine_credit_type(character, episode_count, genre_ids, show_name, person_name, total_episodes) return credit_type == 'guest' def _map_department_to_credit_type(self, department: str, job: str) -> str: """ Map TMDB department/job to our credit_type values. Args: department: TMDB department (e.g., 'Directing', 'Production', 'Writing') job: TMDB job title (e.g., 'Director', 'Executive Producer', 'Screenplay') Returns: Credit type: 'acting', 'directing', 'producing', 'writing', 'creator', 'guest' """ if not department: return 'acting' # Default for cast entries department_lower = department.lower() job_lower = (job or '').lower() # Check for creator role first if 'creator' in job_lower or 'created by' in job_lower: return 'creator' # Map by department if department_lower == 'directing': return 'directing' elif department_lower == 'production': return 'producing' elif department_lower == 'writing': return 'writing' elif department_lower == 'acting': return 'acting' else: # Other crew roles map to producing as a catch-all # (Sound, Art, Camera, Costume, Crew, Editing, Visual Effects, Lighting) return 'producing' async def find_all_tv_appearances_with_credits(self, person_id: int, person_name: str = "", progress_callback=None) -> List[Dict]: """ Find ALL TV show appearances for a person with credit type information. Includes both cast (acting) and crew (directing, producing, writing, creator) credits. For all cast roles (acting and guest), fetches specific episode appearances. For crew credits, creates show-level entries. Args: person_id: TMDb person ID person_name: Person's name for matching in episode credits progress_callback: Optional callback(current_show, shows_processed, total_shows) Returns: List of TV appearances with credit_type, character_name, and job_title """ appearances = [] today = datetime.now().date() # Threshold: if credited for more episodes, treat as regular cast and sync all episodes REGULAR_CAST_THRESHOLD = 10 try: # Get all combined credits credits = await self.get_person_combined_credits(person_id) # Process cast credits cast_shows = [show for show in credits.get("cast", []) if show.get("media_type") == "tv"] # Process crew credits crew_shows = [show for show in credits.get("crew", []) if show.get("media_type") == "tv"] # Combine and dedupe by tv_id + credit_type processed_shows = {} # Process cast (acting or guest credits) for show in cast_shows: tv_id = show.get("id") if not tv_id: continue character = show.get('character') episode_count = show.get('episode_count', 0) genre_ids = show.get('genre_ids', []) show_name = show.get('name') or show.get('original_name', '') # Get total episodes from the show data if available total_episodes = show.get('number_of_episodes') or show.get('episode_count', 0) # Determine credit type (acting, host, or guest) # Pass person_name to check if they're the star/host of the show credit_type = self._determine_credit_type( character, episode_count, genre_ids, show_name, person_name=person_name, total_episodes=total_episodes ) # Determine if we should sync all episodes or just show-level # Regular cast/host (many episodes, not a guest) = sync all episodes # Guest/few episodes = show-level only sync_all_episodes = (credit_type in ('acting', 'host') and episode_count >= REGULAR_CAST_THRESHOLD) key = (tv_id, credit_type) if key not in processed_shows: processed_shows[key] = { 'show': show, 'credit_type': credit_type, 'character_name': character, 'job_title': None, 'episode_count': episode_count, 'sync_all_episodes': sync_all_episodes } # Process crew (other credit types) - always show-level only for show in crew_shows: tv_id = show.get("id") if not tv_id: continue credit_type = self._map_department_to_credit_type( show.get('department'), show.get('job') ) key = (tv_id, credit_type) if key not in processed_shows: processed_shows[key] = { 'show': show, 'credit_type': credit_type, 'character_name': None, 'job_title': show.get('job'), 'episode_count': show.get('episode_count', 0), 'sync_all_episodes': False # Crew credits are always show-level } # Count by type for logging total_shows = len(processed_shows) logger.info(f"Processing {total_shows} TV shows for episode-level data") # Process each show/credit combo shows_processed = 0 for (tv_id, credit_type), show_data in processed_shows.items(): show = show_data['show'] preview_name = show.get('name') or show.get('original_name', 'Unknown') # Update progress callback if progress_callback: progress_callback(preview_name, shows_processed, total_shows) # Rate limiting await asyncio.sleep(0.25) # Get show details show_details = await self.get_tv_show_details(tv_id) if not show_details: shows_processed += 1 continue show_name = show_details.get("name") networks = show_details.get("networks", []) network = networks[0].get("name", "") if networks else "" poster_url = show_details.get("poster_path") num_seasons = show_details.get("number_of_seasons", 0) first_air_date = show_details.get("first_air_date") # For ALL cast appearances (acting, host, guest), fetch specific episode appearances # This ensures we only get episodes where the person actually appeared if show_data['credit_type'] in ('guest', 'acting', 'host') and person_name: cast_episodes = await self.get_person_tv_episodes(person_id, tv_id, person_name, show_data['credit_type']) if cast_episodes: # Update credit_type and add status to each episode for ep in cast_episodes: # Use the show's credit_type for consistent labeling # (host stays host, acting stays acting, guest can be overridden) if show_data['credit_type'] in ('host', 'acting'): ep['credit_type'] = show_data['credit_type'] ep['character_name'] = show_data['character_name'] or ep.get('character_name') try: ep_date = datetime.strptime(ep['appearance_date'], "%Y-%m-%d").date() ep['status'] = "aired" if ep_date < today else "upcoming" except (ValueError, TypeError): ep['status'] = "aired" appearances.extend(cast_episodes) shows_processed += 1 continue # Fall back to show-level if no episodes found # For crew credits (directing, producing, writing), fetch specific episode appearances if show_data['credit_type'] in ('directing', 'producing', 'writing', 'creator') and person_name: crew_episodes = await self.get_person_tv_crew_episodes( person_id, tv_id, person_name, show_data['credit_type'], show_data['job_title'] ) if crew_episodes: # Add status to each episode for ep in crew_episodes: try: ep_date = datetime.strptime(ep['appearance_date'], "%Y-%m-%d").date() ep['status'] = "aired" if ep_date < today else "upcoming" except (ValueError, TypeError): ep['status'] = "aired" appearances.extend(crew_episodes) shows_processed += 1 continue # Fall back to show-level if no episodes found # For show-level entries (fallback), create single entry # Use first air date as appearance date for show-level entries # If no first_air_date, use 1900-01-01 as placeholder (means "in production") air_date_str = first_air_date or "1900-01-01" try: air_date = datetime.strptime(air_date_str, "%Y-%m-%d").date() # 1900-01-01 means no release date = upcoming/in production if air_date_str == "1900-01-01": status = "upcoming" else: status = "aired" if air_date < today else "upcoming" except ValueError: status = "upcoming" # Unknown date = upcoming appearances.append({ "tmdb_show_id": tv_id, "show_name": show_name, "network": network, "episode_title": None, "season_number": 0, # 0 indicates show-level entry "episode_number": 0, "appearance_date": air_date_str, "description": show_details.get("overview"), "poster_url": poster_url, "tmdb_episode_id": None, "status": status, "credit_type": show_data['credit_type'], "character_name": show_data['character_name'], "job_title": show_data['job_title'], }) shows_processed += 1 logger.info(f"FULL historical sync complete: {len(appearances)} total TV appearances with credits") return appearances except Exception as e: logger.error(f"Error in full TV historical sync with credits for person {person_id}: {e}") return [] async def find_all_movie_appearances_with_credits(self, person_id: int) -> List[Dict]: """ Find ALL movie appearances for a person with credit type information. Includes both cast (acting) and crew (directing, producing, writing) credits. Args: person_id: TMDb person ID Returns: List of ALL movie appearances with credit_type, character_name, and job_title """ appearances = [] today = datetime.now().date() try: # Get all combined credits credits = await self.get_person_combined_credits(person_id) # Process cast credits cast_movies = [movie for movie in credits.get("cast", []) if movie.get("media_type") == "movie"] # Process crew credits crew_movies = [movie for movie in credits.get("crew", []) if movie.get("media_type") == "movie"] # Combine and dedupe by movie_id + credit_type processed_movies = {} # Process cast (acting or guest credits) for movie in cast_movies: movie_id = movie.get("id") if not movie_id: continue character = movie.get('character') # For movies, guest = playing themselves (documentaries, etc.) is_guest = self._is_guest_appearance(character, episode_count=1, genre_ids=None) credit_type = 'guest' if is_guest else 'acting' key = (movie_id, credit_type) if key not in processed_movies: processed_movies[key] = { 'movie': movie, 'credit_type': credit_type, 'character_name': character, 'job_title': None, } # Process crew (other credit types) for movie in crew_movies: movie_id = movie.get("id") if not movie_id: continue credit_type = self._map_department_to_credit_type( movie.get('department'), movie.get('job') ) key = (movie_id, credit_type) if key not in processed_movies: processed_movies[key] = { 'movie': movie, 'credit_type': credit_type, 'character_name': None, 'job_title': movie.get('job'), } logger.info(f"Processing {len(processed_movies)} movie credits (cast + crew)") # Process each movie/credit combo for (movie_id, credit_type), movie_data in processed_movies.items(): movie = movie_data['movie'] # Rate limiting await asyncio.sleep(0.25) # Get movie details movie_details = await self.get_movie_details(movie_id) if not movie_details: continue # Check release date release_date_str = movie_details.get("release_date") if not release_date_str: continue try: release_date = datetime.strptime(release_date_str, "%Y-%m-%d").date() except ValueError: continue # Get production companies/studios studios = movie_details.get("production_companies", []) studio_name = studios[0].get("name") if studios else "" # Determine status status = "aired" if release_date < today else "upcoming" appearances.append({ "tmdb_movie_id": movie_id, "movie_name": movie_details.get("title"), "studio": studio_name, "release_date": release_date_str, "description": movie_details.get("overview"), "poster_url": movie_details.get("poster_path"), "runtime": movie_details.get("runtime"), "status": status, "credit_type": movie_data['credit_type'], "character_name": movie_data['character_name'], "job_title": movie_data['job_title'], }) logger.info(f"Found {len(appearances)} total movie credits (complete filmography with credits)") return appearances except Exception as e: logger.error(f"Error finding complete movie filmography with credits for person {person_id}: {e}") return [] async def get_person_details(self, person_id: int) -> Optional[Dict]: """Get person details including credits where they are show creator""" try: url = f"{self.BASE_URL}/person/{person_id}" params = {"api_key": self.api_key} response = await http_client.get(url, params=params) return response.json() except Exception as e: logger.error(f"TMDb get person error for person_id {person_id}: {e}") return None