"""Podchaser GraphQL API client for tracking podcast guest appearances."""
import asyncio
|
|
from datetime import datetime, timedelta
|
|
from typing import Dict, List, Optional
|
|
from web.backend.core.http_client import http_client
|
|
from modules.universal_logger import get_logger
|
|
|
|
logger = get_logger('Podchaser')
|
|
|
|
class PodchaserClient:
    """Client for interacting with the Podchaser GraphQL API.

    Every request is a POST to ``API_URL`` made through the shared
    ``http_client``. Read queries go through ``_execute_query``, which never
    raises: GraphQL errors and transport failures are logged and surface as an
    empty dict, so the public lookup methods degrade to ``None`` / ``[]``.

    GraphQL responses may carry an explicit ``null`` for any nullable field, so
    nested objects are normalised with ``_as_dict`` before being read.
    """

    API_URL = "https://api.podchaser.com/graphql"

    # Upper bound on episodeCredits pages fetched per creator (20 credits per
    # page), to prevent excessive API calls.
    MAX_CREDIT_PAGES = 10
    # Pause between consecutive page requests (rate limiting), in seconds.
    PAGE_DELAY_SECONDS = 0.15

    _ACCESS_TOKEN_MUTATION = """
        mutation GetToken($client_id: String!, $client_secret: String!) {
            requestAccessToken(
                input: {
                    grant_type: CLIENT_CREDENTIALS
                    client_id: $client_id
                    client_secret: $client_secret
                }
            ) {
                access_token
            }
        }
    """

    _CREATOR_SEARCH_QUERY = """
        query FindCreator($term: String!) {
            creators(searchTerm: $term, first: 10) {
                data {
                    pcid
                    name
                    informalName
                    subtitle
                    imageUrl
                    url
                    episodeAppearanceCount
                }
            }
        }
    """

    _EPISODE_CREDITS_QUERY = """
        query GetCreatorAppearances($creatorId: String!, $page: Int) {
            creator(identifier: {type: PCID, id: $creatorId}) {
                pcid
                name
                episodeCredits(
                    filters: { role: ["guest", "host"] }
                    first: 20
                    page: $page
                    sort: {sortBy: DATE, direction: DESCENDING}
                ) {
                    data {
                        role {
                            code
                            title
                        }
                        episode {
                            id
                            title
                            description
                            url
                            imageUrl
                            audioUrl
                            airDate
                            podcast {
                                id
                                title
                                imageUrl
                                url
                                categories {
                                    title
                                    slug
                                }
                            }
                        }
                    }
                    paginatorInfo {
                        currentPage
                        hasMorePages
                        lastPage
                    }
                }
            }
        }
    """

    _PODCASTS_BY_HOST_QUERY = """
        query SearchPodcastByHost($searchTerm: String!) {
            podcasts(searchTerm: $searchTerm, first: 5) {
                data {
                    id
                    title
                    imageUrl
                    url
                    credits(first: 20) {
                        data {
                            role {
                                code
                                title
                            }
                            creator {
                                pcid
                                name
                            }
                        }
                    }
                    episodes(first: 50, sort: {sortBy: AIR_DATE, direction: DESCENDING}) {
                        data {
                            id
                            title
                            description
                            url
                            imageUrl
                            audioUrl
                            airDate
                        }
                    }
                }
            }
        }
    """

    def __init__(self, api_key: str):
        """Store the bearer token and build the request headers.

        Args:
            api_key: Actually the access token (already exchanged from client
                credentials); it is sent as ``Authorization: Bearer <api_key>``.
        """
        self.api_key = api_key
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        }

    @classmethod
    async def from_client_credentials(cls, client_id: str, client_secret: str):
        """Create a PodchaserClient by exchanging client credentials for an access token.

        Args:
            client_id: Podchaser client ID
            client_secret: Podchaser client secret

        Returns:
            PodchaserClient instance with access token

        Raises:
            Exception: If the response contains GraphQL errors or no access
                token. Errors raised by the HTTP call or JSON decoding are
                logged and re-raised unchanged.
        """
        variables = {
            "client_id": client_id,
            "client_secret": client_secret,
        }

        try:
            response = await http_client.post(
                cls.API_URL,
                json={"query": cls._ACCESS_TOKEN_MUTATION, "variables": variables},
                headers={"Content-Type": "application/json"},
            )
            data = cls._as_dict(response.json())

            if "errors" in data:
                logger.error(f"Failed to get Podchaser access token: {data['errors']}")
                raise Exception(f"Podchaser authentication failed: {data['errors']}")

            # "data" and "requestAccessToken" may be null in the response.
            token_payload = cls._as_dict(
                cls._as_dict(data.get("data")).get("requestAccessToken")
            )
            access_token = token_payload.get("access_token")
            if not access_token:
                raise Exception("No access token returned from Podchaser")

            logger.info("Successfully obtained Podchaser access token")
            return cls(access_token)

        except Exception as e:
            logger.error(f"Error getting Podchaser access token: {e}")
            raise

    @staticmethod
    def _as_dict(value) -> Dict:
        """Return ``value`` if it is a dict, otherwise an empty dict."""
        return value if isinstance(value, dict) else {}

    @staticmethod
    def _air_date_in_window(air_date_str, cutoff_past, cutoff_future) -> bool:
        """Return True if the air date parses and lies within the inclusive window.

        Handles both "YYYY-MM-DD" and "YYYY-MM-DD HH:MM:SS" by parsing only the
        first 10 characters. Unparseable values are logged at debug level and
        treated as outside the window.
        """
        try:
            air_date = datetime.strptime(str(air_date_str)[:10], "%Y-%m-%d").date()
        except ValueError as e:
            logger.debug(f"Date parse error for episode: {e}")
            return False
        return cutoff_past <= air_date <= cutoff_future

    @staticmethod
    def _build_appearance(episode: Dict, podcast: Dict, role_title, air_date_str) -> Dict:
        """Build the appearance record shared by the guest and host lookups."""
        return {
            "podchaser_episode_id": episode.get("id"),
            "episode_title": episode.get("title"),
            "podcast_name": podcast.get("title"),
            "description": episode.get("description"),
            "air_date": air_date_str,
            "episode_url": episode.get("url"),
            "audio_url": episode.get("audioUrl"),
            # Fall back to the podcast artwork when the episode has none.
            "poster_url": episode.get("imageUrl") or podcast.get("imageUrl"),
            "role": role_title,
            "podchaser_podcast_id": podcast.get("id"),
        }

    async def _execute_query(self, query: str, variables: Optional[Dict] = None) -> Dict:
        """Execute a GraphQL query and return its ``data`` payload.

        Args:
            query: GraphQL document to send.
            variables: Optional variables; omitted from the request when falsy.

        Returns:
            The ``data`` object of the response, or ``{}`` when the response
            contains ``errors``, has no usable ``data``, or the request or
            decoding fails. This method logs failures instead of raising.
        """
        try:
            payload = {"query": query}
            if variables:
                payload["variables"] = variables

            response = await http_client.post(
                self.API_URL,
                json=payload,
                headers=self.headers,
            )
            body = self._as_dict(response.json())

            if "errors" in body:
                logger.error(f"GraphQL errors: {body['errors']}")
                return {}

            # "data" may be null; always hand callers a dict.
            return self._as_dict(body.get("data"))

        except Exception as e:
            # Deliberate best-effort: callers treat {} as "nothing found".
            logger.error(f"Podchaser API error: {e}")
            return {}

    async def search_creator_by_creators_endpoint(self, name: str) -> Optional[Dict]:
        """Search for a creator using the creators endpoint.

        Requests up to 10 candidates and prefers one whose name equals ``name``
        ignoring case and surrounding whitespace; otherwise the first result
        is returned.

        Args:
            name: Creator name to search for.

        Returns:
            The chosen creator dict, or None when nothing was found.
        """
        data = await self._execute_query(self._CREATOR_SEARCH_QUERY, {"term": name})

        # The "creators" connection and its "data" list may both be null.
        found = self._as_dict(self._as_dict(data).get("creators")).get("data") or []
        creators = [c for c in found if isinstance(c, dict)]
        if not creators:
            return None

        # Prefer exact case-insensitive match
        name_lower = name.strip().lower()
        for creator in creators:
            creator_name = creator.get("name")
            if creator_name and creator_name.strip().lower() == name_lower:
                logger.info(f"Found exact creator match: {creator_name} (pcid: {creator.get('pcid')})")
                return creator

        # Return first result if no exact match
        first = creators[0]
        logger.info(f"Found creator: {first.get('name')} (pcid: {first.get('pcid')})")
        return first

    async def search_creator(self, name: str) -> Optional[Dict]:
        """Search for a creator by name using the creators endpoint.

        Returns the best matching creator (exact name match preferred,
        otherwise the first result) or None.
        """
        return await self.search_creator_by_creators_endpoint(name)

    async def get_creator_guest_appearances(self, creator_id: str, days_back: int = 30, days_ahead: int = 365) -> List[Dict]:
        """Get all guest AND host appearances (episodeCredits) for a creator.

        Pages through the creator's credits (at most ``MAX_CREDIT_PAGES``
        pages) and keeps episodes whose air date lies within the window
        ``[today - days_back, today + days_ahead]``.

        Args:
            creator_id: Podchaser creator ID
            days_back: How many days in the past to search
            days_ahead: How many days in the future to search

        Returns:
            List of episode appearances with metadata (both guest and host roles)
        """
        today = datetime.now().date()
        cutoff_past = today - timedelta(days=days_back)
        cutoff_future = today + timedelta(days=days_ahead)

        appearances: List[Dict] = []

        for page in range(1, self.MAX_CREDIT_PAGES + 1):
            variables = {"creatorId": str(creator_id), "page": page}
            data = await self._execute_query(self._EPISODE_CREDITS_QUERY, variables)

            creator_data = self._as_dict(self._as_dict(data).get("creator"))
            if not creator_data:
                break

            credits_block = self._as_dict(creator_data.get("episodeCredits"))
            episode_credits = credits_block.get("data") or []

            logger.info(f"Fetched {len(episode_credits)} episodes from Podchaser (page {page})")

            for credit in episode_credits:
                credit = self._as_dict(credit)
                episode = self._as_dict(credit.get("episode"))

                # A missing episode yields {} and is skipped here as well.
                air_date_str = episode.get("airDate")
                if not air_date_str:
                    continue

                # Only include episodes within our time window
                if not self._air_date_in_window(air_date_str, cutoff_past, cutoff_future):
                    continue

                role = self._as_dict(credit.get("role"))
                podcast = self._as_dict(episode.get("podcast"))
                appearances.append(
                    self._build_appearance(episode, podcast, role.get("title"), air_date_str)
                )

            # Check if there are more pages
            paginator = self._as_dict(credits_block.get("paginatorInfo"))
            if not paginator.get("hasMorePages"):
                break

            # Rate limiting; pointless after the final permitted page.
            if page < self.MAX_CREDIT_PAGES:
                await asyncio.sleep(self.PAGE_DELAY_SECONDS)

        logger.info(f"Returning {len(appearances)} guest/host appearances for creator {creator_id}")
        return appearances

    async def get_creator_podcast_episodes(self, creator_name: str, days_back: int = 30, days_ahead: int = 365) -> List[Dict]:
        """Get podcast episodes where the creator is a host.

        Searches podcasts by the creator's name, keeps those that credit a
        host whose name contains (or is contained in) ``creator_name`` ignoring
        case, and returns their episodes that aired within the window.

        Args:
            creator_name: Creator's name to search for
            days_back: How many days in the past to search
            days_ahead: How many days in the future to search

        Returns:
            List of podcast episodes with metadata; empty when ``creator_name``
            is blank.
        """
        # A blank name is a substring of every host name, so it would match
        # every podcast returned; treat it as "nothing to search for".
        wanted = (creator_name or "").strip().lower()
        if not wanted:
            logger.warning("Empty creator name provided, skipping podcast host search")
            return []

        today = datetime.now().date()
        cutoff_past = today - timedelta(days=days_back)
        cutoff_future = today + timedelta(days=days_ahead)

        # Search for podcasts by creator name
        data = await self._execute_query(
            self._PODCASTS_BY_HOST_QUERY, {"searchTerm": creator_name}
        )
        podcasts = self._as_dict(self._as_dict(data).get("podcasts")).get("data") or []

        appearances: List[Dict] = []

        for podcast in podcasts:
            podcast = self._as_dict(podcast)

            # Check if the creator is a host of this podcast
            is_host = False
            host_role = None
            for credit in self._as_dict(podcast.get("credits")).get("data") or []:
                credit = self._as_dict(credit)
                role = self._as_dict(credit.get("role"))
                if role.get("code") != "host":
                    continue

                credited = str(
                    self._as_dict(credit.get("creator")).get("name") or ""
                ).strip().lower()
                if credited and (wanted in credited or credited in wanted):
                    is_host = True
                    host_role = role.get("title")
                    break

            if not is_host:
                continue

            # Get episodes from this podcast
            for episode in self._as_dict(podcast.get("episodes")).get("data") or []:
                episode = self._as_dict(episode)
                air_date_str = episode.get("airDate")
                if not air_date_str:
                    continue

                # Only include episodes within our time window
                if self._air_date_in_window(air_date_str, cutoff_past, cutoff_future):
                    appearances.append(
                        self._build_appearance(episode, podcast, host_role, air_date_str)
                    )

        return appearances

    async def find_upcoming_podcast_appearances(self, creator_id: str, creator_name: Optional[str] = None) -> List[Dict]:
        """Find upcoming and recent podcast appearances for a creator.

        Includes both guest appearances (episodeCredits) and hosted podcast
        episodes. Returns episodes that air within the next 365 days or aired
        within the last 365 days, deduplicated by episode ID and sorted by air
        date, newest first.

        Args:
            creator_id: Podchaser creator ID (pcid)
            creator_name: Creator's name (required for podcast search; the
                hosted-podcast lookup is skipped when it is missing)

        Returns:
            List of appearance dicts. Entries without an episode ID are dropped.
        """
        # Get both guest appearances and hosted episodes
        guest_appearances = await self.get_creator_guest_appearances(
            creator_id,
            days_back=365,  # Look back 1 year for recent episodes
            days_ahead=365,
        )

        # For hosted episodes, we need the creator name
        hosted_episodes: List[Dict] = []
        if creator_name:
            hosted_episodes = await self.get_creator_podcast_episodes(
                creator_name,
                days_back=365,  # Look back 1 year for recent episodes
                days_ahead=365,
            )
        else:
            logger.warning(f"No creator name provided for {creator_id}, skipping podcast host search")

        def description_length(appearance: Dict) -> int:
            # A missing/None description counts as empty, not as the text "None".
            return len(str(appearance.get("description") or ""))

        # Combine and deduplicate by episode ID
        all_appearances: Dict = {}
        for appearance in guest_appearances + hosted_episodes:
            episode_id = appearance.get("podchaser_episode_id")
            if not episode_id:
                continue

            # If duplicate, prefer the one with more info (longer description)
            current = all_appearances.get(episode_id)
            if current is None or description_length(appearance) > description_length(current):
                all_appearances[episode_id] = appearance

        # Sort by air date, newest first. Both "YYYY-MM-DD" and
        # "YYYY-MM-DD HH:MM:SS" strings order correctly as plain text.
        return sorted(
            all_appearances.values(),
            key=lambda x: str(x.get("air_date") or ""),
            reverse=True,
        )
|