445
modules/podchaser_client.py
Normal file
445
modules/podchaser_client.py
Normal file
@@ -0,0 +1,445 @@
|
||||
"""Podchaser GraphQL API client for podcast guest appearances tracking"""
|
||||
import asyncio
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, List, Optional
|
||||
from web.backend.core.http_client import http_client
|
||||
from modules.universal_logger import get_logger
|
||||
|
||||
# Module-level logger used by the Podchaser client code in this file; it is
# obtained from the project's get_logger helper under the name 'Podchaser'.
logger = get_logger('Podchaser')
|
||||
|
||||
class PodchaserClient:
    """Client for interacting with the Podchaser GraphQL API.

    All requests are POSTed to ``API_URL`` through the shared ``http_client``
    with a bearer access token. Query helpers are best-effort: transport or
    GraphQL errors are logged and surface to callers as empty results rather
    than exceptions (only ``from_client_credentials`` raises).
    """

    API_URL = "https://api.podchaser.com/graphql"

    # Upper bound on episodeCredits pages fetched per creator (20 credits per
    # page), to prevent excessive API calls.
    _MAX_CREDIT_PAGES = 10
    # Pause between consecutive page requests, in seconds (rate limiting).
    _PAGE_DELAY_SECONDS = 0.15

    def __init__(self, api_key: str):
        """Store the access token and prepare the request headers.

        Args:
            api_key: Podchaser access token (already exchanged from client
                credentials), sent as a ``Bearer`` token on every request.
        """
        self.api_key = api_key
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        }

    @classmethod
    async def from_client_credentials(cls, client_id: str, client_secret: str) -> "PodchaserClient":
        """
        Create a PodchaserClient by exchanging client credentials for an access token

        Args:
            client_id: Podchaser client ID
            client_secret: Podchaser client secret

        Returns:
            PodchaserClient instance with access token

        Raises:
            Exception: If the request fails, the API reports GraphQL errors,
                or the response carries no access token. Each failure is
                logged exactly once before being raised.
        """
        mutation = """
            mutation GetToken($client_id: String!, $client_secret: String!) {
                requestAccessToken(
                    input: {
                        grant_type: CLIENT_CREDENTIALS
                        client_id: $client_id
                        client_secret: $client_secret
                    }
                ) {
                    access_token
                }
            }
        """

        variables = {
            "client_id": client_id,
            "client_secret": client_secret
        }

        # Only the network call and JSON decoding are guarded here, so that
        # the validation failures below are not logged a second time.
        try:
            response = await http_client.post(
                cls.API_URL,
                json={"query": mutation, "variables": variables},
                headers={"Content-Type": "application/json"}
            )
            data = response.json()
        except Exception as e:
            logger.error(f"Error getting Podchaser access token: {e}")
            raise

        if not isinstance(data, dict):
            # A non-object payload cannot carry a token; fall through to the
            # "no access token" failure below.
            data = {}

        if "errors" in data:
            logger.error(f"Failed to get Podchaser access token: {data['errors']}")
            raise Exception(f"Podchaser authentication failed: {data['errors']}")

        # "or {}" (rather than a .get default) also covers explicit JSON nulls.
        token_payload = (data.get("data") or {}).get("requestAccessToken") or {}
        access_token = token_payload.get("access_token")

        if not access_token:
            message = "No access token returned from Podchaser"
            logger.error(f"Error getting Podchaser access token: {message}")
            raise Exception(message)

        logger.info("Successfully obtained Podchaser access token")
        return cls(access_token)

    async def _execute_query(self, query: str, variables: Optional[Dict] = None) -> Dict:
        """Execute a GraphQL query and return its ``data`` object.

        Args:
            query: GraphQL document to send.
            variables: Optional variables for the document; omitted from the
                payload when empty.

        Returns:
            The ``data`` member of the response, or an empty dict when the
            request fails, the payload is not a JSON object, the response
            contains ``errors``, or ``data`` is missing/null. Never raises.
        """
        payload = {"query": query}
        if variables:
            payload["variables"] = variables

        # Deliberately broad: this is the best-effort boundary for all
        # read queries, so any transport/decoding failure becomes {}.
        try:
            response = await http_client.post(
                self.API_URL,
                json=payload,
                headers=self.headers
            )
            data = response.json()
        except Exception as e:
            logger.error(f"Podchaser API error: {e}")
            return {}

        if not isinstance(data, dict):
            logger.error(f"Podchaser API error: unexpected response payload of type {type(data).__name__}")
            return {}

        if "errors" in data:
            logger.error(f"GraphQL errors: {data['errors']}")
            return {}

        # "or {}" keeps the Dict contract when the server sends "data": null.
        return data.get("data") or {}

    @staticmethod
    def _date_window(days_back: int, days_ahead: int):
        """Return ``(cutoff_past, cutoff_future)`` dates around today's local date."""
        today = datetime.now().date()
        return today - timedelta(days=days_back), today + timedelta(days=days_ahead)

    @staticmethod
    def _parse_air_date(air_date_str):
        """Return the calendar date of an ``airDate`` value.

        Handles both "YYYY-MM-DD" and "YYYY-MM-DD HH:MM:SS" by parsing only
        the first 10 characters.

        Raises:
            ValueError: If the value is not a string or does not start with a
                valid "YYYY-MM-DD" date.
        """
        if not isinstance(air_date_str, str):
            raise ValueError(f"airDate is not a string: {air_date_str!r}")
        return datetime.strptime(air_date_str[:10], "%Y-%m-%d").date()

    @staticmethod
    def _build_appearance(episode: Dict, podcast: Dict, role: Optional[str], air_date_str: str) -> Dict:
        """Build the appearance record returned to callers for one episode."""
        return {
            "podchaser_episode_id": episode.get("id"),
            "episode_title": episode.get("title"),
            "podcast_name": podcast.get("title"),
            "description": episode.get("description"),
            "air_date": air_date_str,
            "episode_url": episode.get("url"),
            "audio_url": episode.get("audioUrl"),
            # Fall back to the podcast artwork when the episode has none.
            "poster_url": episode.get("imageUrl") or podcast.get("imageUrl"),
            "role": role,
            "podchaser_podcast_id": podcast.get("id"),
        }

    async def search_creator_by_creators_endpoint(self, name: str) -> Optional[Dict]:
        """
        Search for a creator using the creators endpoint
        This is more direct than searching via credits or podcasts

        Args:
            name: Creator name used as the search term.

        Returns:
            The creator whose name equals ``name`` (ignoring case and
            surrounding whitespace) if one is among the first 10 results,
            otherwise the first result, or None when nothing was found.
        """
        query = """
            query FindCreator($term: String!) {
                creators(searchTerm: $term, first: 10) {
                    data {
                        pcid
                        name
                        informalName
                        subtitle
                        imageUrl
                        url
                        episodeAppearanceCount
                    }
                }
            }
        """

        data = await self._execute_query(query, {"term": name})

        # Tolerate "creators": null / "data": null and null list entries.
        raw_creators = (data.get("creators") or {}).get("data") or []
        creators = [creator for creator in raw_creators if creator]
        if not creators:
            return None

        # Prefer exact case-insensitive match
        name_lower = name.strip().lower()
        for creator in creators:
            candidate = creator.get("name")
            if candidate and candidate.strip().lower() == name_lower:
                logger.info(f"Found exact creator match: {creator.get('name')} (pcid: {creator.get('pcid')})")
                return creator

        # Return first result if no exact match
        first = creators[0]
        logger.info(f"Found creator: {first.get('name')} (pcid: {first.get('pcid')})")
        return first

    async def search_creator(self, name: str) -> Optional[Dict]:
        """
        Search for a creator by name using the creators endpoint
        Returns the exact-name match when available, else the first matching
        creator, or None
        """
        return await self.search_creator_by_creators_endpoint(name)

    async def get_creator_guest_appearances(self, creator_id: str, days_back: int = 30, days_ahead: int = 365) -> List[Dict]:
        """
        Get all guest AND host appearances (episodeCredits) for a creator
        Filters for recent and upcoming episodes

        At most ``_MAX_CREDIT_PAGES`` pages of 20 credits are fetched, newest
        first, pausing ``_PAGE_DELAY_SECONDS`` between page requests.

        Args:
            creator_id: Podchaser creator ID
            days_back: How many days in the past to search
            days_ahead: How many days in the future to search

        Returns:
            List of episode appearances with metadata (both guest and host roles)
        """
        cutoff_past, cutoff_future = self._date_window(days_back, days_ahead)

        query = """
            query GetCreatorAppearances($creatorId: String!, $page: Int) {
                creator(identifier: {type: PCID, id: $creatorId}) {
                    pcid
                    name
                    episodeCredits(
                        filters: { role: ["guest", "host"] }
                        first: 20
                        page: $page
                        sort: {sortBy: DATE, direction: DESCENDING}
                    ) {
                        data {
                            role {
                                code
                                title
                            }
                            episode {
                                id
                                title
                                description
                                url
                                imageUrl
                                audioUrl
                                airDate
                                podcast {
                                    id
                                    title
                                    imageUrl
                                    url
                                    categories {
                                        title
                                        slug
                                    }
                                }
                            }
                        }
                        paginatorInfo {
                            currentPage
                            hasMorePages
                            lastPage
                        }
                    }
                }
            }
        """

        appearances: List[Dict] = []

        for page in range(1, self._MAX_CREDIT_PAGES + 1):
            variables = {
                "creatorId": str(creator_id),
                "page": page
            }
            data = await self._execute_query(query, variables)

            creator_data = data.get("creator") if data else None
            if not creator_data:
                break

            # "or {}" / "or []" cover fields the API returns as explicit null.
            credits_block = creator_data.get("episodeCredits") or {}
            episode_credits = credits_block.get("data") or []

            logger.info(f"Fetched {len(episode_credits)} episodes from Podchaser (page {page})")

            for credit in episode_credits:
                credit = credit or {}
                episode = credit.get("episode")
                if not episode:
                    continue

                # Check air date
                air_date_str = episode.get("airDate")
                if not air_date_str:
                    continue

                try:
                    air_date = self._parse_air_date(air_date_str)
                except ValueError as e:
                    logger.debug(f"Date parse error for episode: {e}")
                    continue

                # Only include episodes within our time window
                if not (cutoff_past <= air_date <= cutoff_future):
                    continue

                role_obj = credit.get("role")
                role_name = role_obj.get("title") if isinstance(role_obj, dict) else None

                appearances.append(
                    self._build_appearance(
                        episode,
                        episode.get("podcast") or {},
                        role_name,
                        air_date_str,
                    )
                )

            # Check if there are more pages
            paginator = credits_block.get("paginatorInfo") or {}
            if not paginator.get("hasMorePages") or page == self._MAX_CREDIT_PAGES:
                break

            # Rate limiting; skipped above when no further request follows.
            await asyncio.sleep(self._PAGE_DELAY_SECONDS)

        logger.info(f"Returning {len(appearances)} guest/host appearances for creator {creator_id}")
        return appearances

    async def get_creator_podcast_episodes(self, creator_name: str, days_back: int = 30, days_ahead: int = 365) -> List[Dict]:
        """
        Get podcast episodes where the creator is a host
        Searches for podcasts by the creator's name and returns recent episodes

        A podcast counts as hosted by the creator when one of its first 20
        credits has role code "host" and the credited name contains, or is
        contained in, ``creator_name`` (case-insensitive). Only the first 5
        podcasts and their 50 newest episodes are examined.

        Args:
            creator_name: Creator's name to search for
            days_back: How many days in the past to search
            days_ahead: How many days in the future to search

        Returns:
            List of podcast episodes with metadata; empty when
            ``creator_name`` is empty or blank.
        """
        # An empty needle is a substring of every name and would mark every
        # podcast as hosted by the creator, so bail out early.
        needle = creator_name.strip().lower() if creator_name else ""
        if not needle:
            return []

        cutoff_past, cutoff_future = self._date_window(days_back, days_ahead)

        # Search for podcasts by creator name
        query = """
            query SearchPodcastByHost($searchTerm: String!) {
                podcasts(searchTerm: $searchTerm, first: 5) {
                    data {
                        id
                        title
                        imageUrl
                        url
                        credits(first: 20) {
                            data {
                                role {
                                    code
                                    title
                                }
                                creator {
                                    pcid
                                    name
                                }
                            }
                        }
                        episodes(first: 50, sort: {sortBy: AIR_DATE, direction: DESCENDING}) {
                            data {
                                id
                                title
                                description
                                url
                                imageUrl
                                audioUrl
                                airDate
                            }
                        }
                    }
                }
            }
        """

        data = await self._execute_query(query, {"searchTerm": creator_name})

        appearances: List[Dict] = []
        podcasts = (data.get("podcasts") or {}).get("data") or []

        for podcast in podcasts:
            if not podcast:
                continue

            # Check if the creator is a host of this podcast
            host_role = None
            for credit in (podcast.get("credits") or {}).get("data") or []:
                credit = credit or {}
                role = credit.get("role") or {}
                credited_name = ((credit.get("creator") or {}).get("name") or "").lower()
                if role.get("code") != "host" or not credited_name:
                    continue
                if needle in credited_name or credited_name in needle:
                    host_role = role.get("title")
                    break
            else:
                # Loop ended without a break: the creator is not a host here.
                continue

            # Get episodes from this podcast
            for episode in (podcast.get("episodes") or {}).get("data") or []:
                if not episode:
                    continue

                air_date_str = episode.get("airDate")
                if not air_date_str:
                    continue

                try:
                    air_date = self._parse_air_date(air_date_str)
                except ValueError:
                    continue

                # Only include episodes within our time window
                if cutoff_past <= air_date <= cutoff_future:
                    appearances.append(
                        self._build_appearance(episode, podcast, host_role, air_date_str)
                    )

        return appearances

    async def find_upcoming_podcast_appearances(self, creator_id: str, creator_name: Optional[str] = None) -> List[Dict]:
        """
        Find upcoming podcast appearances for a creator
        Includes both guest appearances (episodeCredits) and hosted podcast episodes
        Returns episodes that air within the next 365 days or aired within the last 365 days

        Args:
            creator_id: Podchaser creator ID (pcid)
            creator_name: Creator's name (required for podcast search; when
                omitted only episodeCredits are returned)

        Returns:
            Appearances deduplicated by episode ID and sorted by air date,
            newest first. Records without an episode ID are dropped.
        """
        # Get both guest appearances and hosted episodes
        guest_appearances = await self.get_creator_guest_appearances(
            creator_id,
            days_back=365,  # Look back 1 year for recent episodes
            days_ahead=365
        )

        # For hosted episodes, we need the creator name
        hosted_episodes = []
        if creator_name:
            hosted_episodes = await self.get_creator_podcast_episodes(
                creator_name,
                days_back=365,  # Look back 1 year for recent episodes
                days_ahead=365
            )
        else:
            logger.warning(f"No creator name provided for {creator_id}, skipping podcast host search")

        def description_length(appearance: Dict) -> int:
            # A missing/None description counts as empty; str(None) would
            # otherwise be measured as the 4-character text "None".
            return len(str(appearance.get("description") or ""))

        # Combine and deduplicate by episode ID
        all_appearances = {}
        for appearance in guest_appearances + hosted_episodes:
            episode_id = appearance.get("podchaser_episode_id")
            if not episode_id:
                continue
            # If duplicate, prefer the one with the longer description
            current = all_appearances.get(episode_id)
            if current is None or description_length(appearance) > description_length(current):
                all_appearances[episode_id] = appearance

        # Sort by air date (string comparison of the "YYYY-MM-DD..." values)
        return sorted(
            all_appearances.values(),
            key=lambda x: x.get("air_date") or "",
            reverse=True
        )
|
||||
Reference in New Issue
Block a user