603
modules/tiktok_module.py
Executable file
603
modules/tiktok_module.py
Executable file
@@ -0,0 +1,603 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
TikTok Download Module - Downloads TikTok videos with proper timestamp extraction
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import json
|
||||
import subprocess
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
from modules.base_module import LoggingMixin
|
||||
|
||||
|
||||
class TikTokDownloader(LoggingMixin):
|
||||
"""Downloads TikTok videos and extracts metadata including timestamps"""
|
||||
|
||||
def __init__(self, base_path: Path = None, log_callback=None, use_database=True, unified_db=None):
    """
    Set up a TikTok downloader backed by the unified database

    Args:
        base_path: Root directory for downloads (current working directory when omitted)
        log_callback: Optional callback for logging (tag, level, message)
        use_database: When False, duplicate lookups and immediate recording are skipped
        unified_db: UnifiedDatabase instance (required)

    Raises:
        ValueError: If unified_db is missing
    """
    # The logging mixin supplies self.log
    self._init_logger('TikTok', log_callback, default_module='Download')

    if base_path:
        self.base_path = Path(base_path)
    else:
        self.base_path = Path.cwd()
    self.file_timestamps = {}  # filename -> datetime
    self.use_database = use_database

    # Only the unified database adapter is supported
    if not unified_db:
        raise ValueError("TikTok module requires unified_db - standalone database is no longer supported")

    from modules.tiktok_db_adapter import TikTokDatabaseAdapter
    self.db = TikTokDatabaseAdapter(unified_db)
    self.use_unified_db = True

    # Activity status manager used for real-time progress updates
    from modules.activity_status import get_activity_manager
    self.activity_manager = get_activity_manager(unified_db)

    # Downloads whose database recording has been deferred
    self.pending_downloads = []
|
||||
|
||||
def _is_already_downloaded(self, video_id: str, username: str = None) -> bool:
    """Tell whether the database already knows this video.

    Always False when database tracking is switched off. With a username the
    adapter's username-aware lookup is used, otherwise the ID-only lookup.
    """
    if self.use_database:
        if username:
            # Username-scoped lookup
            return self.db.is_downloaded(video_id, username)
        return self.db.is_already_downloaded(video_id)
    return False
|
||||
|
||||
def _record_download(self, video_id: str, username: str, filename: str,
                     post_date: Optional[datetime] = None, metadata: Dict = None,
                     deferred: bool = False):
    """Record a successful download, either now or later

    Args:
        video_id: TikTok post ID
        username: Profile the post belongs to
        filename: File name or path of the downloaded media; the database
                  receives both the value as given and its final component
        post_date: Post time, if known
        metadata: Extra fields stored alongside the record
        deferred: If True, queue the record in pending_downloads instead of
                  writing it, so it can be recorded after the file move

    Returns:
        True when the record was queued, the adapter's result when it was
        written, or None when database tracking is disabled
    """
    given_path = str(filename)      # value exactly as passed in
    base_name = Path(filename).name  # final path component only

    if deferred:
        # Queue for later; the caller flushes pending_downloads itself
        queued = {
            'video_id': video_id,
            'username': username,
            'filename': base_name,
            'post_date': None if not post_date else post_date.isoformat(),
            'file_path': given_path,
            'metadata': metadata,
        }
        self.pending_downloads.append(queued)
        self.log(f"Deferred recording for {video_id}", "debug")
        return True

    if self.use_database:
        return self.db.record_download(
            video_id=video_id,
            username=username,
            filename=base_name,
            post_date=post_date,
            metadata=metadata,
            file_path=given_path,
        )
    return None
|
||||
|
||||
def get_pending_downloads(self):
    """Return a shallow copy of the downloads queued for deferred recording"""
    return list(self.pending_downloads)
|
||||
|
||||
def clear_pending_downloads(self):
    """Drop every queued download once the caller has recorded them"""
    # Rebind to a fresh list rather than emptying the old one in place
    self.pending_downloads = list()
|
||||
|
||||
def extract_date_from_info(self, info_dict: Dict) -> Optional[datetime]:
    """
    Extract upload date from yt-dlp info dictionary

    The Unix-timestamp fields are tried in order of preference
    (timestamp, release_timestamp, modified_timestamp) because they carry
    both date and time. They are UTC instants and are converted to a naive
    datetime in the machine's local timezone, which is what naive
    datetime.timestamp() / os.utime round-trip correctly. upload_date
    (YYYYMMDD) is the last resort because it carries no time of day.

    Args:
        info_dict: yt-dlp info dictionary

    Returns:
        Naive local datetime, or None when no usable date field is present
    """
    from datetime import timezone

    timestamp_fields = (
        ('timestamp', 'full'),
        ('release_timestamp', 'release'),
        ('modified_timestamp', 'modified'),
    )
    for field, label in timestamp_fields:
        raw_value = info_dict.get(field)
        if not raw_value:
            continue
        try:
            dt_utc = datetime.fromtimestamp(raw_value, tz=timezone.utc)
            # Shift to local time first, then drop tzinfo. Stripping tzinfo
            # straight off the UTC value would leave UTC wall-clock time
            # masquerading as local time.
            dt_local = dt_utc.astimezone().replace(tzinfo=None)
        except (TypeError, ValueError, OverflowError, OSError) as e:
            # Unusable value (wrong type or out of range): try the next field
            self.log(f"Ignoring unusable {field} value {raw_value!r}: {e}", "debug")
            continue
        self.log(f"Extracted {label} timestamp (UTC): {dt_utc.replace(tzinfo=None)}", "debug")
        return dt_local

    # Fall back to upload_date (YYYYMMDD format - only has date, no time)
    upload_date = info_dict.get('upload_date')
    if isinstance(upload_date, str) and len(upload_date) == 8:
        try:
            dt = datetime.strptime(upload_date, '%Y%m%d')
        except ValueError:
            return None
        self.log(f"Only date available (no time): {dt.date()}", "warning")
        return dt

    return None
|
||||
|
||||
def download_profile(self,
                     username: str,
                     number_of_days: int = 7,
                     full_profile: bool = False,
                     output_dir: Path = None,
                     defer_database: bool = False) -> Tuple[Dict[str, datetime], List[Path]]:
    """
    Download TikTok profile videos

    Lists the profile's post IDs with yt-dlp, fetches each post with
    gallery-dl, tidies the output directory (drops audio-only files,
    shortens over-long names), then records each new media file and sets
    its access/modification time to the post time.

    Args:
        username: TikTok username (a leading @ is stripped)
        number_of_days: Number of days to download (ignored if full_profile=True)
        full_profile: If True, download entire profile
        output_dir: Output directory (uses base_path/username if not specified)
        defer_database: If True, don't record to database immediately - store in
                        pending_downloads for later recording after file move is complete

    Returns:
        Tuple of (file_timestamps dict, list of downloaded files). Both are
        empty when the listing fails or yields no posts. Files deleted as
        hash duplicates are not included.
    """
    from datetime import timedelta

    self.defer_database = defer_database  # Read back when recording downloads below
    username = username.lstrip('@')
    output_dir = Path(output_dir) if output_dir else self.base_path / username
    output_dir.mkdir(parents=True, exist_ok=True)

    self.log(f"Downloading TikTok profile: @{username}", "info")
    self.activity_manager.update_status("Checking videos")

    # HYBRID APPROACH: Use yt-dlp to get ID list (fast), then gallery-dl per video (handles carousels)

    # Step 1: Use yt-dlp to quickly get list of video IDs with dates
    profile_url = f"https://www.tiktok.com/@{username}"
    list_cmd = [
        "yt-dlp",
        "--flat-playlist",  # Don't download, just list
        "--print", "%(upload_date)s %(id)s",  # Print date and ID
        "--quiet",
        "--no-warnings",
        profile_url
    ]

    self.log("Getting video list with yt-dlp...", "debug")

    try:
        result = subprocess.run(list_cmd, capture_output=True, text=True, timeout=60)
        if result.returncode != 0 and result.stderr:
            # Surface listing failures instead of reporting "no videos" only
            self.log(f"yt-dlp exited with code {result.returncode}: {result.stderr.strip()[:100]}", "warning")

        # Each useful output line is "<upload_date> <id>"
        entries = [
            parts
            for parts in (line.split() for line in result.stdout.splitlines())
            if len(parts) >= 2
        ]

        if not full_profile and number_of_days:
            cutoff_date = datetime.now() - timedelta(days=number_of_days)
            cutoff_str = cutoff_date.strftime('%Y%m%d')
            # Plain string comparison of YYYYMMDD values.
            # NOTE(review): a non-date placeholder such as "NA" sorts after
            # any digit string and is therefore kept - confirm this is intended.
            video_ids = [parts[1] for parts in entries if parts[0] >= cutoff_str]
        else:
            # No filter, take all
            video_ids = [parts[1] for parts in entries]

        self.log(f"Found {len(video_ids)} posts to download", "info")
    except Exception as e:
        self.log(f"Failed to get video list: {e}", "error")
        return {}, []

    if not video_ids:
        self.log("No videos found matching criteria", "info")
        return {}, []

    total = len(video_ids)

    # Set initial progress so dashboard shows 0/N immediately
    self.activity_manager.update_status(
        "Downloading videos",
        progress_current=0,
        progress_total=total
    )

    # Crash recovery checkpoint
    from modules.task_checkpoint import TaskCheckpoint
    checkpoint = TaskCheckpoint(f'tiktok:{username}', 'scraping')
    checkpoint.start(total_items=total)
    if checkpoint.is_recovering():
        self.log(f"TikTok @{username}: recovering — skipping already-downloaded videos", "info")

    # Step 2: Download each video individually with gallery-dl (fast per video, handles carousels)
    for i, video_id in enumerate(video_ids, 1):
        # Update progress at start of each iteration (fires even on skips)
        self.activity_manager.update_status(
            "Downloading videos",
            progress_current=i,
            progress_total=total
        )

        # Skip if already completed in a previous crashed run
        if checkpoint.is_completed(video_id):
            continue

        checkpoint.set_current(video_id)

        # Skip if already downloaded
        if self._is_already_downloaded(video_id, username):
            self.log(f"[{i}/{total}] Skipping already downloaded: {video_id}", "debug")
            checkpoint.mark_completed(video_id)
            continue

        video_url = f"https://www.tiktok.com/@{username}/video/{video_id}"
        self.log(f"[{i}/{total}] Downloading {video_id}", "debug")

        cmd = [
            "gallery-dl",
            "--write-metadata",
            "-D", str(output_dir),
            "-f", "{date:%Y%m%d}_{desc}_{id}_{num}.{extension}",
            video_url
        ]

        try:
            self.log(f"Calling gallery-dl for {video_id}", "debug")
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
            self.log(f"gallery-dl returned: code={result.returncode}, stdout lines={len(result.stdout.splitlines()) if result.stdout else 0}", "debug")
            if result.returncode != 0 and result.stderr:
                stderr = result.stderr
                if "not available" in stderr.lower() or "404" in stderr:
                    self.log(f"Video {video_id} not available (deleted or private)", "warning")
                else:
                    self.log(f"Failed to download {video_id}: {stderr[:100]}", "warning")
        except subprocess.TimeoutExpired:
            self.log(f"Timeout downloading {video_id}", "warning")
        except Exception as e:
            self.log(f"Error downloading {video_id}: {e}", "warning")

        # NOTE(review): the checkpoint is marked complete even when the
        # download failed or timed out - confirm that is intended.
        checkpoint.mark_completed(video_id)

    checkpoint.finish()

    # Post-process: Rename files with long descriptions and remove audio-only files.
    # Snapshot the listing first because entries are renamed/deleted in the loop.
    for file in list(output_dir.glob("*")):
        if not file.is_file() or file.suffix == '.json':
            continue

        # Remove audio-only files
        if file.suffix.lower() in ['.mp3', '.m4a', '.aac', '.wav', '.ogg']:
            self.log(f"Removing audio-only file: {file.name}", "debug")
            file.unlink()
            # Also remove corresponding JSON (<media name>.json)
            json_file = file.with_suffix(file.suffix + '.json')
            if json_file.exists():
                json_file.unlink()
            continue

        # Truncate long filenames (max 255 chars for Linux)
        if len(file.name) <= 200:  # Leave some margin
            continue

        # Parse filename: YYYYMMDD_description_ID_NUM.ext
        parts = file.name.rsplit('_', 2)  # Split from right to preserve ID and num
        if len(parts) != 3:
            continue
        date_and_desc, post_id, num_and_ext = parts
        date_part = date_and_desc[:8]  # YYYYMMDD
        desc_part = date_and_desc[9:]  # Everything after date_

        # Format: DATE_DESC_ID_NUM.EXT
        fixed_length = len(date_part) + len(post_id) + len(num_and_ext) + 3  # 3 underscores
        max_desc_len = 200 - fixed_length
        if len(desc_part) <= max_desc_len:
            continue

        truncated_desc = desc_part[:max_desc_len-3] + "..."
        new_name = f"{date_part}_{truncated_desc}_{post_id}_{num_and_ext}"
        new_path = file.parent / new_name

        self.log(f"Truncating long filename: {file.name[:50]}... -> {new_name[:50]}...", "debug")
        file.rename(new_path)

        # Rename corresponding JSON file too
        json_file = Path(str(file) + '.json')
        if json_file.exists():
            json_file.rename(Path(str(new_path) + '.json'))

    # Process downloaded files and extract timestamps from JSON
    downloaded_files = []
    file_timestamps = {}
    processed_ids = set()  # IDs to skip: already in the DB, or removed as hash duplicates
    started_ids = set()  # IDs whose DB check has already passed in THIS run

    for json_file in list(output_dir.glob("*.json")):
        try:
            with open(json_file, 'r', encoding='utf-8') as f:
                info = json.load(f)

            video_id = info.get('id', '')

            # Extract timestamp from gallery-dl's createTime field (needed for all files)
            timestamp = None
            create_time = info.get('createTime')
            if create_time:
                try:
                    timestamp = datetime.fromtimestamp(int(create_time))
                    self.log(f"Extracted timestamp {timestamp} from createTime", "debug")
                except (TypeError, ValueError, OverflowError, OSError):
                    timestamp = None
            if timestamp is None:
                # Fall back to the generic extractor when createTime is
                # missing as well as when it is unparsable
                timestamp = self.extract_date_from_info(info)

            # gallery-dl names JSON files as: filename.ext.json
            media_file = Path(str(json_file)[:-5])  # Remove .json extension

            if not media_file.exists():
                self.log(f"Media file not found for {json_file.name}", "warning")
                json_file.unlink()
                continue

            video_file = media_file

            # Check if already downloaded - but only check ONCE per video_id per run
            # (Don't check again for carousel photos #2, #3 after we've started processing #1)
            if video_id and video_id not in started_ids:
                if self._is_already_downloaded(video_id, username):
                    self.log(f"Skipping already downloaded post: {video_id}", "debug")
                    # Mark as processed so we don't check again for this ID's other files
                    processed_ids.add(video_id)
                    # Just remove JSON file, keep media files (they're already processed)
                    json_file.unlink()
                    continue
                started_ids.add(video_id)

            # Skip if this video_id was marked as already downloaded
            if video_id in processed_ids:
                json_file.unlink()
                continue

            # Check for duplicate hash BEFORE listing the file as downloaded,
            # so a deleted duplicate never appears in the returned results
            # (hash blacklist persists even if original deleted)
            file_hash = self.db.get_file_hash(str(video_file)) if self.db else None
            if file_hash:
                existing = self.db.get_download_by_file_hash(file_hash)
                if existing and existing.get('file_path') and str(video_file) != existing.get('file_path'):
                    # Duplicate hash found - content was already downloaded (prevents redownload of deleted content)
                    self.log(f"⚠ Duplicate content detected (hash match): {video_file.name} matches {existing['filename']} from {existing['platform']}/{existing['source']}", "warning")
                    # Delete the duplicate regardless of whether original file still exists
                    try:
                        video_file.unlink()
                    except Exception as e:
                        # Could not delete: keep the file and record it below
                        self.log(f"Failed to delete duplicate {video_file.name}: {e}", "warning")
                    else:
                        self.log(f"Deleted duplicate (hash blacklist): {video_file.name}", "debug")
                        # Mark as processed so we don't try to download again
                        processed_ids.add(video_id)
                        json_file.unlink()
                        continue

            # Every surviving file is listed and timestamped (even carousel photos #2, #3)
            downloaded_files.append(video_file)
            if timestamp:
                file_timestamps[video_file.name] = timestamp
                self.log(f"Extracted timestamp {timestamp} for {video_file.name}", "debug")

            # Record in database (each file gets its own entry, even for carousels)
            # NOTE(review): only the bare file name is passed, so the recorded
            # file_path equals the name - confirm against the adapter/caller.
            if video_id:
                self._record_download(
                    video_id=video_id,
                    username=username,
                    filename=video_file.name,
                    post_date=timestamp,
                    metadata={"title": info.get('desc', ''), "description": info.get('desc', '')},
                    deferred=self.defer_database
                )

            # Remove JSON file after processing
            json_file.unlink()

        except Exception as e:
            self.log(f"Failed to process {json_file}: {e}", "error")

    self.log(f"Downloaded {len(downloaded_files)} files from @{username}", "info")

    # Apply timestamps to files
    for file_path in downloaded_files:
        filename = file_path.name
        if filename not in file_timestamps:
            continue
        timestamp = file_timestamps[filename]
        try:
            # Convert datetime to unix timestamp
            unix_time = timestamp.timestamp()
            # Set both access time and modification time
            os.utime(str(file_path), (unix_time, unix_time))
            self.log(f"Applied timestamp {timestamp} to {filename}", "debug")
        except Exception as e:
            self.log(f"Failed to apply timestamp to {filename}: {e}", "warning")

    # Store timestamps for later use
    self.file_timestamps.update(file_timestamps)

    return file_timestamps, downloaded_files
|
||||
|
||||
def download_video(self, url: str, output_dir: Path = None) -> Tuple[Optional[datetime], Optional[Path]]:
    """
    Download a single TikTok video

    Fetches the metadata with `yt-dlp --dump-json`, skips posts that have
    formats but no video stream, downloads the video, then locates the
    resulting file.

    Args:
        url: TikTok video URL
        output_dir: Output directory (base_path when omitted)

    Returns:
        Tuple of (timestamp, downloaded file path). The path is None when
        the post is skipped, the download fails, or the file cannot be
        found afterwards; both are None when metadata retrieval fails or
        an unexpected error occurs.
    """
    output_dir = Path(output_dir) if output_dir else self.base_path
    output_dir.mkdir(parents=True, exist_ok=True)

    self.log(f"Downloading video: {url}", "info")

    # First, get video info without downloading
    cmd_info = [
        "yt-dlp",
        "--dump-json",
        "--no-warnings",
        "--quiet",
        url
    ]

    try:
        result = subprocess.run(cmd_info, capture_output=True, text=True)
        if result.returncode != 0:
            self.log(f"Failed to get video info: {result.stderr}", "error")
            return None, None

        info = json.loads(result.stdout)
        timestamp = self.extract_date_from_info(info)

        # Check if this is a photo post (no video, only audio).
        # `or []` guards against the key being present with a null value.
        formats = info.get('formats') or []
        has_video = any(f.get('vcodec') != 'none' for f in formats)

        if not has_video and len(formats) > 0:
            # This is a photo/image post - skip it
            self.log("Skipping TikTok photo post (only videos are downloaded)", "info")
            return timestamp, None

        # Download video
        output_template = str(output_dir / "%(upload_date)s_%(title)s_%(id)s.%(ext)s")
        cmd_download = [
            "yt-dlp",
            "--format", "best",  # Explicitly request best video+audio format
            "--no-warnings",
            "--quiet",
            "-o", output_template,
            url
        ]

        result = subprocess.run(cmd_download, capture_output=True, text=True)
        if result.returncode != 0:
            self.log(f"Failed to download video: {result.stderr}", "error")
            return timestamp, None

        # Find the downloaded file. Metadata fields may be present but null,
        # so defaults are applied with `or` and values coerced to str; a
        # null field must not abort the lookup of a successful download.
        upload_date = str(info.get('upload_date') or 'unknown')
        title = str(info.get('title') or 'video')
        video_id = str(info.get('id') or '')
        ext = str(info.get('ext') or 'mp4')

        downloaded_file = output_dir / f"{upload_date}_{title}_{video_id}.{ext}"
        if not downloaded_file.exists() and video_id:
            # The on-disk name can differ from the raw metadata, so search
            # by ID: reported extension first, then mp4.
            for candidate_ext in dict.fromkeys((ext, 'mp4')):
                matches = sorted(output_dir.glob(f"*{video_id}*.{candidate_ext}"))
                if matches:
                    downloaded_file = matches[0]
                    break

        if downloaded_file.exists():
            if timestamp:
                self.file_timestamps[downloaded_file.name] = timestamp
            return timestamp, downloaded_file

        return timestamp, None

    except Exception as e:
        self.log(f"Failed to download video: {e}", "error")
        return None, None
|
||||
|
||||
def get_file_timestamps(self) -> Dict[str, datetime]:
    """Return a shallow copy of the filename -> datetime map gathered so far"""
    return dict(self.file_timestamps)
|
||||
|
||||
def clear_timestamps(self):
    """Forget every stored file timestamp"""
    # Emptied in place, so the same dict object stays attached to the instance
    stored = self.file_timestamps
    stored.clear()
|
||||
|
||||
|
||||
def download_tiktok_profile(username: str,
                            days: int = 7,
                            base_path: Path = None,
                            log_callback=None,
                            unified_db=None) -> Dict[str, datetime]:
    """
    One-call convenience wrapper around TikTokDownloader.download_profile

    Args:
        username: TikTok username
        days: How many days back to download
        base_path: Base download path
        log_callback: Optional logging callback
        unified_db: UnifiedDatabase instance (required)

    Returns:
        Dictionary mapping filenames to timestamps

    Raises:
        ValueError: If unified_db is not supplied
    """
    if unified_db:
        downloader = TikTokDownloader(
            unified_db=unified_db,
            base_path=base_path,
            log_callback=log_callback,
        )
        # The list of downloaded paths is not part of this wrapper's result
        file_timestamps, _downloaded = downloader.download_profile(username, number_of_days=days)
        return file_timestamps

    raise ValueError("unified_db is required for TikTok downloads")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Smoke test for the module
    import tempfile

    print("TikTok Downloader Module Test")
    print("="*60)

    with tempfile.TemporaryDirectory() as tmpdir:
        # TikTokDownloader requires a unified_db and raises ValueError
        # without one, so an unguarded construction here would always crash
        # the self-test. Report that instead of failing.
        try:
            downloader = TikTokDownloader(base_path=Path(tmpdir))
        except ValueError as exc:
            downloader = None
            print(f"Downloader not instantiated: {exc}")

        # With a real UnifiedDatabase you can test against a TikTok username:
        # downloader = TikTokDownloader(base_path=Path(tmpdir), unified_db=db)
        # timestamps, files = downloader.download_profile("username", number_of_days=1)

    print("Module ready for integration")
|
||||
Reference in New Issue
Block a user