815 lines
24 KiB
Markdown
815 lines
24 KiB
Markdown
# Code Review - Specific Fix Examples
|
|
|
|
This document provides concrete code examples for implementing the recommended fixes from the comprehensive code review.
|
|
|
|
## 1. FIX: Token Exposure in URLs
|
|
|
|
### Current Code (web/frontend/src/lib/api.ts:558-568)
|
|
```typescript
|
|
getMediaThumbnailUrl(filePath: string, mediaType: 'image' | 'video') {
|
|
const token = localStorage.getItem('auth_token')
|
|
const tokenParam = token ? `&token=${encodeURIComponent(token)}` : ''
|
|
return `${API_BASE}/media/thumbnail?file_path=${encodeURIComponent(filePath)}&media_type=${mediaType}${tokenParam}`
|
|
}
|
|
```
|
|
|
|
### Recommended Fix
|
|
```typescript
|
|
// Backend creates secure session/ticket instead of token
|
|
/**
 * Ask the backend for a preview ticket for one media file.
 * POSTs `{ file_path }` to `/media/preview-ticket` and resolves with the response body.
 */
async getMediaPreviewTicket(filePath: string): Promise<{ticket: string}> {
  const payload = { file_path: filePath }
  return this.post('/media/preview-ticket', payload)
}
|
|
|
|
// Frontend uses ticket (short-lived, single-use)
|
|
/**
 * Build a thumbnail URL that is authorised by a preview ticket instead of the auth token.
 *
 * The method has to be async because the ticket is fetched from the backend first;
 * callers must await the returned promise before using the URL (e.g. as an <img> src).
 *
 * @returns the thumbnail URL, or '' when no auth token is stored.
 */
async getMediaThumbnailUrl(filePath: string, mediaType: 'image' | 'video'): Promise<string> {
  // Without a stored token there is nothing to authenticate the ticket request with.
  if (!localStorage.getItem('auth_token')) return ''

  // Request ticket instead of embedding token.
  // getMediaPreviewTicket resolves to an object; interpolating it directly would
  // produce "[object Object]", so pull out the string field.
  const { ticket } = await this.getMediaPreviewTicket(filePath)

  return `${API_BASE}/media/thumbnail?file_path=${encodeURIComponent(filePath)}&media_type=${mediaType}&ticket=${encodeURIComponent(ticket)}`
}
|
|
|
|
// Always include Authorization header for critical operations
|
|
/**
 * Build the default request headers: JSON content type plus a Bearer
 * Authorization header when an auth token is stored.
 */
private getAuthHeaders(): HeadersInit {
  // Typed as a plain string map: HeadersInit is a union (Headers | string[][] | record),
  // so assigning by string key on a HeadersInit-typed variable does not type-check.
  const headers: Record<string, string> = {
    'Content-Type': 'application/json',
  }
  const token = localStorage.getItem('auth_token')
  if (token) {
    headers['Authorization'] = `Bearer ${token}` // Use header, not URL param
  }
  return headers
}
|
|
```
|
|
|
|
### Backend Implementation
|
|
```python
|
|
# In api.py
|
|
|
|
from fastapi import Body


@app.post("/api/media/preview-ticket")
async def create_preview_ticket(
    file_path: str = Body(..., embed=True),
    current_user: Dict = Depends(get_current_user)
) -> Dict:
    """Create a short-lived, single-use ticket for a media preview.

    ``file_path`` is read from the JSON request body (``{"file_path": ...}``),
    which is what the frontend's ``getMediaPreviewTicket`` posts. Declared as a
    bare ``str`` FastAPI would treat it as a query parameter and reject the
    frontend's request with 422.

    Returns:
        ``{'ticket': <token>}`` where the token is valid for 5 minutes.
    """
    import secrets
    import time

    now = time.time()

    # Drop expired tickets so the in-memory store cannot grow without bound.
    expired = [key for key, data in preview_tickets.items() if data['expiry'] < now]
    for key in expired:
        preview_tickets.pop(key, None)

    ticket = secrets.token_urlsafe(32)

    # Store in Redis or in-memory cache
    preview_tickets[ticket] = {
        'file_path': file_path,  # checked again when the ticket is redeemed
        'user': current_user['username'],
        'expiry': now + 300,  # 5 minutes
        'used': False
    }

    return {'ticket': ticket}
|
|
|
|
@app.get("/api/media/thumbnail")
async def get_thumbnail(
    file_path: str,
    media_type: str,
    ticket: Optional[str] = None,
    credentials: Optional[HTTPAuthorizationCredentials] = Depends(security)
) -> StreamingResponse:
    """Serve thumbnail with ticket or authorization header.

    The Authorization header is tried first; a preview ticket is only consulted
    when the header did not authenticate the request. A ticket is accepted once,
    before its expiry, and only for the ``file_path`` it was issued for.

    Raises:
        HTTPException: 401 when the ticket is expired or already used, or when
            neither mechanism authenticates the request; 403 when the ticket was
            issued for a different file.
    """
    # create_preview_ticket imports time inside its own body, so import it here too.
    import time

    # NOTE(review): `credentials` can only be None if `security` does not raise on a
    # missing header (e.g. HTTPBearer(auto_error=False)) - confirm its definition.
    auth_user = None

    # Try authorization header first
    if credentials:
        payload = app_state.auth.verify_session(credentials.credentials)
        if payload:
            auth_user = payload

    # Or use ticket (only as a fallback, so a stale ticket in the URL cannot
    # reject a request that already carries a valid header)
    if auth_user is None and ticket:
        ticket_data = preview_tickets.get(ticket)
        if ticket_data is not None:
            if time.time() > ticket_data['expiry']:
                preview_tickets.pop(ticket, None)
                raise HTTPException(status_code=401, detail="Ticket expired")
            if ticket_data['used']:
                raise HTTPException(status_code=401, detail="Ticket already used")
            # Without this check a ticket issued for one file unlocks every file.
            if ticket_data['file_path'] != file_path:
                raise HTTPException(status_code=403, detail="Ticket not valid for this file")
            ticket_data['used'] = True
            auth_user = {'username': ticket_data['user']}

    if not auth_user:
        raise HTTPException(status_code=401, detail="Not authenticated")

    # ... rest of implementation
|
|
```
|
|
|
|
---
|
|
|
|
## 2. FIX: Path Traversal Vulnerability
|
|
|
|
### Problem Code (api.py file handling)
|
|
```python
|
|
# UNSAFE - vulnerable to path traversal
|
|
file_path = request.query_params.get('file_path')
|
|
with open(file_path, 'rb') as f: # Could be /etc/passwd!
|
|
return FileResponse(f)
|
|
```
|
|
|
|
### Recommended Fix
|
|
```python
|
|
from pathlib import Path
|
|
import os
|
|
|
|
# Safe path validation utility
|
|
def validate_file_path(file_path: str, allowed_base: Optional[str] = None) -> Path:
    """Resolve ``file_path`` and make sure it is a regular file inside ``allowed_base``.

    Prevents ../../../etc/passwd style attacks: both paths are resolved
    (made absolute, symlinks followed) before the containment check.

    Args:
        file_path: Path supplied by the client.
        allowed_base: Directory the file must live under; defaults to
            ``/opt/media-downloader/downloads``.

    Returns:
        The resolved path of the requested file.

    Raises:
        HTTPException: 403 if the path cannot be resolved or lies outside
            ``allowed_base``, 404 if it does not exist, 403 if it is not a
            regular file.
    """
    if allowed_base is None:
        allowed_base = '/opt/media-downloader/downloads'

    try:
        # Convert to absolute paths
        requested_path = Path(file_path).resolve()
        base_path = Path(allowed_base).resolve()

        # Check if requested path is within base directory
        requested_path.relative_to(base_path)
    except (ValueError, OSError, RuntimeError):
        # ValueError: outside the base directory (also raised for an embedded NUL
        # byte); OSError / RuntimeError: the path could not be resolved at all.
        # All of these are rejected instead of surfacing as a 500.
        raise HTTPException(
            status_code=403,
            detail="Access denied - path traversal detected"
        )

    # Check file exists
    if not requested_path.exists():
        raise HTTPException(status_code=404, detail="File not found")

    # Check it's a file, not directory
    if not requested_path.is_file():
        raise HTTPException(status_code=403, detail="Invalid file")

    return requested_path
|
|
|
|
# Safe endpoint implementation
|
|
@app.get("/api/media/preview")
async def get_media_preview(
    file_path: str,
    current_user: Dict = Depends(get_current_user)
) -> FileResponse:
    """Return the requested media file once its path has passed validate_file_path.

    HTTP errors raised by the validation are passed through unchanged; any other
    failure is logged and reported as a 500.
    """
    try:
        return FileResponse(validate_file_path(file_path))
    except HTTPException:
        # 403 / 404 from the path validation must reach the client as-is
        raise
    except Exception as exc:
        logger.error(f"Error serving file: {exc}")
        raise HTTPException(status_code=500, detail="Error serving file")
|
|
```
|
|
|
|
---
|
|
|
|
## 3. FIX: CSRF Protection
|
|
|
|
### Add CSRF Middleware
|
|
```python
|
|
# In api.py
# Starlette itself ships no CSRF middleware; CSRFMiddleware comes from the
# third-party ``starlette-csrf`` package (pip install starlette-csrf).
import re

from starlette_csrf import CSRFMiddleware

app.add_middleware(
    CSRFMiddleware,
    secret=SESSION_SECRET_KEY,
    safe_methods={'GET', 'HEAD', 'OPTIONS'},
    # exempt_urls expects compiled regular expressions matched against the request path
    exempt_urls=[
        re.compile(r'^/api/auth/login$'),
        re.compile(r'^/api/auth/logout$'),
    ],  # Public endpoints
)
|
|
```
|
|
|
|
### Frontend Implementation
|
|
```typescript
|
|
// web/frontend/src/lib/api.ts
|
|
|
|
/**
 * POST `data` as JSON to `${API_BASE}${endpoint}` with the auth headers and the
 * CSRF token attached, and resolve with the parsed JSON response.
 * A 401 response triggers handleUnauthorized(); every non-OK response throws.
 */
async post<T>(endpoint: string, data?: any): Promise<T> {
  // Get CSRF token from cookie or meta tag
  const csrfToken = this.getCSRFToken()

  const headers = {
    ...this.getAuthHeaders(),
    'X-CSRFToken': csrfToken, // Include CSRF token
  }
  const body = data ? JSON.stringify(data) : undefined

  const response = await fetch(`${API_BASE}${endpoint}`, { method: 'POST', headers, body })

  if (response.ok) {
    return response.json()
  }

  if (response.status === 401) {
    this.handleUnauthorized()
  }
  throw new Error(`API error: ${response.statusText}`)
}
|
|
|
|
/**
 * Read the CSRF token from the `csrf-token` meta tag, falling back to the
 * `csrftoken` cookie. Returns '' when neither is present.
 */
private getCSRFToken(): string {
  // Try to get from meta tag
  const meta = document.querySelector('meta[name="csrf-token"]')
  if (meta) {
    return meta.getAttribute('content') || ''
  }

  // Or from cookie
  const prefix = 'csrftoken='
  const csrfCookie = document.cookie.split('; ').find(c => c.startsWith(prefix))

  // Take everything after the first '=': split('=')[1] would cut the value short
  // if the token itself contains '='.
  return csrfCookie ? csrfCookie.slice(prefix.length) : ''
}
|
|
```
|
|
|
|
---
|
|
|
|
## 4. FIX: Subprocess Command Injection
|
|
|
|
### Vulnerable Code (modules/tiktok_module.py:294)
|
|
```python
|
|
# DANGEROUS - username not escaped
username = "test'; rm -rf /; echo '"
output_dir = "/downloads"

# This could execute arbitrary commands!
# A command passed as ONE string is only parsed by a shell when shell=True is set;
# that is what lets shell metacharacters inside `username` be interpreted.
# NOTE(review): confirm that the call at modules/tiktok_module.py:294 passes
# shell=True - without it a string command is not shell-parsed.
cmd = f"yt-dlp -o '%(title)s.%(ext)s' https://www.tiktok.com/@{username}"
result = subprocess.run(cmd, shell=True, capture_output=True, text=True, cwd=output_dir)
|
|
```
|
|
|
|
### Recommended Fix
|
|
```python
|
|
import subprocess
|
|
import shlex
|
|
from typing import List
|
|
|
|
def safe_run_command(cmd: List[str], cwd: str = None, **kwargs) -> subprocess.CompletedProcess:
    """Run a command given as an argument list, never through a shell.

    Args:
        cmd: Program and arguments as a list; a single command string is rejected.
        cwd: Working directory for the child process.
        **kwargs: Extra ``subprocess.run`` options. ``capture_output``, ``text``
            and ``timeout`` default to ``True``, ``True`` and ``300`` and may be
            overridden; ``shell=True`` is rejected.

    Returns:
        The ``CompletedProcess`` from ``subprocess.run``.

    Raises:
        ValueError: If ``cmd`` is a string, ``shell=True`` is requested, the
            command times out, or it cannot be started.
    """
    # Never use shell=True with user input - and a string command only makes
    # sense with a shell, so both are refused up front.
    if isinstance(cmd, str) or kwargs.get('shell'):
        raise ValueError("Command must be an argument list and shell=True is not allowed")

    # Merge caller options over the defaults; passing them as literal keywords next
    # to **kwargs would raise "got multiple values for keyword argument".
    options = {'capture_output': True, 'text': True, 'timeout': 300}
    options.update(kwargs)

    try:
        # Use list form - much safer than string form
        return subprocess.run(cmd, cwd=cwd, **options)
    except subprocess.TimeoutExpired as exc:
        raise ValueError("Command timed out") from exc
    except Exception as exc:
        raise ValueError(f"Command failed: {exc}") from exc
|
|
|
|
# Usage with validation
|
|
def download_tiktok_video(username: str, output_dir: str) -> bool:
    """Download TikTok content for ``username`` with yt-dlp, without a shell.

    Args:
        username: TikTok handle, with or without a leading ``@``.
        output_dir: Directory yt-dlp runs in (files are written relative to it).

    Returns:
        True when yt-dlp exits with status 0, False when it fails or cannot run.

    Raises:
        ValueError: If the username is empty, longer than 100 characters, or
            contains characters outside letters, digits, ``_``, ``-`` and ``.``.
    """
    import re

    # Validate input
    if not username or len(username) > 100:
        raise ValueError("Invalid username")

    # Accept an optional leading '@' but never build an '@@name' URL.
    handle = username[1:] if username.startswith('@') else username

    # Reject instead of silently stripping: dropping characters would make the
    # command fetch a different account than the one that was asked for.
    # NOTE(review): '.' is allowed here on the assumption that TikTok handles may
    # contain periods - confirm against the platform's rules.
    if not re.fullmatch(r'[A-Za-z0-9_.-]+', handle):
        raise ValueError("Invalid username")

    # Build command as list (safer)
    cmd = [
        'yt-dlp',
        '-o', '%(title)s.%(ext)s',
        f'https://www.tiktok.com/@{handle}'
    ]

    try:
        result = safe_run_command(cmd, cwd=output_dir)
    except Exception as e:
        logger.error(f"Failed to download TikTok: {e}")
        return False

    if result.returncode != 0:
        logger.error(f"yt-dlp error: {result.stderr}")
        return False

    return True
|
|
```
|
|
|
|
---
|
|
|
|
## 5. FIX: Input Validation on Config
|
|
|
|
### Current Vulnerable Code (api.py:349-351)
|
|
```python
|
|
@app.put("/api/config")
|
|
async def update_config(
|
|
config: ConfigUpdate, # Raw dict, no validation
|
|
current_user: Dict = Depends(get_current_user)
|
|
):
|
|
"""Update configuration"""
|
|
app_state.config.update(config.config)
|
|
return {"success": True}
|
|
```
|
|
|
|
### Recommended Fix with Validation
|
|
```python
|
|
from pydantic import BaseModel, Field, validator
|
|
from typing import Optional, Dict, Any
|
|
|
|
# Define validated config schemas
|
|
class PlatformConfig(BaseModel):
    """Validated per-platform settings.

    The allowed ranges are declared once, on the fields themselves. A separate
    validator repeating the 1-24 hour bound was removed: it ran after the
    ``Field`` constraint and therefore could never reject anything.
    """
    enabled: bool = True
    check_interval_hours: int = Field(gt=0, le=24)  # 1-24 hours
    max_retries: int = Field(ge=1, le=10)
    timeout_seconds: int = Field(gt=0, le=3600)  # up to one hour
|
|
|
|
class MediaDownloaderConfig(BaseModel):
    """Validated general settings of the downloader.

    Numeric ranges are enforced by the ``Field`` constraints; only the download
    path needs a custom validator because it is checked against the file system.
    """
    download_path: str
    max_concurrent_downloads: int = Field(ge=1, le=20)
    enable_deduplication: bool = True
    enable_face_recognition: bool = False
    recycle_bin_enabled: bool = True
    recycle_bin_retention_days: int = Field(ge=1, le=365)

    @validator('download_path')
    def validate_path(cls, v):
        """Require ``download_path`` to be an existing directory."""
        from pathlib import Path

        p = Path(v)
        if not p.exists():
            raise ValueError('Download path does not exist')
        if not p.is_dir():
            raise ValueError('Download path must be a directory')
        return str(p)
|
|
|
|
class ConfigUpdate(BaseModel):
    """Request body for the config update endpoint.

    Every section is optional and defaults to None, so a request may carry
    any subset of the sections.
    """
    # Per-platform sections, each validated as a PlatformConfig
    instagram: Optional[PlatformConfig] = None
    tiktok: Optional[PlatformConfig] = None
    forums: Optional[PlatformConfig] = None
    # General downloader settings, validated as a MediaDownloaderConfig
    general: Optional[MediaDownloaderConfig] = None
|
|
|
|
# Safe endpoint with validation
|
|
@app.put("/api/config")
async def update_config(
    update: ConfigUpdate,  # Automatically validated by Pydantic
    current_user: Dict = Depends(get_current_user)
) -> Dict:
    """Update configuration with validation.

    Only the sections present in the request are applied. Each one is written
    to the in-memory config and persisted through ``app_state.settings``.

    Returns:
        A dict with ``success``, ``message`` and the list of ``updated_keys``.

    Raises:
        HTTPException: 400 if applying or saving the configuration fails.
    """
    try:
        # .dict() converts nested models as well, so each value below is already
        # a plain dict (or None); calling .dict() on it again would raise
        # AttributeError and turn every valid update into a 400.
        config_dict = update.dict(exclude_unset=True)

        # Log who made the change
        logger.info(f"User {current_user['username']} updating config: {list(config_dict.keys())}")

        for key, value in config_dict.items():
            if value is None:
                continue

            # Merge with existing config
            app_state.config[key] = value

            # Save to database
            app_state.settings.set(
                key,
                value,
                category=key,
                updated_by=current_user['username']
            )

        return {
            "success": True,
            "message": "Configuration updated successfully",
            "updated_keys": list(config_dict.keys())
        }

    except Exception as e:
        logger.error(f"Config update failed: {e}")
        raise HTTPException(
            status_code=400,
            detail=f"Invalid configuration: {str(e)}"
        )
|
|
```
|
|
|
|
---
|
|
|
|
## 6. FIX: JSON Metadata Search Performance
|
|
|
|
### Current Inefficient Code (unified_database.py:576-590)
|
|
```python
|
|
def get_download_by_media_id(self, media_id: str, platform: str = 'fastdl') -> Optional[Dict]:
|
|
"""Get download record by Instagram media ID"""
|
|
with self.get_connection() as conn:
|
|
cursor = conn.cursor()
|
|
|
|
# This causes FULL TABLE SCAN on large datasets!
|
|
pattern1 = f'%"media_id": "{media_id}"%'
|
|
pattern2 = f'%"media_id"%{media_id}%'
|
|
|
|
cursor.execute('''
|
|
SELECT * FROM downloads
|
|
WHERE platform = ?
|
|
AND (metadata LIKE ? OR metadata LIKE ?)
|
|
LIMIT 1
|
|
''', (platform, pattern1, pattern2))
|
|
```
|
|
|
|
### Recommended Fix - Option 1: Separate Column
|
|
```python
|
|
# Schema modification (add once)
|
|
def _init_database(self):
|
|
"""Initialize database with optimized schema"""
|
|
with self.get_connection() as conn:
|
|
cursor = conn.cursor()
|
|
|
|
# Add separate column for media_id (indexed)
|
|
try:
|
|
cursor.execute("ALTER TABLE downloads ADD COLUMN media_id TEXT")
|
|
except sqlite3.OperationalError:
|
|
pass # Column already exists
|
|
|
|
# Create efficient index
|
|
cursor.execute('''
|
|
CREATE INDEX IF NOT EXISTS idx_media_id_platform
|
|
ON downloads(media_id, platform)
|
|
WHERE media_id IS NOT NULL
|
|
''')
|
|
conn.commit()
|
|
|
|
def get_download_by_media_id(self, media_id: str, platform: str = 'fastdl') -> Optional[Dict]:
    """Look up one download row by platform and the dedicated media_id column.

    Returns the row converted with ``dict(row)``, or None when nothing matches.
    """
    # Equality on (platform, media_id) - no LIKE scan over the metadata text
    query = '''
        SELECT id, url, platform, source, content_type,
        filename, file_path, post_date, download_date,
        file_size, file_hash, metadata
        FROM downloads
        WHERE platform = ? AND media_id = ?
        LIMIT 1
    '''
    with self.get_connection() as conn:
        cursor = conn.cursor()
        cursor.execute(query, (platform, media_id))
        match = cursor.fetchone()
        return dict(match) if match else None
|
|
|
|
def record_download(self, media_id: str = None, **kwargs):
    """Record download with media_id extracted to separate column"""
    # ... existing code ...
    # NOTE(review): cursor, url_hash, url, platform, source, content_type, filename,
    # file_path, file_size, file_hash, post_date, status, error_message and metadata
    # are not defined in this excerpt - presumably set up by the elided code; confirm.
    # 14 columns, 14 placeholders, 14 values: media_id is the last of each.
    cursor.execute('''
        INSERT INTO downloads (
        url_hash, url, platform, source, content_type,
        filename, file_path, file_size, file_hash,
        post_date, status, error_message, metadata, media_id
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    ''', (
        url_hash, url, platform, source, content_type,
        filename, file_path, file_size, file_hash,
        post_date.isoformat() if post_date else None,
        status, error_message,
        json.dumps(metadata) if metadata else None,
        media_id  # Store separately for fast lookup
    ))
|
|
```
|
|
|
|
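### Recommended Fix - Option 2: JSON_EXTRACT (requires SQLite's JSON functions - built in by default since SQLite 3.38, and available in earlier builds compiled with the JSON1 extension)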
|
|
```python
|
|
# Uses SQLite's built-in JSON functions (more efficient than LIKE)
|
|
def get_download_by_media_id(self, media_id: str, platform: str = 'fastdl') -> Optional[Dict]:
    """Get download record by Instagram media ID using JSON_EXTRACT.

    Rows whose ``metadata`` is not valid JSON are skipped instead of aborting the
    query, and the extracted value is compared as text so that a media_id stored
    as a JSON number still matches.

    NOTE(review): without an expression index on the same expression this still
    evaluates the JSON of every row of the platform - verify with EXPLAIN QUERY PLAN.

    Returns:
        The row as a dict with ``metadata`` parsed when it is valid JSON, or
        None when nothing matches.
    """
    with self.get_connection() as conn:
        cursor = conn.cursor()

        # CASE guarantees json_extract only runs on rows json_valid accepted;
        # json_extract raises a "malformed JSON" error otherwise.
        cursor.execute('''
            SELECT id, url, platform, source, content_type,
            filename, file_path, post_date, download_date,
            file_size, file_hash, metadata
            FROM downloads
            WHERE platform = ?
            AND CASE WHEN json_valid(metadata)
                     THEN CAST(json_extract(metadata, '$.media_id') AS TEXT)
                END = ?
            LIMIT 1
        ''', (platform, str(media_id)))

        row = cursor.fetchone()
        if not row:
            return None

        result = dict(row)
        # Parse metadata
        if result.get('metadata'):
            try:
                result['metadata'] = json.loads(result['metadata'])
            except (ValueError, TypeError):  # JSONDecodeError is a ValueError
                pass
        return result
|
|
```
|
|
|
|
---
|
|
|
|
## 7. FIX: Bare Exception Handlers
|
|
|
|
### Problematic Code (fastdl_module.py, media-downloader.py)
|
|
```python
|
|
except: # Too broad!
|
|
break
|
|
```
|
|
|
|
### Recommended Fix
|
|
```python
|
|
import sqlite3
import time

import requests
from requests.exceptions import RequestException, Timeout

MAX_ATTEMPTS = 3  # upper bound so a persistent timeout or lock cannot retry forever

# Be specific about which exceptions to catch
for attempt in range(1, MAX_ATTEMPTS + 1):
    try:
        # ... code that might fail ...
        download_file(url)

    except Timeout as e:
        # Timeout is a RequestException subclass, so it has to be listed first
        logger.warning(f"Network error downloading {url}: {e}")
        # Retry (bounded by MAX_ATTEMPTS)
        continue

    except RequestException as e:
        # Handle the remaining network errors (connection failures included).
        # This must stay above the OSError handler: RequestException derives
        # from IOError/OSError.
        logger.warning(f"Network error downloading {url}: {e}")
        # Skip this file
        break

    except sqlite3.OperationalError as e:
        # Handle database errors specifically
        if "database is locked" in str(e):
            logger.warning("Database locked, retrying...")
            time.sleep(1)
            continue
        else:
            logger.error(f"Database error: {e}")
            raise

    except OSError as e:
        # Handle file system errors (IOError is an alias of OSError)
        logger.error(f"File system error: {e}")
        break

    except Exception as e:
        # Only catch unexpected errors as last resort
        logger.error(f"Unexpected error: {type(e).__name__}: {e}", exc_info=True)
        break

    else:
        # Success - stop retrying
        break
|
|
```
|
|
|
|
---
|
|
|
|
## 8. FIX: Async File I/O
|
|
|
|
### Current Blocking Code (web/backend/api.py)
|
|
```python
|
|
# This blocks the async event loop!
|
|
@app.get("/api/media/thumbnail")
|
|
async def get_thumbnail(file_path: str):
|
|
# Synchronous file I/O blocks other requests
|
|
with open(file_path, 'rb') as f:
|
|
image = Image.open(f)
|
|
# ... process image ...
|
|
return FileResponse(processed_image)
|
|
```
|
|
|
|
### Recommended Fix with aiofiles
|
|
```python
|
|
import aiofiles
|
|
from PIL import Image
|
|
import io
|
|
|
|
@app.get("/api/media/thumbnail")
async def get_thumbnail(
    file_path: str,
    media_type: str,
    current_user: Dict = Depends(get_current_user_media)
) -> StreamingResponse:
    """Serve thumbnail efficiently without blocking.

    The file is read with aiofiles and the image work runs in the default
    executor, so the event loop stays free for other requests.

    Raises:
        HTTPException: whatever ``validate_file_path`` raises for a rejected
            path, 404 if the file disappears before it is read, 500 for any
            other failure.
    """
    import asyncio

    try:
        # Never open a client-supplied path directly: run it through the
        # path-traversal check first (validate_file_path from the path
        # traversal fix).
        safe_path = validate_file_path(file_path)

        # Use aiofiles for non-blocking file I/O
        async with aiofiles.open(safe_path, 'rb') as f:
            file_data = await f.read()

        # Offload CPU-bound image processing to thread pool
        loop = asyncio.get_running_loop()
        thumbnail = await loop.run_in_executor(
            None,  # Use default executor (ThreadPoolExecutor)
            _create_thumbnail,
            file_data,
            media_type
        )

        return StreamingResponse(
            io.BytesIO(thumbnail),
            media_type="image/jpeg"
        )

    except HTTPException:
        # Keep the 403/404 from the path validation instead of masking it as 500
        raise
    except FileNotFoundError:
        raise HTTPException(status_code=404, detail="File not found")
    except Exception as e:
        logger.error(f"Error creating thumbnail: {e}")
        raise HTTPException(status_code=500, detail="Error creating thumbnail")
|
|
|
|
def _create_thumbnail(file_data: bytes, media_type: str) -> bytes:
    """CPU-bound function to create thumbnail.

    Decodes ``file_data`` as an image, shrinks it to fit within 200x200 and
    returns it encoded as JPEG (quality 85).

    Args:
        file_data: Raw bytes of the source image.
        media_type: Accepted for the caller's convenience; not used here, so
            non-image input is not handled by this function.

    Raises:
        Exception: Any decoding or encoding error is logged and re-raised.
    """
    try:
        image = Image.open(io.BytesIO(file_data))

        # JPEG cannot store alpha or palette data: saving an RGBA/LA/P image
        # directly fails, so normalise everything except RGB and greyscale.
        if image.mode not in ('RGB', 'L'):
            image = image.convert('RGB')

        image.thumbnail((200, 200))

        output = io.BytesIO()
        image.save(output, format='JPEG', quality=85)
        return output.getvalue()

    except Exception as e:
        logger.error(f"Thumbnail creation failed: {e}")
        raise
|
|
```
|
|
|
|
---
|
|
|
|
## 9. FIX: Adapter Duplication
|
|
|
|
### Current Duplicated Code (unified_database.py:1708-2080)
|
|
```python
|
|
# FastDLDatabaseAdapter
|
|
class FastDLDatabaseAdapter:
|
|
def __init__(self, unified_db: UnifiedDatabase):
|
|
self.db = unified_db
|
|
self.platform = 'fastdl'
|
|
|
|
def is_already_downloaded(self, media_id: str) -> bool:
|
|
# ... 20+ lines of duplicate code ...
|
|
|
|
def record_download(self, media_id: str, username: str, **kwargs):
|
|
# ... 30+ lines of duplicate code ...
|
|
|
|
# TikTokDatabaseAdapter (similar structure)
|
|
# ToolzuDatabaseAdapter (similar structure)
|
|
# CoppermineDatabaseAdapter (similar structure)
|
|
# ... and more
|
|
```
|
|
|
|
### Recommended Fix: Generic Base Adapter
|
|
```python
|
|
from abc import ABC, abstractmethod
|
|
from typing import Any, Dict, Optional
|
|
|
|
class BaseDatabaseAdapter(ABC):
    """Generic adapter for unified database compatibility.

    Subclasses supply the platform-specific pieces (identifier key, URL and
    metadata layout); the lookup and recording logic lives here once.
    """

    def __init__(self, unified_db: UnifiedDatabase, platform: str):
        self.db = unified_db
        self.platform = platform

    @abstractmethod
    def get_identifier(self, data: Dict[str, Any]) -> str:
        """Extract unique identifier from data"""

    @abstractmethod
    def build_metadata(self, data: Dict[str, Any]) -> Dict:
        """Build platform-specific metadata"""

    @abstractmethod
    def _id_key(self) -> str:
        """Return the metadata key for identifier"""

    @abstractmethod
    def _build_url(self, identifier: str, source: str, kwargs: Dict) -> str:
        """Build URL for the content"""

    def is_already_downloaded(self, identifier: str) -> bool:
        """Check if content is already downloaded.

        Searches the JSON ``metadata`` text of this platform's rows for the
        ``"<id key>": <identifier>`` pair exactly as ``json.dumps`` writes it.

        NOTE(review): this is still a substring scan of ``metadata``; a dedicated
        indexed column avoids it if lookups become slow.
        """
        import json

        needle = f'{json.dumps(self._id_key())}: {json.dumps(identifier)}'
        # Escape LIKE wildcards so '%' or '_' inside the key or the identifier
        # match literally instead of matching other records.
        for special in ('\\', '%', '_'):
            needle = needle.replace(special, '\\' + special)

        with self.db.get_connection() as conn:
            cursor = conn.cursor()
            cursor.execute('''
                SELECT 1 FROM downloads
                WHERE platform = ? AND metadata LIKE ? ESCAPE '\\'
                LIMIT 1
            ''', (self.platform, f'%{needle}%'))
            return cursor.fetchone() is not None

    def record_download(
        self,
        identifier: str,
        source: str,
        **kwargs
    ) -> bool:
        """Record download with platform-specific data.

        Args:
            identifier: Platform identifier, stored in metadata under ``_id_key()``.
            source: Passed through as the record's ``source``.
            **kwargs: Optional ``content_type`` (default ``'post'``), ``filename``,
                ``file_path``, ``post_date`` plus any extra metadata values.

        Returns:
            Whatever ``self.db.record_download`` returns.
        """
        url = self._build_url(identifier, source, kwargs)
        metadata = self.build_metadata({
            **kwargs,
            self._id_key(): identifier
        })

        # Calculate file hash if provided
        file_hash = None
        file_path = kwargs.get('file_path')
        if file_path:
            try:
                file_hash = UnifiedDatabase.get_file_hash(file_path)
            except Exception as exc:
                # Hashing is best-effort: the download is still recorded without
                # a hash, but the failure is no longer dropped silently.
                import logging
                logging.getLogger(__name__).warning(
                    "Could not hash %s: %s", file_path, exc
                )

        return self.db.record_download(
            url=url,
            platform=self.platform,
            source=source,
            content_type=kwargs.get('content_type', 'post'),
            filename=kwargs.get('filename'),
            file_path=file_path,
            file_hash=file_hash,
            post_date=kwargs.get('post_date'),
            metadata=metadata
        )
|
|
|
|
# Concrete implementations
|
|
class FastDLDatabaseAdapter(BaseDatabaseAdapter):
    """Adapter for the 'fastdl' platform; records are identified by ``media_id``."""

    def __init__(self, unified_db: UnifiedDatabase):
        super().__init__(unified_db, 'fastdl')

    def _id_key(self) -> str:
        return 'media_id'

    def get_identifier(self, data: Dict) -> str:
        return data.get('media_id', '')

    def _build_url(self, identifier: str, source: str, kwargs: Dict) -> str:
        # Prefer the real download URL; fall back to a synthetic instagram:// one
        download_url = kwargs.get('download_url')
        if download_url:
            return download_url
        return f"instagram://{identifier}"

    def build_metadata(self, data: Dict) -> Dict:
        # media_id and source come first; every other value except file_path is
        # copied afterwards (so a 'source' entry in data overrides the default)
        metadata = {'media_id': data.get('media_id'), 'source': 'fastdl'}
        for key, value in data.items():
            if key not in ['media_id', 'file_path']:
                metadata[key] = value
        return metadata
|
|
|
|
class TikTokDatabaseAdapter(BaseDatabaseAdapter):
    """Adapter for the 'tiktok' platform; records are identified by ``video_id``."""

    def __init__(self, unified_db: UnifiedDatabase):
        super().__init__(unified_db, 'tiktok')

    def _id_key(self) -> str:
        return 'video_id'

    def get_identifier(self, data: Dict) -> str:
        return data.get('video_id', '')

    def _build_url(self, identifier: str, source: str, kwargs: Dict) -> str:
        # source is used as the account handle in the video URL
        return f"https://www.tiktok.com/@{source}/video/{identifier}"

    def build_metadata(self, data: Dict) -> Dict:
        # video_id goes first, followed by every other value unchanged
        metadata = {'video_id': data.get('video_id')}
        for key, value in data.items():
            if key != 'video_id':
                metadata[key] = value
        return metadata
|
|
|
|
class SnapchatDatabaseAdapter(BaseDatabaseAdapter):
    """Adapter for the 'snapchat' platform; records are identified by ``story_id``."""

    def __init__(self, unified_db: UnifiedDatabase):
        super().__init__(unified_db, 'snapchat')

    def _id_key(self) -> str:
        return 'story_id'

    def get_identifier(self, data: Dict) -> str:
        return data.get('story_id', '')

    def _build_url(self, identifier: str, source: str, kwargs: Dict) -> str:
        # `or` (as in the FastDL adapter) also covers url=None or url='' being
        # passed explicitly; .get(key, default) would return that empty value.
        return kwargs.get('url') or f"snapchat://{identifier}"

    def build_metadata(self, data: Dict) -> Dict:
        # Metadata is stored as received; copy so the caller's dict is not shared
        return data.copy()
|
|
|
|
# ... similar for other platforms ...
|
|
```
|
|
|
|
---
|
|
|
|
## Summary
|
|
|
|
These code examples provide concrete implementations for the major security, performance, and quality issues identified in the review. The fixes follow Python/TypeScript best practices and can be implemented incrementally.
|
|
|
|
Start with the security fixes (sections 1-5), then move to performance (sections 6 and 8), then code quality (sections 7 and 9).
|
|
|