# Code Review - Specific Fix Examples
|
||||
|
||||
This document provides concrete code examples for implementing the recommended fixes from the comprehensive code review.
|
||||
|
||||
## 1. FIX: Token Exposure in URLs
|
||||
|
||||
### Current Code (web/frontend/src/lib/api.ts:558-568)
|
||||
```typescript
|
||||
getMediaThumbnailUrl(filePath: string, mediaType: 'image' | 'video') {
|
||||
const token = localStorage.getItem('auth_token')
|
||||
const tokenParam = token ? `&token=${encodeURIComponent(token)}` : ''
|
||||
return `${API_BASE}/media/thumbnail?file_path=${encodeURIComponent(filePath)}&media_type=${mediaType}${tokenParam}`
|
||||
}
|
||||
```
|
||||
|
||||
### Recommended Fix
|
||||
```typescript
|
||||
// Backend creates secure session/ticket instead of token
|
||||
/**
 * Ask the backend for a preview ticket for the given file.
 *
 * POSTs `{ file_path }` to `/media/preview-ticket` and resolves with the
 * response object, which carries the ticket string.
 */
async getMediaPreviewTicket(filePath: string): Promise<{ticket: string}> {
  const payload = { file_path: filePath }
  return this.post('/media/preview-ticket', payload)
}
|
||||
|
||||
// Frontend uses ticket (short-lived, single-use)
|
||||
/**
 * Build a thumbnail URL that is authorised by a preview ticket instead of
 * the session token.
 *
 * Resolves to '' when no session token is stored. The method is async because
 * the ticket has to be fetched first, so callers must await the URL before
 * assigning it to an element's `src`.
 */
async getMediaThumbnailUrl(filePath: string, mediaType: 'image' | 'video'): Promise<string> {
  const token = localStorage.getItem('auth_token')
  if (!token) return ''

  // Request ticket instead of embedding token. The endpoint returns an object,
  // so unwrap the `ticket` field rather than interpolating the object itself.
  const { ticket } = await this.getMediaPreviewTicket(filePath)

  const query =
    `file_path=${encodeURIComponent(filePath)}` +
    `&media_type=${mediaType}` +
    `&ticket=${encodeURIComponent(ticket)}`
  return `${API_BASE}/media/thumbnail?${query}`
}
|
||||
|
||||
// Always include Authorization header for critical operations
|
||||
/**
 * Headers for authenticated JSON requests.
 *
 * Always sets `Content-Type: application/json`; adds a Bearer `Authorization`
 * header when a session token is present in localStorage.
 */
private getAuthHeaders(): HeadersInit {
  const token = localStorage.getItem('auth_token')

  // Build a plain string map: `HeadersInit` is a union type and cannot be
  // assigned by key under strict type-checking.
  const headers: Record<string, string> = {
    'Content-Type': 'application/json',
  }
  if (token) {
    headers['Authorization'] = `Bearer ${token}` // Use header, not URL param
  }
  return headers
}
|
||||
```
|
||||
|
||||
### Backend Implementation
|
||||
```python
|
||||
# In api.py

import secrets
import time

from fastapi import Body

# How long a preview ticket stays valid after it is issued (5 minutes).
PREVIEW_TICKET_TTL_SECONDS = 300


def _purge_expired_tickets(now: float) -> None:
    """Remove expired tickets so the in-memory store cannot grow without bound."""
    expired = [key for key, data in preview_tickets.items() if data['expiry'] < now]
    for key in expired:
        preview_tickets.pop(key, None)


@app.post("/api/media/preview-ticket")
async def create_preview_ticket(
    # The frontend POSTs a JSON body {"file_path": ...}. A bare `str` parameter
    # would be read from the query string, so bind it to the body explicitly.
    file_path: str = Body(..., embed=True),
    current_user: Dict = Depends(get_current_user)
) -> Dict:
    """Create short-lived, single-use ticket for media preview.

    The ticket is bound to the requesting user and to ``file_path``.
    Returns ``{'ticket': <token>}``.
    """
    now = time.time()
    _purge_expired_tickets(now)

    ticket = secrets.token_urlsafe(32)

    # Store in Redis or in-memory cache
    preview_tickets[ticket] = {
        'file_path': file_path,
        'user': current_user['username'],
        'expiry': now + PREVIEW_TICKET_TTL_SECONDS,
    }

    return {'ticket': ticket}


@app.get("/api/media/thumbnail")
async def get_thumbnail(
    file_path: str,
    media_type: str,
    ticket: Optional[str] = None,
    # NOTE(review): an optional credential presumably requires `security` to be
    # configured not to reject requests without a header - confirm.
    credentials: Optional[HTTPAuthorizationCredentials] = Depends(security)
) -> StreamingResponse:
    """Serve thumbnail with ticket or authorization header.

    Raises:
        HTTPException 401: no valid header and no valid ticket.
        HTTPException 403: the ticket was issued for a different file.
    """
    auth_user = None

    # Try authorization header first
    if credentials:
        payload = app_state.auth.verify_session(credentials.credentials)
        if payload:
            auth_user = payload

    # Or use ticket. Only redeem it when the header did not authenticate, so a
    # stale ticket cannot reject an otherwise valid request.
    if auth_user is None and ticket:
        # pop() makes the ticket single-use and frees its entry in one step.
        ticket_data = preview_tickets.pop(ticket, None)
        if ticket_data is None:
            raise HTTPException(status_code=401, detail="Invalid or already used ticket")
        if time.time() > ticket_data['expiry']:
            raise HTTPException(status_code=401, detail="Ticket expired")
        # A ticket only authorises the file it was issued for.
        if ticket_data['file_path'] != file_path:
            raise HTTPException(status_code=403, detail="Ticket not valid for this file")
        auth_user = {'username': ticket_data['user']}

    if not auth_user:
        raise HTTPException(status_code=401, detail="Not authenticated")

    # ... rest of implementation
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 2. FIX: Path Traversal Vulnerability
|
||||
|
||||
### Problem Code (api.py file handling)
|
||||
```python
|
||||
# UNSAFE - vulnerable to path traversal
|
||||
file_path = request.query_params.get('file_path')
|
||||
with open(file_path, 'rb') as f: # Could be /etc/passwd!
|
||||
return FileResponse(f)
|
||||
```
|
||||
|
||||
### Recommended Fix
|
||||
```python
|
||||
from pathlib import Path
from typing import Optional
import os

# Root directory that served files must live under when no base is given.
DEFAULT_MEDIA_ROOT = '/opt/media-downloader/downloads'


# Safe path validation utility
def validate_file_path(file_path: str, allowed_base: Optional[str] = None) -> Path:
    """
    Validate file path is within allowed directory.
    Prevents ../../../etc/passwd style attacks.

    Args:
        file_path: Path supplied by the client.
        allowed_base: Directory the file must be inside; defaults to
            DEFAULT_MEDIA_ROOT.

    Returns:
        The resolved absolute path of an existing regular file.

    Raises:
        HTTPException 400: empty path or embedded NUL byte.
        HTTPException 403: path escapes the base, or is not a regular file.
        HTTPException 404: path does not exist.
    """
    if allowed_base is None:
        allowed_base = DEFAULT_MEDIA_ROOT

    # Reject input that cannot name a file before touching the filesystem:
    # a NUL byte makes path resolution raise ValueError (an unhandled 500).
    if not file_path or '\x00' in file_path:
        raise HTTPException(status_code=400, detail="Invalid file path")

    # Convert to absolute paths; resolve() also collapses ".." and symlinks,
    # so the containment check below sees the real target.
    requested_path = Path(file_path).resolve()
    base_path = Path(allowed_base).resolve()

    # Check if requested path is within base directory
    try:
        requested_path.relative_to(base_path)
    except ValueError:
        raise HTTPException(
            status_code=403,
            detail="Access denied - path traversal detected"
        )

    # Check file exists
    if not requested_path.exists():
        raise HTTPException(status_code=404, detail="File not found")

    # Check it's a file, not directory
    if not requested_path.is_file():
        raise HTTPException(status_code=403, detail="Invalid file")

    return requested_path
|
||||
|
||||
# Safe endpoint implementation
|
||||
@app.get("/api/media/preview")
async def get_media_preview(
    file_path: str,
    current_user: Dict = Depends(get_current_user)
) -> FileResponse:
    """Return the requested media file after validating its path.

    HTTP errors raised by the validator pass through unchanged; any other
    failure is logged and reported as a 500.
    """
    try:
        return FileResponse(validate_file_path(file_path))
    except HTTPException:
        # Already carries the right status code (403/404).
        raise
    except Exception as exc:
        logger.error(f"Error serving file: {exc}")
        raise HTTPException(status_code=500, detail="Error serving file")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 3. FIX: CSRF Protection
|
||||
|
||||
### Add CSRF Middleware
|
||||
```python
|
||||
# In api.py

# NOTE(review): Starlette does not ship a `starlette.middleware.csrf` module,
# so this import fails as written. CSRF middleware has to come from a separate
# package (for example the third-party `starlette-csrf`, imported as
# `starlette_csrf`); its keyword names and the expected type of `exempt_urls`
# may differ from what is shown here - TODO confirm before adopting.
from starlette.middleware.csrf import CSRFMiddleware

# Registers CSRF checking for every request except the listed safe methods
# and exempt URLs.
app.add_middleware(
    CSRFMiddleware,
    secret_key=SESSION_SECRET_KEY,
    safe_methods=['GET', 'HEAD', 'OPTIONS'],
    exempt_urls=['/api/auth/login', '/api/auth/logout'],  # Public endpoints
)
|
||||
```
|
||||
|
||||
### Frontend Implementation
|
||||
```typescript
|
||||
// web/frontend/src/lib/api.ts
|
||||
|
||||
/**
 * POST `data` as JSON to `endpoint` with auth headers and the CSRF token.
 *
 * Resolves with the parsed JSON response. On a non-OK status it throws, after
 * first invoking the unauthorized handler when the status is 401.
 */
async post<T>(endpoint: string, data?: any): Promise<T> {
  // Get CSRF token from cookie or meta tag
  const csrfToken = this.getCSRFToken()
  const headers = {
    ...this.getAuthHeaders(),
    'X-CSRFToken': csrfToken, // Include CSRF token
  }
  const body = data ? JSON.stringify(data) : undefined

  const response = await fetch(`${API_BASE}${endpoint}`, { method: 'POST', headers, body })

  if (response.ok) {
    return response.json()
  }
  if (response.status === 401) {
    this.handleUnauthorized()
  }
  throw new Error(`API error: ${response.statusText}`)
}
|
||||
|
||||
/**
 * Read the CSRF token from the `csrf-token` meta tag, falling back to the
 * `csrftoken` cookie. Returns '' when neither is present.
 */
private getCSRFToken(): string {
  // Try to get from meta tag
  const meta = document.querySelector('meta[name="csrf-token"]')
  if (meta) {
    return meta.getAttribute('content') || ''
  }

  // Or from cookie
  const prefix = 'csrftoken='
  const csrfCookie = document.cookie.split('; ').find(c => c.startsWith(prefix))
  // Slice off the name instead of splitting on '=', which would truncate a
  // token value that itself contains '='.
  return csrfCookie ? csrfCookie.slice(prefix.length) : ''
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 4. FIX: Subprocess Command Injection
|
||||
|
||||
### Vulnerable Code (modules/tiktok_module.py:294)
|
||||
```python
|
||||
# DANGEROUS - username not escaped
|
||||
username = "test'; rm -rf /; echo '"
|
||||
output_dir = "/downloads"
|
||||
|
||||
# This could execute arbitrary commands!
|
||||
cmd = f"yt-dlp -o '%(title)s.%(ext)s' https://www.tiktok.com/@{username}"
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, cwd=output_dir)
|
||||
```
|
||||
|
||||
### Recommended Fix
|
||||
```python
|
||||
import subprocess
|
||||
import shlex
|
||||
from typing import List
|
||||
|
||||
def safe_run_command(cmd: List[str], cwd: str = None, **kwargs) -> subprocess.CompletedProcess:
    """
    Safely run command with list-based arguments (prevents injection).
    Never use shell=True with user input.

    Args:
        cmd: Program and arguments as a list; a single string is rejected.
        cwd: Working directory for the child process.
        **kwargs: Extra ``subprocess.run`` options. ``timeout`` (default 300),
            ``capture_output`` and ``text`` (default True) may be overridden;
            ``shell=True`` is refused.

    Returns:
        The ``CompletedProcess``; a non-zero exit code is not an error here.

    Raises:
        ValueError: invalid arguments, timeout, or the process could not be
            started. The original exception is chained as ``__cause__``.
    """
    # A string would be executed as one program name (or handed to a shell),
    # which defeats the purpose of this wrapper.
    if isinstance(cmd, (str, bytes)) or not cmd:
        raise ValueError("Command must be a non-empty list of arguments")
    if kwargs.get('shell'):
        raise ValueError("shell=True is not allowed")

    # Apply defaults without colliding with caller-supplied values.
    kwargs.setdefault('timeout', 300)
    kwargs.setdefault('text', True)
    if 'stdout' not in kwargs and 'stderr' not in kwargs:
        kwargs.setdefault('capture_output', True)

    try:
        # Use list form - much safer than string form
        return subprocess.run(cmd, cwd=cwd, **kwargs)
    except subprocess.TimeoutExpired as exc:
        raise ValueError("Command timed out") from exc
    except (OSError, subprocess.SubprocessError) as exc:
        raise ValueError(f"Command failed: {exc}") from exc
|
||||
|
||||
# Usage with validation
|
||||
def download_tiktok_video(username: str, output_dir: str) -> bool:
    """Download TikTok video safely.

    Args:
        username: TikTok handle, with or without a leading '@'.
        output_dir: Directory yt-dlp runs in (files are written there).

    Returns:
        True when yt-dlp exits with status 0, False on any download failure.

    Raises:
        ValueError: the username is empty, too long, or contains characters
            other than letters, digits, '_', '.' and '-'.
    """
    # Validate input
    if not username or len(username) > 100:
        raise ValueError("Invalid username")

    # Reject rather than strip: silently dropping characters could turn the
    # request into a download of a different (or empty) account name.
    handle = username[1:] if username.startswith('@') else username
    if not handle or not all(c.isalnum() or c in '_.-' for c in handle):
        raise ValueError("Invalid username")

    # Build command as list (safer)
    cmd = [
        'yt-dlp',
        '-o', '%(title)s.%(ext)s',
        f'https://www.tiktok.com/@{handle}'
    ]

    try:
        result = safe_run_command(cmd, cwd=output_dir)
    except ValueError as e:
        # safe_run_command reports timeouts and launch failures as ValueError.
        logger.error(f"Failed to download TikTok: {e}")
        return False

    if result.returncode != 0:
        logger.error(f"yt-dlp error: {result.stderr}")
        return False

    return True
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5. FIX: Input Validation on Config
|
||||
|
||||
### Current Vulnerable Code (api.py:349-351)
|
||||
```python
|
||||
@app.put("/api/config")
|
||||
async def update_config(
|
||||
config: ConfigUpdate, # Raw dict, no validation
|
||||
current_user: Dict = Depends(get_current_user)
|
||||
):
|
||||
"""Update configuration"""
|
||||
app_state.config.update(config.config)
|
||||
return {"success": True}
|
||||
```
|
||||
|
||||
### Recommended Fix with Validation
|
||||
```python
|
||||
from pydantic import BaseModel, Field, validator
|
||||
from typing import Optional, Dict, Any
|
||||
|
||||
# Define validated config schemas
|
||||
class PlatformConfig(BaseModel):
    """Validated per-platform settings.

    The numeric ranges are enforced by the ``Field`` constraints. A separate
    validator repeating the same bounds could never fire, so none is defined.
    """

    enabled: bool = True
    check_interval_hours: int = Field(gt=0, le=24)  # 1-24 hours
    max_retries: int = Field(ge=1, le=10)
    timeout_seconds: int = Field(gt=0, le=3600)  # up to one hour
|
||||
|
||||
class MediaDownloaderConfig(BaseModel):
    """Validated general settings for the downloader.

    Numeric ranges are enforced by the ``Field`` constraints; only
    ``download_path`` needs a custom validator because it checks the
    filesystem.
    """

    download_path: str
    max_concurrent_downloads: int = Field(ge=1, le=20)
    enable_deduplication: bool = True
    enable_face_recognition: bool = False
    recycle_bin_enabled: bool = True
    recycle_bin_retention_days: int = Field(ge=1, le=365)

    @validator('download_path')
    def validate_path(cls, v):
        """Require ``download_path`` to be an existing directory."""
        from pathlib import Path

        p = Path(v)
        if not p.exists():
            raise ValueError('Download path does not exist')
        if not p.is_dir():
            raise ValueError('Download path must be a directory')
        return str(p)
|
||||
|
||||
class ConfigUpdate(BaseModel):
    """Request body for a configuration update.

    Every section is optional and defaults to None, so a request may carry
    any subset of sections.
    """

    # Per-platform sections
    instagram: Optional[PlatformConfig] = None
    tiktok: Optional[PlatformConfig] = None
    forums: Optional[PlatformConfig] = None
    # Application-wide settings
    general: Optional[MediaDownloaderConfig] = None
|
||||
|
||||
# Safe endpoint with validation
|
||||
@app.put("/api/config")
async def update_config(
    update: ConfigUpdate,  # Automatically validated by Pydantic
    current_user: Dict = Depends(get_current_user)
) -> Dict:
    """Update configuration with validation.

    Only sections present (and non-null) in the request are changed.

    Returns:
        A dict with ``success``, ``message`` and ``updated_keys``.

    Raises:
        HTTPException 500: the settings store rejected the write. Invalid
            request bodies are rejected by Pydantic before this handler runs.
    """
    # dict() already converts nested models into plain dicts, so the values
    # below must not be converted again.
    config_dict = {
        key: value
        for key, value in update.dict(exclude_unset=True).items()
        if value is not None
    }

    # Log who made the change
    logger.info(f"User {current_user['username']} updating config: {list(config_dict.keys())}")

    # Save to database first, so a storage failure cannot leave the in-memory
    # config ahead of what was persisted.
    try:
        for key, value in config_dict.items():
            app_state.settings.set(
                key,
                value,
                category=key,
                updated_by=current_user['username']
            )
    except Exception as e:
        logger.error(f"Config update failed: {e}")
        raise HTTPException(
            status_code=500,
            detail="Failed to save configuration"
        )

    # Merge with existing config
    for key, value in config_dict.items():
        app_state.config[key] = value

    return {
        "success": True,
        "message": "Configuration updated successfully",
        "updated_keys": list(config_dict.keys())
    }
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 6. FIX: JSON Metadata Search Performance
|
||||
|
||||
### Current Inefficient Code (unified_database.py:576-590)
|
||||
```python
|
||||
def get_download_by_media_id(self, media_id: str, platform: str = 'fastdl') -> Optional[Dict]:
|
||||
"""Get download record by Instagram media ID"""
|
||||
with self.get_connection() as conn:
|
||||
cursor = conn.cursor()
|
||||
|
||||
# This causes FULL TABLE SCAN on large datasets!
|
||||
pattern1 = f'%"media_id": "{media_id}"%'
|
||||
pattern2 = f'%"media_id"%{media_id}%'
|
||||
|
||||
cursor.execute('''
|
||||
SELECT * FROM downloads
|
||||
WHERE platform = ?
|
||||
AND (metadata LIKE ? OR metadata LIKE ?)
|
||||
LIMIT 1
|
||||
''', (platform, pattern1, pattern2))
|
||||
```
|
||||
|
||||
### Recommended Fix - Option 1: Separate Column
|
||||
```python
|
||||
# Schema modification (add once)
|
||||
def _init_database(self):
|
||||
"""Initialize database with optimized schema"""
|
||||
with self.get_connection() as conn:
|
||||
cursor = conn.cursor()
|
||||
|
||||
# Add separate column for media_id (indexed)
|
||||
try:
|
||||
cursor.execute("ALTER TABLE downloads ADD COLUMN media_id TEXT")
|
||||
except sqlite3.OperationalError:
|
||||
pass # Column already exists
|
||||
|
||||
# Create efficient index
|
||||
cursor.execute('''
|
||||
CREATE INDEX IF NOT EXISTS idx_media_id_platform
|
||||
ON downloads(media_id, platform)
|
||||
WHERE media_id IS NOT NULL
|
||||
''')
|
||||
conn.commit()
|
||||
|
||||
def get_download_by_media_id(self, media_id: str, platform: str = 'fastdl') -> Optional[Dict]:
    """Get download record by Instagram media ID (fast).

    Looks the row up through the indexed ``media_id`` column.

    Returns:
        The row as a dict keyed by column name, with ``metadata`` decoded
        from JSON when it parses, or None when no row matches.
    """
    with self.get_connection() as conn:
        cursor = conn.cursor()

        # Now uses fast index instead of LIKE scan
        cursor.execute('''
            SELECT id, url, platform, source, content_type,
                   filename, file_path, post_date, download_date,
                   file_size, file_hash, metadata
            FROM downloads
            WHERE platform = ? AND media_id = ?
            LIMIT 1
        ''', (platform, media_id))

        row = cursor.fetchone()
        if row is None:
            return None

        # Build the dict from the cursor description so this works whether
        # or not the connection uses a sqlite3.Row factory.
        columns = [column[0] for column in cursor.description]
        record = dict(zip(columns, row))

    # Parse metadata; keep the raw text if it is not valid JSON.
    if record.get('metadata'):
        try:
            record['metadata'] = json.loads(record['metadata'])
        except (TypeError, ValueError):
            pass
    return record
|
||||
|
||||
def record_download(self, media_id: str = None, **kwargs):
|
||||
"""Record download with media_id extracted to separate column"""
|
||||
# ... existing code ...
|
||||
cursor.execute('''
|
||||
INSERT INTO downloads (
|
||||
url_hash, url, platform, source, content_type,
|
||||
filename, file_path, file_size, file_hash,
|
||||
post_date, status, error_message, metadata, media_id
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
''', (
|
||||
url_hash, url, platform, source, content_type,
|
||||
filename, file_path, file_size, file_hash,
|
||||
post_date.isoformat() if post_date else None,
|
||||
status, error_message,
|
||||
json.dumps(metadata) if metadata else None,
|
||||
media_id # Store separately for fast lookup
|
||||
))
|
||||
```
|
||||
|
||||
### Recommended Fix - Option 2: JSON_EXTRACT (requires SQLite's JSON functions, which are built in by default since 3.38)
|
||||
```python
|
||||
# Uses SQLite's built-in JSON functions: an exact match on the parsed value
# instead of a text pattern. On its own this still examines every row of the
# platform; it only becomes an index lookup if an expression index such as
#   CREATE INDEX idx_meta_media_id
#   ON downloads(platform, json_extract(metadata, '$.media_id'))
# is created as well.
def get_download_by_media_id(self, media_id: str, platform: str = 'fastdl') -> Optional[Dict]:
    """Get download record by Instagram media ID using JSON_EXTRACT.

    Returns:
        The row as a dict keyed by column name, with ``metadata`` decoded
        from JSON when it parses, or None when no row matches.
    """
    with self.get_connection() as conn:
        cursor = conn.cursor()

        cursor.execute('''
            SELECT id, url, platform, source, content_type,
                   filename, file_path, post_date, download_date,
                   file_size, file_hash, metadata
            FROM downloads
            WHERE platform = ?
              AND JSON_EXTRACT(metadata, '$.media_id') = ?
            LIMIT 1
        ''', (platform, media_id))

        row = cursor.fetchone()
        if row is None:
            return None

        # Works with or without a sqlite3.Row factory on the connection.
        columns = [column[0] for column in cursor.description]
        result = dict(zip(columns, row))

    # Parse metadata (json.JSONDecodeError is a ValueError subclass)
    if result.get('metadata'):
        try:
            result['metadata'] = json.loads(result['metadata'])
        except (TypeError, ValueError):
            pass
    return result
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 7. FIX: Bare Exception Handlers
|
||||
|
||||
### Problematic Code (fastdl_module.py, media-downloader.py)
|
||||
```python
|
||||
except: # Too broad!
|
||||
break
|
||||
```
|
||||
|
||||
### Recommended Fix
|
||||
```python
|
||||
import sqlite3
import time

import requests
from requests.exceptions import RequestException, Timeout, ConnectionError

# Upper bound on retries so a persistent timeout or lock cannot loop forever.
MAX_ATTEMPTS = 3

# Be specific about which exceptions to catch
for attempt in range(1, MAX_ATTEMPTS + 1):
    try:
        # ... code that might fail ...
        download_file(url)
        break  # success - stop retrying

    except Timeout as e:
        # Transient network error: retry
        logger.warning(f"Network error downloading {url}: {e}")
        continue

    except RequestException as e:
        # Any other network error: skip this file.
        # This must come before OSError, which RequestException derives from.
        logger.warning(f"Network error downloading {url}: {e}")
        break

    except sqlite3.OperationalError as e:
        # Handle database errors specifically
        if "database is locked" in str(e):
            logger.warning("Database locked, retrying...")
            time.sleep(1)
            continue
        logger.error(f"Database error: {e}")
        raise

    except OSError as e:
        # Handle file system errors (IOError is an alias of OSError)
        logger.error(f"File system error: {e}")
        break

    except Exception as e:
        # Only catch unexpected errors as last resort
        logger.error(f"Unexpected error: {type(e).__name__}: {e}", exc_info=True)
        break
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 8. FIX: Async File I/O
|
||||
|
||||
### Current Blocking Code (web/backend/api.py)
|
||||
```python
|
||||
# This blocks the async event loop!
|
||||
@app.get("/api/media/thumbnail")
|
||||
async def get_thumbnail(file_path: str):
|
||||
# Synchronous file I/O blocks other requests
|
||||
with open(file_path, 'rb') as f:
|
||||
image = Image.open(f)
|
||||
# ... process image ...
|
||||
return FileResponse(processed_image)
|
||||
```
|
||||
|
||||
### Recommended Fix with aiofiles
|
||||
```python
|
||||
import asyncio
import io

import aiofiles
from PIL import Image


@app.get("/api/media/thumbnail")
async def get_thumbnail(
    file_path: str,
    media_type: str,
    current_user: Dict = Depends(get_current_user_media)
) -> StreamingResponse:
    """Serve thumbnail efficiently without blocking.

    The file is read with non-blocking I/O and the image work runs in the
    default thread-pool executor, so the event loop stays free.

    Raises:
        HTTPException 403/404: raised by path validation.
        HTTPException 404: the file disappeared before it could be read.
        HTTPException 500: reading or thumbnail creation failed.
    """
    # Apply the path-traversal guard from section 2 before opening anything.
    # It raises HTTPException itself, so it stays outside the try block.
    safe_path = validate_file_path(file_path)

    try:
        # Use aiofiles for non-blocking file I/O
        async with aiofiles.open(safe_path, 'rb') as f:
            file_data = await f.read()

        # Offload CPU-bound image processing to thread pool.
        # get_running_loop() is the supported call inside a coroutine.
        loop = asyncio.get_running_loop()
        thumbnail = await loop.run_in_executor(
            None,  # Use default executor (ThreadPoolExecutor)
            _create_thumbnail,
            file_data,
            media_type
        )

        return StreamingResponse(
            io.BytesIO(thumbnail),
            media_type="image/jpeg"
        )

    except FileNotFoundError:
        raise HTTPException(status_code=404, detail="File not found")
    except Exception as e:
        logger.error(f"Error creating thumbnail: {e}")
        raise HTTPException(status_code=500, detail="Error creating thumbnail")
|
||||
|
||||
def _create_thumbnail(file_data: bytes, media_type: str) -> bytes:
    """CPU-bound function to create thumbnail.

    Args:
        file_data: Raw bytes of the source image.
        media_type: Accepted for interface compatibility; not used here.

    Returns:
        JPEG-encoded bytes of the image scaled to fit within 200x200.

    Raises:
        Exception: whatever the image library raises; it is logged and
            re-raised for the caller to handle.
    """
    try:
        image = Image.open(io.BytesIO(file_data))
        image.thumbnail((200, 200))

        # JPEG cannot store an alpha channel or a palette, so images in such
        # modes (e.g. PNG with transparency, GIF) must be converted first.
        if image.mode not in ('RGB', 'L', 'CMYK'):
            image = image.convert('RGB')

        output = io.BytesIO()
        image.save(output, format='JPEG', quality=85)
        return output.getvalue()

    except Exception as e:
        logger.error(f"Thumbnail creation failed: {e}")
        raise
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 9. FIX: Adapter Duplication
|
||||
|
||||
### Current Duplicated Code (unified_database.py:1708-2080)
|
||||
```python
|
||||
# FastDLDatabaseAdapter
|
||||
class FastDLDatabaseAdapter:
|
||||
def __init__(self, unified_db: UnifiedDatabase):
|
||||
self.db = unified_db
|
||||
self.platform = 'fastdl'
|
||||
|
||||
def is_already_downloaded(self, media_id: str) -> bool:
|
||||
# ... 20+ lines of duplicate code ...
|
||||
|
||||
def record_download(self, media_id: str, username: str, **kwargs):
|
||||
# ... 30+ lines of duplicate code ...
|
||||
|
||||
# TikTokDatabaseAdapter (similar structure)
|
||||
# ToolzuDatabaseAdapter (similar structure)
|
||||
# CoppermineDatabaseAdapter (similar structure)
|
||||
# ... and more
|
||||
```
|
||||
|
||||
### Recommended Fix: Generic Base Adapter
|
||||
```python
|
||||
import logging
from abc import ABC, abstractmethod
from typing import Any, Dict, Optional


class BaseDatabaseAdapter(ABC):
    """Generic adapter for unified database compatibility.

    Subclasses supply the platform-specific pieces: the metadata key that
    holds the identifier, how to build the URL, and how to build metadata.
    """

    def __init__(self, unified_db: 'UnifiedDatabase', platform: str):
        self.db = unified_db
        self.platform = platform

    @abstractmethod
    def get_identifier(self, data: Dict[str, Any]) -> str:
        """Extract unique identifier from data"""
        pass

    @abstractmethod
    def build_metadata(self, data: Dict[str, Any]) -> Dict:
        """Build platform-specific metadata"""
        pass

    def is_already_downloaded(self, identifier: str) -> bool:
        """Check if content is already downloaded.

        Compares the parsed JSON value stored under ``_id_key()``. Unlike a
        LIKE pattern, this does not depend on how the JSON text was formatted
        and does not treat '%' or '_' in the identifier as wildcards.
        """
        with self.db.get_connection() as conn:
            cursor = conn.cursor()
            # CASE guarantees json_extract only sees rows holding valid JSON.
            cursor.execute('''
                SELECT 1 FROM downloads
                WHERE platform = ?
                  AND CASE WHEN json_valid(metadata)
                           THEN json_extract(metadata, ?)
                      END = ?
                LIMIT 1
            ''', (self.platform, f'$.{self._id_key()}', identifier))
            return cursor.fetchone() is not None

    @abstractmethod
    def _id_key(self) -> str:
        """Return the metadata key for identifier"""
        pass

    def record_download(
        self,
        identifier: str,
        source: str,
        **kwargs
    ) -> bool:
        """Record download with platform-specific data.

        Args:
            identifier: Platform-specific content id.
            source: Account or origin the content came from.
            **kwargs: Optional ``content_type``, ``filename``, ``file_path``,
                ``post_date`` plus any extra fields for the metadata.

        Returns:
            Whatever the unified database's ``record_download`` returns.
        """
        url = self._build_url(identifier, source, kwargs)
        metadata = self.build_metadata({
            **kwargs,
            self._id_key(): identifier
        })

        # Calculate file hash if provided. Hashing is best-effort: a failure
        # must not block recording the download, but it should be visible.
        file_hash = None
        if kwargs.get('file_path'):
            try:
                file_hash = UnifiedDatabase.get_file_hash(kwargs['file_path'])
            except Exception as exc:
                logging.getLogger(__name__).warning(
                    "Could not hash %s: %s", kwargs['file_path'], exc
                )

        return self.db.record_download(
            url=url,
            platform=self.platform,
            source=source,
            content_type=kwargs.get('content_type', 'post'),
            filename=kwargs.get('filename'),
            file_path=kwargs.get('file_path'),
            file_hash=file_hash,
            post_date=kwargs.get('post_date'),
            metadata=metadata
        )

    @abstractmethod
    def _build_url(self, identifier: str, source: str, kwargs: Dict) -> str:
        """Build URL for the content"""
        pass
|
||||
|
||||
# Concrete implementations
|
||||
class FastDLDatabaseAdapter(BaseDatabaseAdapter):
    """Adapter for the 'fastdl' platform, keyed by ``media_id``."""

    def __init__(self, unified_db: UnifiedDatabase):
        super().__init__(unified_db, 'fastdl')

    def _id_key(self) -> str:
        return 'media_id'

    def get_identifier(self, data: Dict) -> str:
        return data.get('media_id', '')

    def _build_url(self, identifier: str, source: str, kwargs: Dict) -> str:
        # Prefer the real download URL; otherwise use a synthetic one.
        download_url = kwargs.get('download_url')
        if download_url:
            return download_url
        return f"instagram://{identifier}"

    def build_metadata(self, data: Dict) -> Dict:
        # Start from the fixed keys, then copy every other field except the
        # id (already set) and the local file path.
        metadata = {
            'media_id': data.get('media_id'),
            'source': 'fastdl',
        }
        for key, value in data.items():
            if key not in ('media_id', 'file_path'):
                metadata[key] = value
        return metadata
|
||||
|
||||
class TikTokDatabaseAdapter(BaseDatabaseAdapter):
    """Adapter for the 'tiktok' platform, keyed by ``video_id``."""

    def __init__(self, unified_db: UnifiedDatabase):
        super().__init__(unified_db, 'tiktok')

    def _id_key(self) -> str:
        return 'video_id'

    def get_identifier(self, data: Dict) -> str:
        return data.get('video_id', '')

    def _build_url(self, identifier: str, source: str, kwargs: Dict) -> str:
        profile = f"https://www.tiktok.com/@{source}"
        return f"{profile}/video/{identifier}"

    def build_metadata(self, data: Dict) -> Dict:
        # Put video_id first, then copy the remaining fields unchanged.
        metadata = {'video_id': data.get('video_id')}
        for key, value in data.items():
            if key != 'video_id':
                metadata[key] = value
        return metadata
|
||||
|
||||
class SnapchatDatabaseAdapter(BaseDatabaseAdapter):
    """Adapter for the 'snapchat' platform, keyed by ``story_id``."""

    def __init__(self, unified_db: UnifiedDatabase):
        super().__init__(unified_db, 'snapchat')

    def _id_key(self) -> str:
        return 'story_id'

    def get_identifier(self, data: Dict) -> str:
        return data.get('story_id', '')

    def _build_url(self, identifier: str, source: str, kwargs: Dict) -> str:
        # Use `or` so an explicit url=None (or '') also falls back to the
        # synthetic URL, matching the FastDL adapter.
        return kwargs.get('url') or f"snapchat://{identifier}"

    def build_metadata(self, data: Dict) -> Dict:
        # Metadata is stored as given; copy so the caller's dict is not shared.
        return data.copy()
|
||||
|
||||
# ... similar for other platforms ...
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Summary
|
||||
|
||||
These code examples provide concrete implementations for the major security, performance, and quality issues identified in the review. The fixes follow Python/TypeScript best practices and can be implemented incrementally.
|
||||
|
||||
Start with the security fixes (sections 1-5), then move to performance (sections 6 and 8), then code quality (sections 7 and 9).