Files
media-downloader/tests/test_repost_detection_manual.py
Todd 0d7b2b1aab Initial commit
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-29 22:42:55 -04:00

310 lines
9.4 KiB
Python
Raw Permalink Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
Manual Integration Test for Instagram Repost Detector
This script tests the repost detector with real files and can be run manually
to validate the implementation before integrating into the main system.
Usage:
python3 tests/test_repost_detection_manual.py [test_file_path] [source_username] [--live]
Example:
python3 tests/test_repost_detection_manual.py \
"/media/.../evalongoria_20251109_154548_story6.mp4" \
"evalongoria"
"""
import sys
import os
from pathlib import Path
# Add parent directory to path
sys.path.insert(0, str(Path(__file__).parent.parent))
from modules.instagram_repost_detector import InstagramRepostDetector
from modules.unified_database import UnifiedDatabase
def test_dependencies():
    """Check that every dependency of the repost detector is available.

    Tries to import pytesseract/PIL, ``cv2`` (opencv-python) and imagehash,
    then calls ``pytesseract.get_tesseract_version()`` to probe the tesseract
    binary. A status line is printed for each check, followed by an install
    hint when the check fails.

    When pytesseract itself cannot be imported, the binary check is reported
    as skipped instead of as a missing binary: the binary cannot be probed
    without the wrapper, and the import failure has already been recorded.

    Returns:
        bool: True when every check passed, False when anything is missing.
    """
    print("=" * 70)
    print("CHECKING DEPENDENCIES")
    print("=" * 70)

    missing = []

    try:
        import pytesseract  # noqa: F401 - imported only to prove availability
        from PIL import Image  # noqa: F401 - imported only to prove availability
    except ImportError:
        print("✗ pytesseract or PIL not installed")
        print(" Install: pip3 install pytesseract pillow")
        missing.append("pytesseract/PIL")
    else:
        print("✓ pytesseract and PIL installed")

    try:
        import cv2  # noqa: F401 - imported only to prove availability
    except ImportError:
        print("✗ opencv-python not installed")
        print(" Install: pip3 install opencv-python")
        missing.append("opencv-python")
    else:
        print("✓ opencv-python installed")

    try:
        import imagehash  # noqa: F401 - imported only to prove availability
    except ImportError:
        print("✗ imagehash not installed")
        print(" Install: pip3 install imagehash")
        missing.append("imagehash")
    else:
        print("✓ imagehash installed")

    # Check tesseract binary
    try:
        import pytesseract
    except ImportError:
        # The wrapper is unavailable, so nothing can be said about the binary.
        # The import failure itself was already added to `missing` above.
        print("✗ tesseract-ocr binary not checked (pytesseract could not be imported)")
    else:
        try:
            pytesseract.get_tesseract_version()
        except Exception as exc:
            # Diagnostic boundary: whatever the probe raised, the binary is
            # not usable, so report it as missing and show the reason.
            print("✗ tesseract-ocr binary not installed")
            print(f" Reason: {exc}")
            print(" Install: sudo apt-get install tesseract-ocr tesseract-ocr-eng")
            missing.append("tesseract-ocr")
        else:
            print("✓ tesseract-ocr binary installed")

    print()
    if missing:
        print(f"❌ Missing dependencies: {', '.join(missing)}")
        print("\nPlease install missing dependencies before running tests.")
        return False

    print("✅ All dependencies installed")
    return True
def test_ocr_extraction(file_path: str):
    """Run the detector's OCR username extraction on one file and report it.

    Builds an ``InstagramRepostDetector`` backed by a default-constructed
    ``UnifiedDatabase`` and calls ``_extract_username_from_repost``.

    Args:
        file_path: Path of the media file to inspect.

    Returns:
        The extracted username when the detector returns a truthy value,
        otherwise None.
    """
    rule = "=" * 70
    print("\n" + rule)
    print("TEST 1: OCR USERNAME EXTRACTION")
    print(rule)
    print(f"File: {file_path}")

    detector = InstagramRepostDetector(unified_db=UnifiedDatabase())
    extracted = detector._extract_username_from_repost(file_path)

    # Guard clause: an empty/None result is the failure case.
    if not extracted:
        print("❌ FAILED: No username found")
        return None

    print(f"✅ SUCCESS: Extracted username: @{extracted}")
    return extracted
def test_monitored_check(username: str):
    """Ask the detector whether a username is a monitored account and report it.

    Args:
        username: Account name to look up (without the leading ``@``).

    Returns:
        Whatever ``InstagramRepostDetector._is_monitored_account`` returned
        for ``username``.
    """
    rule = "=" * 70
    print("\n" + rule)
    print("TEST 2: MONITORED ACCOUNT CHECK")
    print(rule)
    print(f"Username: @{username}")

    detector = InstagramRepostDetector(unified_db=UnifiedDatabase())
    monitored = detector._is_monitored_account(username)

    # Pick the message first, then emit it once.
    verdict = (
        f"✅ @{username} IS monitored (will use normal download path)"
        if monitored
        else f" @{username} NOT monitored (will use temp queue)"
    )
    print(verdict)
    return monitored
def test_perceptual_hash(file_path: str):
    """Compute the detector's perceptual hash for one file and report it.

    Args:
        file_path: Path of the media file to hash.

    Returns:
        The value returned by ``InstagramRepostDetector._get_perceptual_hash``
        when it is truthy, otherwise None.
    """
    rule = "=" * 70
    print("\n" + rule)
    print("TEST 3: PERCEPTUAL HASH CALCULATION")
    print(rule)
    print(f"File: {file_path}")

    detector = InstagramRepostDetector(unified_db=UnifiedDatabase())
    digest = detector._get_perceptual_hash(file_path)

    # Guard clause: a falsy result means no hash could be produced.
    if not digest:
        print("❌ FAILED: Could not calculate hash")
        return None

    print(f"✅ SUCCESS: Hash = {digest}")
    return digest
def test_full_detection(file_path: str, source_username: str, dry_run: bool = True):
"""Test full repost detection workflow"""
print("\n" + "=" * 70)
print("TEST 4: FULL REPOST DETECTION WORKFLOW")
print("=" * 70)
print(f"File: {file_path}")
print(f"Source: @{source_username}")
print(f"Mode: {'DRY RUN (no downloads)' if dry_run else 'LIVE (will download)'}")
if dry_run:
print("\n⚠️ DRY RUN MODE - Will not download content from ImgInn")
print("To test with actual downloads, run with --live flag")
return None
db = UnifiedDatabase()
detector = InstagramRepostDetector(unified_db=db)
print("\nStarting detection...")
replacement = detector.check_and_replace_repost(file_path, source_username)
if replacement:
print(f"\n✅ SUCCESS: Repost replaced!")
print(f"Original file: {file_path}")
print(f"Replacement file: {replacement}")
return replacement
else:
print("\n❌ FAILED: No replacement found")
print("Possible reasons:")
print(" - No @username detected in the file")
print(" - Original content not available")
print(" - No matching content found via perceptual hash")
return None
def test_database_tracking():
    """Report on the repost tracking tables in the unified database.

    Looks up ``repost_fetch_cache`` and ``repost_replacements`` in
    ``sqlite_master``. For each table that exists its row count is printed;
    for ``repost_replacements`` the five most recently detected rows are
    listed as well. Prints only; returns nothing.
    """
    rule = "=" * 70
    print("\n" + rule)
    print("TEST 5: DATABASE TRACKING")
    print(rule)

    database = UnifiedDatabase()

    # First pass: find out which of the two tracking tables are present.
    with database.get_connection() as connection:
        cur = connection.cursor()

        cur.execute("""
SELECT name FROM sqlite_master
WHERE type='table' AND name='repost_fetch_cache'
""")
        cache_table_found = cur.fetchone() is not None

        cur.execute("""
SELECT name FROM sqlite_master
WHERE type='table' AND name='repost_replacements'
""")
        replacements_table_found = cur.fetchone() is not None

    # Fetch cache summary.
    if not cache_table_found:
        print(" repost_fetch_cache table will be created on first use")
    else:
        print("✓ repost_fetch_cache table exists")
        with database.get_connection() as connection:
            cur = connection.cursor()
            cur.execute("SELECT COUNT(*) FROM repost_fetch_cache")
            cached = cur.fetchone()[0]
            print(f" {cached} usernames in cache")

    # Replacement log summary, plus the latest entries when there are any.
    if not replacements_table_found:
        print(" repost_replacements table will be created on first use")
    else:
        print("✓ repost_replacements table exists")
        with database.get_connection() as connection:
            cur = connection.cursor()
            cur.execute("SELECT COUNT(*) FROM repost_replacements")
            tracked = cur.fetchone()[0]
            print(f" {tracked} replacements tracked")
            if tracked > 0:
                print("\nRecent replacements:")
                cur.execute("""
SELECT repost_source, original_username, repost_filename, detected_at
FROM repost_replacements
ORDER BY detected_at DESC
LIMIT 5
""")
                for entry in cur.fetchall():
                    print(f" - @{entry[0]} reposted from @{entry[1]}: {entry[2]} ({entry[3]})")
def main():
    """Run the manual test suite according to the command-line arguments.

    Command line: ``[file_path] [source_username] [--live]``. The ``--live``
    flag is removed before the positional arguments are read, so it may appear
    in any position and is never mistaken for the file path or the source
    username.

    * Without a file path: prints usage, runs the dependency check and, if it
      passes, the database tracking check.
    * With a file path: verifies that the path exists and that dependencies
      are present, then runs OCR extraction, the monitored-account check (only
      when a username was extracted), perceptual hashing and the database
      check. The full detection workflow runs only when ``--live`` is given.

    Returns:
        None in every case; results are reported on stdout.
    """
    print("\n" + "=" * 70)
    print("INSTAGRAM REPOST DETECTOR - MANUAL TEST SUITE")
    print("=" * 70)

    # Split the flag from the positional arguments before indexing them;
    # indexing sys.argv directly would treat "--live" as a path or username
    # whenever the username is omitted or the flag comes first.
    cli_args = sys.argv[1:]
    dry_run = "--live" not in cli_args
    positional = [arg for arg in cli_args if arg != "--live"]

    # Check if test file provided
    if not positional:
        print("\nUsage:")
        print(" python3 tests/test_repost_detection_manual.py [file_path] [source_username] [--live]")
        print("\nExamples:")
        print(" # Test with real example file (dry run)")
        print(' python3 tests/test_repost_detection_manual.py \\')
        print(' "/media/.../evalongoria_20251109_154548_story6.mp4" \\')
        print(' "evalongoria"')
        print()
        print(" # Test with actual downloads")
        print(' python3 tests/test_repost_detection_manual.py \\')
        print(' "/media/.../evalongoria_20251109_154548_story6.mp4" \\')
        print(' "evalongoria" \\')
        print(' --live')
        print()
        # Run dependency check and database check only
        deps_ok = test_dependencies()
        if deps_ok:
            test_database_tracking()
        return

    file_path = positional[0]
    source_username = positional[1] if len(positional) >= 2 else "unknown"

    # Validate file exists
    if not os.path.exists(file_path):
        print(f"\n❌ ERROR: File not found: {file_path}")
        return

    # Dependencies must be present before any detector code is exercised
    deps_ok = test_dependencies()
    if not deps_ok:
        print("\n⚠️ Cannot proceed with tests - missing dependencies")
        return

    # Test 1: OCR extraction
    username = test_ocr_extraction(file_path)

    # Test 2: Monitored check (if username found)
    if username:
        test_monitored_check(username)

    # Test 3: Perceptual hash
    test_perceptual_hash(file_path)

    # Test 5: Database tracking
    test_database_tracking()

    # Test 4: Full detection (if not dry run)
    if not dry_run:
        test_full_detection(file_path, source_username, dry_run=False)
    else:
        print("\n" + "=" * 70)
        print("SKIPPING FULL WORKFLOW TEST (DRY RUN)")
        print("=" * 70)
        print("To test full workflow with actual downloads, add --live flag")

    print("\n" + "=" * 70)
    print("TEST SUITE COMPLETE")
    print("=" * 70)
# Run the manual suite only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()